Skip to content

Commit edb758e

Browse files
committed
ConfigureRequest embeds BackendConfiguration to improve readability
1 parent a33e284 commit edb758e

File tree

3 files changed: +10 −11 lines changed

pkg/inference/scheduling/api.go

Lines changed: 3 additions & 8 deletions
```diff
@@ -93,12 +93,7 @@ type UnloadResponse struct {
 
 // ConfigureRequest specifies per-model runtime configuration options.
 type ConfigureRequest struct {
-	Model       string                               `json:"model"`
-	ContextSize int64                                `json:"context-size,omitempty"`
-	Mode        *inference.BackendMode               `json:"mode,omitempty"`
-	Speculative *inference.SpeculativeDecodingConfig `json:"speculative,omitempty"`
-
-	// Backend-specific configuration
-	VLLM     *inference.VLLMConfig     `json:"vllm,omitempty"`
-	LlamaCpp *inference.LlamaCppConfig `json:"llamacpp,omitempty"`
+	Model string                 `json:"model"`
+	Mode  *inference.BackendMode `json:"mode,omitempty"`
+	inference.BackendConfiguration
 }
```

pkg/inference/scheduling/http_handler.go

Lines changed: 3 additions & 1 deletion
```diff
@@ -330,7 +330,9 @@ func (h *HTTPHandler) Configure(w http.ResponseWriter, r *http.Request) {
 	}
 
 	configureRequest := ConfigureRequest{
-		ContextSize: -1,
+		BackendConfiguration: inference.BackendConfiguration{
+			ContextSize: -1,
+		},
 	}
 	if err := json.Unmarshal(body, &configureRequest); err != nil {
 		http.Error(w, "invalid request", http.StatusBadRequest)
```

pkg/ollama/http_handler.go

Lines changed: 4 additions & 2 deletions
```diff
@@ -493,8 +493,10 @@ func (h *HTTPHandler) handleGenerate(w http.ResponseWriter, r *http.Request) {
 	// Empty prompt - preload the model
 	// ConfigureRunner is idempotent, so calling it again with the same context size is safe
 	configureRequest := scheduling.ConfigureRequest{
-		Model:       modelName,
-		ContextSize: ctxSize, // Use extracted value (or 0 for default)
+		Model: modelName,
+		BackendConfiguration: inference.BackendConfiguration{
+			ContextSize: ctxSize, // Use extracted value (or 0 for default)
+		},
 	}
 
 	_, err := h.scheduler.ConfigureRunner(ctx, nil, configureRequest, r.UserAgent()+" (Ollama API)")
```

0 commit comments

Comments (0)