
Commit 73f6f0e (1 parent: a33e284)

ConfigureRequest embeds BackendConfiguration to improve readability

4 files changed, 15 insertions(+), 14 deletions(-)

cmd/cli/commands/compose.go
5 additions, 3 deletions
@@ -83,9 +83,11 @@ func newUpCommand() *cobra.Command {
 
 	for _, model := range models {
 		if err := desktopClient.ConfigureBackend(scheduling.ConfigureRequest{
-			Model:       model,
-			ContextSize: ctxSize,
-			Speculative: speculativeConfig,
+			Model: model,
+			BackendConfiguration: inference.BackendConfiguration{
+				ContextSize: ctxSize,
+				Speculative: speculativeConfig,
+			},
 		}); err != nil {
 			configErrFmtString := "failed to configure backend for model %s with context-size %d"
 			_ = sendErrorf(configErrFmtString+": %v", model, ctxSize, err)
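
A note on why the call site grew rather than shrank: Go composite literals cannot assign through promoted fields, so the writer must name the embedded type explicitly, while readers keep the flat access they had before. A sketch (the struct change itself appears in the api.go hunk below):

req := scheduling.ConfigureRequest{
	Model: model,
	BackendConfiguration: inference.BackendConfiguration{
		ContextSize: ctxSize,
		Speculative: speculativeConfig,
	},
}
_ = req.ContextSize // reads still work flat: the embedded field is promoted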

pkg/inference/scheduling/api.go
3 additions, 8 deletions
@@ -93,12 +93,7 @@ type UnloadResponse struct {
 
 // ConfigureRequest specifies per-model runtime configuration options.
 type ConfigureRequest struct {
-	Model       string                               `json:"model"`
-	ContextSize int64                                `json:"context-size,omitempty"`
-	Mode        *inference.BackendMode               `json:"mode,omitempty"`
-	Speculative *inference.SpeculativeDecodingConfig `json:"speculative,omitempty"`
-
-	// Backend-specific configuration
-	VLLM     *inference.VLLMConfig     `json:"vllm,omitempty"`
-	LlamaCpp *inference.LlamaCppConfig `json:"llamacpp,omitempty"`
+	Model string                 `json:"model"`
+	Mode  *inference.BackendMode `json:"mode,omitempty"`
+	inference.BackendConfiguration
 }
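
The embedding keeps the wire format unchanged: encoding/json flattens an untagged anonymous struct field, so BackendConfiguration's fields still serialize at the top level of the request object. A minimal runnable sketch, assuming a BackendConfiguration shaped like the fields that moved out of ConfigureRequest (its actual definition lives in the inference package and is not part of this diff):

package main

import (
	"encoding/json"
	"fmt"
)

type BackendConfiguration struct {
	ContextSize int64 `json:"context-size,omitempty"`
	// Speculative, VLLM, LlamaCpp elided for brevity.
}

type ConfigureRequest struct {
	Model string `json:"model"`
	BackendConfiguration
}

func main() {
	req := ConfigureRequest{
		Model:                "example/model",
		BackendConfiguration: BackendConfiguration{ContextSize: 4096},
	}
	out, _ := json.Marshal(req)
	fmt.Println(string(out)) // {"model":"example/model","context-size":4096}
}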

pkg/inference/scheduling/http_handler.go
3 additions, 1 deletion
@@ -330,7 +330,9 @@ func (h *HTTPHandler) Configure(w http.ResponseWriter, r *http.Request) {
 	}
 
 	configureRequest := ConfigureRequest{
-		ContextSize: -1,
+		BackendConfiguration: inference.BackendConfiguration{
+			ContextSize: -1,
+		},
 	}
 	if err := json.Unmarshal(body, &configureRequest); err != nil {
 		http.Error(w, "invalid request", http.StatusBadRequest)
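
Pre-setting ContextSize to -1 before unmarshaling is the defaults-first idiom: json.Unmarshal only touches fields whose keys appear in the body, so the sentinel survives an omitted "context-size" and, presumably, lets the scheduler distinguish "not specified" from an explicit zero. A sketch reusing the types from the previous example:

req := ConfigureRequest{
	BackendConfiguration: BackendConfiguration{ContextSize: -1},
}
_ = json.Unmarshal([]byte(`{"model":"example/model"}`), &req)
// req.ContextSize == -1: the key was absent, so the default stands.
_ = json.Unmarshal([]byte(`{"model":"example/model","context-size":0}`), &req)
// req.ContextSize == 0: an explicit zero overwrites it (omitempty affects marshaling only).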

pkg/ollama/http_handler.go
4 additions, 2 deletions
@@ -493,8 +493,10 @@ func (h *HTTPHandler) handleGenerate(w http.ResponseWriter, r *http.Request) {
 	// Empty prompt - preload the model
 	// ConfigureRunner is idempotent, so calling it again with the same context size is safe
 	configureRequest := scheduling.ConfigureRequest{
-		Model:       modelName,
-		ContextSize: ctxSize, // Use extracted value (or 0 for default)
+		Model: modelName,
+		BackendConfiguration: inference.BackendConfiguration{
+			ContextSize: ctxSize, // Use extracted value (or 0 for default)
+		},
 	}
 
 	_, err := h.scheduler.ConfigureRunner(ctx, nil, configureRequest, r.UserAgent()+" (Ollama API)")
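
The empty-prompt branch mirrors Ollama's generate semantics, where a request carrying only a model name loads the model without producing tokens, and the idempotent ConfigureRunner makes repeated preloads with the same context size safe. A hypothetical client-side sketch of hitting this path (the /api/generate route and baseURL are assumptions, not shown in the diff):

body := strings.NewReader(`{"model":"example/model"}`) // no "prompt": preload only
resp, err := http.Post(baseURL+"/api/generate", "application/json", body)
if err == nil {
	resp.Body.Close() // model is now loaded with the default context size
}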
