
Commit 73f6f0e (1 parent: a33e284)

ConfigureRequest embeds BackendConfiguration to improve readability

4 files changed, 15 insertions(+), 14 deletions(-)

cmd/cli/commands/compose.go
5 additions, 3 deletions
@@ -83,9 +83,11 @@ func newUpCommand() *cobra.Command {
 
 	for _, model := range models {
 		if err := desktopClient.ConfigureBackend(scheduling.ConfigureRequest{
-			Model:       model,
-			ContextSize: ctxSize,
-			Speculative: speculativeConfig,
+			Model: model,
+			BackendConfiguration: inference.BackendConfiguration{
+				ContextSize: ctxSize,
+				Speculative: speculativeConfig,
+			},
 		}); err != nil {
 			configErrFmtString := "failed to configure backend for model %s with context-size %d"
 			_ = sendErrorf(configErrFmtString+": %v", model, ctxSize, err)
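
A note on why the call site grew rather than shrank: Go composite literals cannot assign through promoted fields, so the writer must name the embedded type explicitly, while readers keep the flat access they had before. A sketch (the struct change itself appears in the api.go hunk below):

req := scheduling.ConfigureRequest{
	Model: model,
	BackendConfiguration: inference.BackendConfiguration{
		ContextSize: ctxSize,
		Speculative: speculativeConfig,
	},
}
_ = req.ContextSize // reads still work flat: the embedded field is promoted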

pkg/inference/scheduling/api.go
3 additions, 8 deletions
@@ -93,12 +93,7 @@ type UnloadResponse struct {
 
 // ConfigureRequest specifies per-model runtime configuration options.
 type ConfigureRequest struct {
-	Model       string                               `json:"model"`
-	ContextSize int64                                `json:"context-size,omitempty"`
-	Mode        *inference.BackendMode               `json:"mode,omitempty"`
-	Speculative *inference.SpeculativeDecodingConfig `json:"speculative,omitempty"`
-
-	// Backend-specific configuration
-	VLLM     *inference.VLLMConfig     `json:"vllm,omitempty"`
-	LlamaCpp *inference.LlamaCppConfig `json:"llamacpp,omitempty"`
+	Model string                 `json:"model"`
+	Mode  *inference.BackendMode `json:"mode,omitempty"`
+	inference.BackendConfiguration
 }
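
The embedding keeps the wire format unchanged: encoding/json flattens an untagged anonymous struct field, so BackendConfiguration's fields still serialize at the top level of the request object. A minimal runnable sketch, assuming a BackendConfiguration shaped like the fields that moved out of ConfigureRequest (its actual definition lives in the inference package and is not part of this diff):

package main

import (
	"encoding/json"
	"fmt"
)

type BackendConfiguration struct {
	ContextSize int64 `json:"context-size,omitempty"`
	// Speculative, VLLM, LlamaCpp elided for brevity.
}

type ConfigureRequest struct {
	Model string `json:"model"`
	BackendConfiguration
}

func main() {
	req := ConfigureRequest{
		Model:                "example/model",
		BackendConfiguration: BackendConfiguration{ContextSize: 4096},
	}
	out, _ := json.Marshal(req)
	fmt.Println(string(out)) // {"model":"example/model","context-size":4096}
}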

pkg/inference/scheduling/http_handler.go
3 additions, 1 deletion
@@ -330,7 +330,9 @@ func (h *HTTPHandler) Configure(w http.ResponseWriter, r *http.Request) {
 	}
 
 	configureRequest := ConfigureRequest{
-		ContextSize: -1,
+		BackendConfiguration: inference.BackendConfiguration{
+			ContextSize: -1,
+		},
 	}
 	if err := json.Unmarshal(body, &configureRequest); err != nil {
 		http.Error(w, "invalid request", http.StatusBadRequest)
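
Pre-setting ContextSize to -1 before unmarshaling is the defaults-first idiom: json.Unmarshal only touches fields whose keys appear in the body, so the sentinel survives an omitted "context-size" and, presumably, lets the scheduler distinguish "not specified" from an explicit zero. A sketch reusing the types from the previous example:

req := ConfigureRequest{
	BackendConfiguration: BackendConfiguration{ContextSize: -1},
}
_ = json.Unmarshal([]byte(`{"model":"example/model"}`), &req)
// req.ContextSize == -1: the key was absent, so the default stands.
_ = json.Unmarshal([]byte(`{"model":"example/model","context-size":0}`), &req)
// req.ContextSize == 0: an explicit zero overwrites it (omitempty affects marshaling only).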

pkg/ollama/http_handler.go
4 additions, 2 deletions
@@ -493,8 +493,10 @@ func (h *HTTPHandler) handleGenerate(w http.ResponseWriter, r *http.Request) {
 	// Empty prompt - preload the model
 	// ConfigureRunner is idempotent, so calling it again with the same context size is safe
 	configureRequest := scheduling.ConfigureRequest{
-		Model:       modelName,
-		ContextSize: ctxSize, // Use extracted value (or 0 for default)
+		Model: modelName,
+		BackendConfiguration: inference.BackendConfiguration{
+			ContextSize: ctxSize, // Use extracted value (or 0 for default)
+		},
 	}
 
 	_, err := h.scheduler.ConfigureRunner(ctx, nil, configureRequest, r.UserAgent()+" (Ollama API)")
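
The empty-prompt branch mirrors Ollama's generate semantics, where a request carrying only a model name loads the model without producing tokens, and the idempotent ConfigureRunner makes repeated preloads with the same context size safe. A hypothetical client-side sketch of hitting this path (the /api/generate route and baseURL are assumptions, not shown in the diff):

body := strings.NewReader(`{"model":"example/model"}`) // no "prompt": preload only
resp, err := http.Post(baseURL+"/api/generate", "application/json", body)
if err == nil {
	resp.Body.Close() // model is now loaded with the default context size
}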
