Skip to content

Commit edb758e

Browse files
committed
ConfigureRequest embeds BackendConfiguration to improve readability
1 parent a33e284 commit edb758e

File tree

3 files changed: +10 −11 lines changed

pkg/inference/scheduling/api.go

Lines changed: 3 additions & 8 deletions
```diff
@@ -93,12 +93,7 @@ type UnloadResponse struct {
 
 // ConfigureRequest specifies per-model runtime configuration options.
 type ConfigureRequest struct {
-	Model       string                               `json:"model"`
-	ContextSize int64                                `json:"context-size,omitempty"`
-	Mode        *inference.BackendMode               `json:"mode,omitempty"`
-	Speculative *inference.SpeculativeDecodingConfig `json:"speculative,omitempty"`
-
-	// Backend-specific configuration
-	VLLM     *inference.VLLMConfig     `json:"vllm,omitempty"`
-	LlamaCpp *inference.LlamaCppConfig `json:"llamacpp,omitempty"`
+	Model string                 `json:"model"`
+	Mode  *inference.BackendMode `json:"mode,omitempty"`
+	inference.BackendConfiguration
 }
```

pkg/inference/scheduling/http_handler.go

Lines changed: 3 additions & 1 deletion
```diff
@@ -330,7 +330,9 @@ func (h *HTTPHandler) Configure(w http.ResponseWriter, r *http.Request) {
 	}
 
 	configureRequest := ConfigureRequest{
-		ContextSize: -1,
+		BackendConfiguration: inference.BackendConfiguration{
+			ContextSize: -1,
+		},
 	}
 	if err := json.Unmarshal(body, &configureRequest); err != nil {
 		http.Error(w, "invalid request", http.StatusBadRequest)
```

pkg/ollama/http_handler.go

Lines changed: 4 additions & 2 deletions
```diff
@@ -493,8 +493,10 @@ func (h *HTTPHandler) handleGenerate(w http.ResponseWriter, r *http.Request) {
 	// Empty prompt - preload the model
 	// ConfigureRunner is idempotent, so calling it again with the same context size is safe
 	configureRequest := scheduling.ConfigureRequest{
-		Model:       modelName,
-		ContextSize: ctxSize, // Use extracted value (or 0 for default)
+		Model: modelName,
+		BackendConfiguration: inference.BackendConfiguration{
+			ContextSize: ctxSize, // Use extracted value (or 0 for default)
+		},
 	}
 
 	_, err := h.scheduler.ConfigureRunner(ctx, nil, configureRequest, r.UserAgent()+" (Ollama API)")
```

0 commit comments

Comments (0)