-
Notifications
You must be signed in to change notification settings - Fork 366
Revert "fix: spool large mcp media to disk" #2893
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -160,11 +160,6 @@ type Toolset struct { | |
|
|
||
| supervisor *lifecycle.Supervisor | ||
|
|
||
| // mediaDir is the toolset-scoped temp dir holding spooled media | ||
| // payloads. Created lazily on first spool, removed by Stop. | ||
| mediaMu sync.Mutex | ||
| mediaDir string | ||
|
|
||
| mu sync.Mutex | ||
|
|
||
| // Cached tools and prompts, invalidated via MCP notifications and | ||
|
|
@@ -431,7 +426,6 @@ func (ts *Toolset) Start(ctx context.Context) error { | |
| // Stop tears the supervisor down. Idempotent. | ||
| func (ts *Toolset) Stop(ctx context.Context) error { | ||
| slog.DebugContext(ctx, "Stopping MCP toolset", "server", ts.logID) | ||
| defer ts.cleanupMediaDir() | ||
| if ts.supervisor == nil { | ||
| return nil | ||
| } | ||
|
|
@@ -700,7 +694,7 @@ func (ts *Toolset) callTool(ctx context.Context, toolCall tools.ToolCall) (*tool | |
| return nil, fmt.Errorf("failed to call tool: %w", err) | ||
| } | ||
|
|
||
| result := ts.processMCPContent(resp) | ||
| result := processMCPContent(resp) | ||
| slog.DebugContext(ctx, "MCP tool call completed", "tool", toolCall.Function.Name, "output_length", len(result.Output)) | ||
| slog.DebugContext(ctx, result.Output) | ||
| return result, nil | ||
|
|
@@ -720,13 +714,7 @@ func isInitNotificationSendError(err error) bool { | |
| return false | ||
| } | ||
|
|
||
| const maxInlineMediaBytes = 256 * 1024 | ||
|
|
||
| // writeMediaFile is a package-level indirection so tests can simulate | ||
| // disk failures without manipulating the filesystem. | ||
| var writeMediaFile = defaultWriteMediaFile | ||
|
|
||
| func (ts *Toolset) processMCPContent(toolResult *mcp.CallToolResult) *tools.ToolCallResult { | ||
| func processMCPContent(toolResult *mcp.CallToolResult) *tools.ToolCallResult { | ||
| var text strings.Builder | ||
| var images, audios []tools.MediaContent | ||
|
|
||
|
|
@@ -735,9 +723,9 @@ func (ts *Toolset) processMCPContent(toolResult *mcp.CallToolResult) *tools.Tool | |
| case *mcp.TextContent: | ||
| text.WriteString(c.Text) | ||
| case *mcp.ImageContent: | ||
| images = append(images, ts.encodeMedia(c.Data, c.MIMEType)) | ||
| images = append(images, encodeMedia(c.Data, c.MIMEType)) | ||
| case *mcp.AudioContent: | ||
| audios = append(audios, ts.encodeMedia(c.Data, c.MIMEType)) | ||
| audios = append(audios, encodeMedia(c.Data, c.MIMEType)) | ||
| case *mcp.ResourceLink: | ||
| if c.Name != "" { | ||
| // Escape ] in name and ) in URI to prevent broken markdown links. | ||
|
|
@@ -772,94 +760,12 @@ func (ts *Toolset) processMCPContent(toolResult *mcp.CallToolResult) *tools.Tool | |
| } | ||
| } | ||
|
|
||
| // encodeMedia keeps small payloads inline and spools larger ones to disk so the | ||
| // session and TUI do not retain duplicate base64 copies. Spooled files live | ||
| // under a toolset-scoped temp directory removed by Stop. | ||
| func (ts *Toolset) encodeMedia(data []byte, mimeType string) tools.MediaContent { | ||
| media := tools.MediaContent{MimeType: mimeType} | ||
| if len(data) <= maxInlineMediaBytes { | ||
| media.Data = base64.StdEncoding.EncodeToString(data) | ||
| return media | ||
| } | ||
|
|
||
| dir, err := ts.ensureMediaDir() | ||
| if err == nil { | ||
| var path string | ||
| path, err = writeMediaFile(dir, data, mimeType) | ||
| if err == nil { | ||
| media.FilePath = path | ||
| return media | ||
| } | ||
| } | ||
| slog.Warn("failed to spool MCP media to disk", "mime_type", mimeType, "bytes", len(data), "error", err) | ||
| media.Data = base64.StdEncoding.EncodeToString(data) | ||
| return media | ||
| } | ||
|
|
||
| // ensureMediaDir lazily creates the toolset-scoped temp dir for spooled | ||
| // media payloads. The directory is removed by Stop. | ||
| func (ts *Toolset) ensureMediaDir() (string, error) { | ||
| ts.mediaMu.Lock() | ||
| defer ts.mediaMu.Unlock() | ||
| if ts.mediaDir != "" { | ||
| return ts.mediaDir, nil | ||
| } | ||
| dir, err := os.MkdirTemp("", "docker-agent-mcp-media-*") | ||
| if err != nil { | ||
| return "", err | ||
| } | ||
| ts.mediaDir = dir | ||
| return dir, nil | ||
| } | ||
|
|
||
| // cleanupMediaDir removes the toolset-scoped media spool directory, if any. | ||
| func (ts *Toolset) cleanupMediaDir() { | ||
| ts.mediaMu.Lock() | ||
| dir := ts.mediaDir | ||
| ts.mediaDir = "" | ||
| ts.mediaMu.Unlock() | ||
| if dir == "" { | ||
| return | ||
| } | ||
| if err := os.RemoveAll(dir); err != nil { | ||
| slog.Warn("failed to remove MCP media spool directory", "dir", dir, "error", err) | ||
| } | ||
| } | ||
|
|
||
| func defaultWriteMediaFile(dir string, data []byte, mimeType string) (string, error) { | ||
| f, err := os.CreateTemp(dir, "media-*"+mediaExtension(mimeType)) | ||
| if err != nil { | ||
| return "", err | ||
| } | ||
| path := f.Name() | ||
| if _, err := f.Write(data); err != nil { | ||
| _ = f.Close() | ||
| _ = os.Remove(path) | ||
| return "", err | ||
| } | ||
| if err := f.Close(); err != nil { | ||
| _ = os.Remove(path) | ||
| return "", err | ||
| } | ||
| return path, nil | ||
| } | ||
|
|
||
| func mediaExtension(mimeType string) string { | ||
| switch mimeType { | ||
| case "image/png": | ||
| return ".png" | ||
| case "image/jpeg": | ||
| return ".jpg" | ||
| case "image/gif": | ||
| return ".gif" | ||
| case "image/webp": | ||
| return ".webp" | ||
| case "audio/wav", "audio/wave", "audio/x-wav": | ||
| return ".wav" | ||
| case "audio/mpeg", "audio/mp3": | ||
| return ".mp3" | ||
| default: | ||
| return ".bin" | ||
| // encodeMedia re-encodes raw bytes (as decoded by the MCP SDK) back to base64 | ||
| // for our internal MediaContent representation. | ||
| func encodeMedia(data []byte, mimeType string) tools.MediaContent { | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. [MEDIUM] Memory pressure regression: large MCP media always held in-memory as base64. The reverted `encodeMedia` kept only payloads up to `maxInlineMediaBytes` (256 KiB) inline and spooled larger ones to disk. With this revert, a single MCP tool response containing, e.g., a 10 MB screenshot will allocate ~13 MB of base64 in RAM, duplicated across the session history and the TUI. Multiple such responses compound quickly. If the spool approach is being dropped intentionally, consider at minimum documenting the decision or adding a size cap/warning log for unexpectedly large payloads. |
||
| return tools.MediaContent{ | ||
| Data: base64.StdEncoding.EncodeToString(data), | ||
| MimeType: mimeType, | ||
| } | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -73,11 +73,8 @@ type FunctionCall struct { | |
| // MediaContent represents base64-encoded binary data (image, audio, etc.) | ||
| // returned by a tool. | ||
| type MediaContent struct { | ||
| // Data is the base64-encoded payload. It is kept only for small media; large | ||
| // MCP payloads are spooled to FilePath to avoid retaining duplicate base64. | ||
| Data string `json:"data,omitempty"` | ||
| // FilePath is an optional local file containing the decoded media payload. | ||
| FilePath string `json:"filePath,omitempty"` | ||
| // Data is the base64-encoded payload. | ||
| Data string `json:"data"` | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. [LOW] Removing `omitempty` from the `Data` field. The field tag changed from `json:"data,omitempty"` to `json:"data"`. In the current code path […] |
||
| // MimeType identifies the content type (e.g. "image/png", "audio/wav"). | ||
| MimeType string `json:"mimeType"` | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[MEDIUM] Missing empty-data guard may produce malformed data URIs
The previous `buildMultiContent` had a `case img.Data != "":` guard that silently skipped any `MediaContent` with an empty `Data` field. The restored code unconditionally builds a `MessagePart` for every image:

If `img.Data` is empty (e.g., an MCP server returns an `ImageContent` with a zero-length `Data` slice — `base64.StdEncoding.EncodeToString(nil)` returns `""`), the resulting URL is `"data:image/png;base64,"` — an invalid data URI that will be sent verbatim to the LLM API. Most LLM clients will return an error or silently fail on such a URL.

Since `encodeMedia` always calls `base64.StdEncoding.EncodeToString`, this can only trigger when an MCP server sends an empty `ImageContent.Data`, but the old guard handled that defensively. Consider adding back a check: