diff --git a/pkg/runtime/toolexec/dispatcher.go b/pkg/runtime/toolexec/dispatcher.go index 4820a878c..bf901243b 100644 --- a/pkg/runtime/toolexec/dispatcher.go +++ b/pkg/runtime/toolexec/dispatcher.go @@ -724,21 +724,13 @@ func buildMultiContent(text string, images []tools.MediaContent) []chat.MessageP parts := make([]chat.MessagePart, 0, 1+len(images)) parts = append(parts, chat.MessagePart{Type: chat.MessagePartTypeText, Text: text}) for _, img := range images { - switch { - case img.FilePath != "": - parts = append(parts, chat.MessagePart{ - Type: chat.MessagePartTypeText, - Text: fmt.Sprintf("[image saved to %s (%s)]", img.FilePath, img.MimeType), - }) - case img.Data != "": - parts = append(parts, chat.MessagePart{ - Type: chat.MessagePartTypeImageURL, - ImageURL: &chat.MessageImageURL{ - URL: "data:" + img.MimeType + ";base64," + img.Data, - Detail: chat.ImageURLDetailAuto, - }, - }) - } + parts = append(parts, chat.MessagePart{ + Type: chat.MessagePartTypeImageURL, + ImageURL: &chat.MessageImageURL{ + URL: "data:" + img.MimeType + ";base64," + img.Data, + Detail: chat.ImageURLDetailAuto, + }, + }) } return parts } diff --git a/pkg/tools/mcp/mcp.go b/pkg/tools/mcp/mcp.go index 84734716f..42208e4fc 100644 --- a/pkg/tools/mcp/mcp.go +++ b/pkg/tools/mcp/mcp.go @@ -160,11 +160,6 @@ type Toolset struct { supervisor *lifecycle.Supervisor - // mediaDir is the toolset-scoped temp dir holding spooled media - // payloads. Created lazily on first spool, removed by Stop. - mediaMu sync.Mutex - mediaDir string - mu sync.Mutex // Cached tools and prompts, invalidated via MCP notifications and @@ -431,7 +426,6 @@ func (ts *Toolset) Start(ctx context.Context) error { // Stop tears the supervisor down. Idempotent. func (ts *Toolset) Stop(ctx context.Context) error { slog.DebugContext(ctx, "Stopping MCP toolset", "server", ts.logID) - defer ts.cleanupMediaDir() if ts.supervisor == nil { return nil } @@ -700,7 +694,7 @@ func (ts *Toolset) callTool(ctx context.Context, toolCall tools.ToolCall) (*tool return nil, fmt.Errorf("failed to call tool: %w", err) } - result := ts.processMCPContent(resp) + result := processMCPContent(resp) slog.DebugContext(ctx, "MCP tool call completed", "tool", toolCall.Function.Name, "output_length", len(result.Output)) slog.DebugContext(ctx, result.Output) return result, nil @@ -720,13 +714,7 @@ func isInitNotificationSendError(err error) bool { return false } -const maxInlineMediaBytes = 256 * 1024 - -// writeMediaFile is a package-level indirection so tests can simulate -// disk failures without manipulating the filesystem. -var writeMediaFile = defaultWriteMediaFile - -func (ts *Toolset) processMCPContent(toolResult *mcp.CallToolResult) *tools.ToolCallResult { +func processMCPContent(toolResult *mcp.CallToolResult) *tools.ToolCallResult { var text strings.Builder var images, audios []tools.MediaContent @@ -735,9 +723,9 @@ func (ts *Toolset) processMCPContent(toolResult *mcp.CallToolResult) *tools.Tool case *mcp.TextContent: text.WriteString(c.Text) case *mcp.ImageContent: - images = append(images, ts.encodeMedia(c.Data, c.MIMEType)) + images = append(images, encodeMedia(c.Data, c.MIMEType)) case *mcp.AudioContent: - audios = append(audios, ts.encodeMedia(c.Data, c.MIMEType)) + audios = append(audios, encodeMedia(c.Data, c.MIMEType)) case *mcp.ResourceLink: if c.Name != "" { // Escape ] in name and ) in URI to prevent broken markdown links. @@ -772,94 +760,12 @@ func (ts *Toolset) processMCPContent(toolResult *mcp.CallToolResult) *tools.Tool } } -// encodeMedia keeps small payloads inline and spools larger ones to disk so the -// session and TUI do not retain duplicate base64 copies. Spooled files live -// under a toolset-scoped temp directory removed by Stop. -func (ts *Toolset) encodeMedia(data []byte, mimeType string) tools.MediaContent { - media := tools.MediaContent{MimeType: mimeType} - if len(data) <= maxInlineMediaBytes { - media.Data = base64.StdEncoding.EncodeToString(data) - return media - } - - dir, err := ts.ensureMediaDir() - if err == nil { - var path string - path, err = writeMediaFile(dir, data, mimeType) - if err == nil { - media.FilePath = path - return media - } - } - slog.Warn("failed to spool MCP media to disk", "mime_type", mimeType, "bytes", len(data), "error", err) - media.Data = base64.StdEncoding.EncodeToString(data) - return media -} - -// ensureMediaDir lazily creates the toolset-scoped temp dir for spooled -// media payloads. The directory is removed by Stop. -func (ts *Toolset) ensureMediaDir() (string, error) { - ts.mediaMu.Lock() - defer ts.mediaMu.Unlock() - if ts.mediaDir != "" { - return ts.mediaDir, nil - } - dir, err := os.MkdirTemp("", "docker-agent-mcp-media-*") - if err != nil { - return "", err - } - ts.mediaDir = dir - return dir, nil -} - -// cleanupMediaDir removes the toolset-scoped media spool directory, if any. -func (ts *Toolset) cleanupMediaDir() { - ts.mediaMu.Lock() - dir := ts.mediaDir - ts.mediaDir = "" - ts.mediaMu.Unlock() - if dir == "" { - return - } - if err := os.RemoveAll(dir); err != nil { - slog.Warn("failed to remove MCP media spool directory", "dir", dir, "error", err) - } -} - -func defaultWriteMediaFile(dir string, data []byte, mimeType string) (string, error) { - f, err := os.CreateTemp(dir, "media-*"+mediaExtension(mimeType)) - if err != nil { - return "", err - } - path := f.Name() - if _, err := f.Write(data); err != nil { - _ = f.Close() - _ = os.Remove(path) - return "", err - } - if err := f.Close(); err != nil { - _ = os.Remove(path) - return "", err - } - return path, nil -} - -func mediaExtension(mimeType string) string { - switch mimeType { - case "image/png": - return ".png" - case "image/jpeg": - return ".jpg" - case "image/gif": - return ".gif" - case "image/webp": - return ".webp" - case "audio/wav", "audio/wave", "audio/x-wav": - return ".wav" - case "audio/mpeg", "audio/mp3": - return ".mp3" - default: - return ".bin" +// encodeMedia re-encodes raw bytes (as decoded by the MCP SDK) back to base64 +// for our internal MediaContent representation. +func encodeMedia(data []byte, mimeType string) tools.MediaContent { + return tools.MediaContent{ + Data: base64.StdEncoding.EncodeToString(data), + MimeType: mimeType, } } diff --git a/pkg/tools/mcp/mcp_test.go b/pkg/tools/mcp/mcp_test.go index 24bdb6a34..997ac7c8f 100644 --- a/pkg/tools/mcp/mcp_test.go +++ b/pkg/tools/mcp/mcp_test.go @@ -1,12 +1,9 @@ package mcp import ( - "bytes" "context" - "errors" "fmt" "iter" - "os" "sync" "sync/atomic" "testing" @@ -481,7 +478,7 @@ func TestProcessMCPContent(t *testing.T) { t.Run(tt.name, func(t *testing.T) { t.Parallel() - result := (&Toolset{}).processMCPContent(tt.input) + result := processMCPContent(tt.input) assert.Equal(t, tt.wantOutput, result.Output) assert.Equal(t, tt.wantIsError, result.IsError) @@ -539,75 +536,3 @@ func TestCallToolRecoversFromErrSessionMissing(t *testing.T) { assert.Equal(t, "recovered", result.Output) assert.Equal(t, int32(2), callCount.Load(), "expected exactly 2 CallTool invocations (1 failed + 1 retry)") } - -func TestProcessMCPContentSpoolsLargeMedia(t *testing.T) { - ts := &Toolset{} - t.Cleanup(ts.cleanupMediaDir) - - tests := []struct { - name string - size int - wantInline bool - }{ - {"at threshold stays inline", maxInlineMediaBytes, true}, - {"above threshold spools", maxInlineMediaBytes + 1, false}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - data := bytes.Repeat([]byte("x"), tt.size) - result := ts.processMCPContent(callToolResult(&mcp.ImageContent{Data: data, MIMEType: "image/png"})) - - require.Len(t, result.Images, 1) - img := result.Images[0] - assert.Equal(t, "image/png", img.MimeType) - - if tt.wantInline { - assert.NotEmpty(t, img.Data) - assert.Empty(t, img.FilePath) - return - } - - assert.Empty(t, img.Data) - require.NotEmpty(t, img.FilePath) - - got, err := os.ReadFile(img.FilePath) - require.NoError(t, err) - assert.Equal(t, data, got) - }) - } -} - -func TestEncodeMediaFallsBackToInlineOnDiskFailure(t *testing.T) { - original := writeMediaFile - t.Cleanup(func() { writeMediaFile = original }) - writeMediaFile = func(string, []byte, string) (string, error) { - return "", errors.New("disk full") - } - - ts := &Toolset{} - t.Cleanup(ts.cleanupMediaDir) - - data := bytes.Repeat([]byte("x"), maxInlineMediaBytes+1) - result := ts.processMCPContent(callToolResult(&mcp.ImageContent{Data: data, MIMEType: "image/png"})) - - require.Len(t, result.Images, 1) - img := result.Images[0] - assert.Empty(t, img.FilePath) - assert.NotEmpty(t, img.Data, "falls back to inline base64 when disk write fails") -} - -func TestToolsetStopRemovesMediaDir(t *testing.T) { - ts := &Toolset{} - data := bytes.Repeat([]byte("x"), maxInlineMediaBytes+1) - media := ts.encodeMedia(data, "image/png") - require.NotEmpty(t, media.FilePath) - - _, err := os.Stat(media.FilePath) - require.NoError(t, err) - - require.NoError(t, ts.Stop(t.Context())) - - _, err = os.Stat(media.FilePath) - assert.True(t, os.IsNotExist(err), "spooled media file should be removed by Stop") -} diff --git a/pkg/tools/tools.go b/pkg/tools/tools.go index ca0cf84e3..e96b58acd 100644 --- a/pkg/tools/tools.go +++ b/pkg/tools/tools.go @@ -73,11 +73,8 @@ type FunctionCall struct { // MediaContent represents base64-encoded binary data (image, audio, etc.) // returned by a tool. type MediaContent struct { - // Data is the base64-encoded payload. It is kept only for small media; large - // MCP payloads are spooled to FilePath to avoid retaining duplicate base64. - Data string `json:"data,omitempty"` - // FilePath is an optional local file containing the decoded media payload. - FilePath string `json:"filePath,omitempty"` + // Data is the base64-encoded payload. + Data string `json:"data"` // MimeType identifies the content type (e.g. "image/png", "audio/wav"). MimeType string `json:"mimeType"` }