Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/shiny-humans-greet.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
'@openai/agents-core': minor
---

feat: #663 Add `isInvokeComputer` and `asInvokeComputer` helpers for computers that expose a single `invoke` entry point
34 changes: 30 additions & 4 deletions packages/agents-core/src/computer.ts
Original file line number Diff line number Diff line change
@@ -1,18 +1,23 @@
import * as protocol from './types/protocol';

/** Values a computer implementation can declare in its `environment` field. */
export type Environment = 'mac' | 'windows' | 'ubuntu' | 'browser';
/** Button identifiers accepted by the `click` method of a computer. */
export type Button = 'left' | 'right' | 'wheel' | 'back' | 'forward';

import { Expand, SnakeToCamelCase } from './types/helpers';
import type { ComputerAction } from './types/protocol';
import { ComputerAction } from './types/protocol';
import type { RunContext } from './runContext';

/** A value of type `T` that may be delivered directly or wrapped in a promise. */
type Promisable<T> = T | Promise<T>;

/**
* Interface to implement for a computer environment to be used by the agent.
*/
interface ComputerBase {
/**
* Properties shared by the method-based and the invoke-based computer shapes.
*/
type ComputerCommon = {
// One of the `Environment` literals.
environment: Environment;
// Pair of numbers; presumably [width, height] of the display — TODO confirm order and units.
dimensions: [number, number];
};

type ComputerBaseMethods = {
screenshot(): Promisable<string>;
click(x: number, y: number, button: Button): Promisable<void>;
doubleClick(x: number, y: number): Promisable<void>;
Expand All @@ -27,7 +32,15 @@ interface ComputerBase {
move(x: number, y: number): Promisable<void>;
keypress(keys: string[]): Promisable<void>;
drag(path: [number, number][]): Promisable<void>;
}
};

/**
* Method-based computer: the shared properties plus every method declared in
* `ComputerBaseMethods`. `invoke` may only be absent or `undefined`, so a value
* carrying a callable `invoke` does not match this shape.
*/
type ComputerBase = ComputerCommon & ComputerBaseMethods & {
invoke?: undefined;
};

/**
* Invoke-based computer: the shared properties plus a single `invoke` entry
* point that receives the run context and the computer tool call and yields a
* string, either directly or through a promise.
*
* Every key of `ComputerBaseMethods` is mapped to a required property of type
* `never`, so an ordinary object literal cannot be annotated with this type
* directly; `asInvokeComputer` accepts the literal shape and casts it.
*
* NOTE(review): what the returned string represents is not visible here —
* presumably the same screenshot data the method-based path produces; confirm.
*/
type ComputerBaseInvoke = ComputerCommon & {
invoke(runContext: RunContext, toolCall: protocol.ComputerUseCallItem): Promisable<string>;
} & { [K in keyof ComputerBaseMethods]: never };

// Union of the `ComputerAction['type']` literals after `SnakeToCamelCase` is
// applied, i.e. the snake_case action names rewritten in camelCase (presumably
// 'double_click' -> 'doubleClick'; confirm against the helper's definition).
type ActionNames = SnakeToCamelCase<ComputerAction['type']>;
Expand All @@ -38,5 +51,18 @@ type ActionNames = SnakeToCamelCase<ComputerAction['type']>;
* action names beyond those in `ComputerAction` are present.
*/
export type Computer = Expand<
// Method-based computer. The `Record<..., never>` part types every action name
// that `ComputerBase` does not already declare as `never`.
ComputerBase & Record<Exclude<ActionNames, keyof ComputerBase>, never>
// NOTE(review): same member as the line above, only parenthesized — this looks
// like the before/after pair of a diff view; confirm which line the real file keeps.
| (ComputerBase & Record<Exclude<ActionNames, keyof ComputerBase>, never>)
// Invoke-based computer.
// NOTE(review): 'invoke' is itself a key of `ComputerBaseInvoke`, so this
// `Exclude` appears to resolve to `never` and the `Record` adds no constraint — confirm.
| (ComputerBaseInvoke & Record<Exclude<'invoke', keyof ComputerBaseInvoke>, never>)
>;

/**
 * Type guard that tells the two computer shapes apart at runtime.
 *
 * @param computer - The computer to inspect.
 * @returns `true` when the value's `invoke` property is a function, which
 * narrows it to the invoke-based shape; `false` otherwise.
 */
export function isInvokeComputer(computer: Computer): computer is ComputerBaseInvoke {
  // Read the property through a loose view so the check works on either shape.
  const { invoke } = computer as { invoke?: unknown };
  return typeof invoke === 'function';
}

/**
 * Presents an invoke-only computer definition as a `Computer`.
 *
 * The parameter type lists only the shared properties and `invoke`, so callers
 * can pass a plain object; the same object is returned, re-typed through a
 * cast to the invoke-based shape.
 *
 * @param computer - Object with `environment`, `dimensions` and an `invoke`
 * function taking the run context and the computer tool call.
 * @returns The object that was passed in, typed as `Computer`.
 */
export function asInvokeComputer(
  computer: ComputerCommon & {
    invoke(runContext: RunContext, toolCall: protocol.ComputerUseCallItem): Promisable<string>;
  },
): Computer {
  // Pure re-typing: no copy is made and nothing is added to the object.
  const invokeComputer = computer as ComputerBaseInvoke;
  return invokeComputer;
}
2 changes: 1 addition & 1 deletion packages/agents-core/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ export {
ToolUseBehavior,
ToolUseBehaviorFlags,
} from './agent';
export { Computer } from './computer';
export { Computer, asInvokeComputer, isInvokeComputer } from './computer';
export { ShellAction, ShellResult, ShellOutputResult, Shell } from './shell';
export { ApplyPatchOperation, ApplyPatchResult, Editor } from './editor';
export {
Expand Down
8 changes: 6 additions & 2 deletions packages/agents-core/src/runImplementation.ts
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ import { RunItemStreamEvent, RunItemStreamEventName } from './events';
import { RunResult, StreamedRunResult } from './result';
import { z } from 'zod';
import * as protocol from './types/protocol';
import { Computer } from './computer';
import { Computer, isInvokeComputer } from './computer';
import type { ApplyPatchResult } from './editor';
import { RunState } from './runState';
import { isZodObject } from './utils';
Expand Down Expand Up @@ -1767,7 +1767,11 @@ export async function executeComputerActions(
// Run the action and get screenshot
let output: string;
try {
output = await _runComputerActionAndScreenshot(computer, toolCall);
if (isInvokeComputer(computer)) {
output = await computer.invoke(runContext, toolCall);
} else {
output = await _runComputerActionAndScreenshot(computer, toolCall);
}
} catch (err) {
_logger.error('Failed to execute computer action:', err);
output = '';
Expand Down
86 changes: 84 additions & 2 deletions packages/agents-core/test/runImplementation.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ import {
} from '../src/tool';
import { handoff } from '../src/handoff';
import { ModelBehaviorError, UserError } from '../src/errors';
import { Computer } from '../src/computer';
import { Computer, asInvokeComputer } from '../src/computer';
import { Usage } from '../src/usage';
import { setTracingDisabled, withTrace } from '../src';

Expand Down Expand Up @@ -1855,7 +1855,7 @@ describe('executeFunctionToolCalls', () => {
});
});

describe('executeComputerActions', () => {
describe('executeComputerActions - original', () => {
function makeComputer(): Computer {
return {
environment: 'mac',
Expand Down Expand Up @@ -1953,6 +1953,88 @@ describe('executeComputerActions', () => {
});
});

// Covers the path where the computer exposes a single `invoke` function
// instead of one method per action.
describe('executeComputerActions - invoke', () => {
  // Builds an invoke-only computer whose `invoke` mock resolves to 'img'.
  function makeComputer(): Computer {
    return asInvokeComputer({
      environment: 'mac',
      dimensions: [1, 1],
      invoke: vi.fn(async () => 'img'),
    });
  }

  // One sample of each action type exercised by the first test.
  const actions: protocol.ComputerAction[] = [
    { type: 'click', x: 1, y: 2, button: 'left' },
    { type: 'double_click', x: 2, y: 2 },
    { type: 'drag', path: [{ x: 1, y: 1 }] },
    { type: 'keypress', keys: ['a'] },
    { type: 'move', x: 3, y: 3 },
    { type: 'screenshot' },
    { type: 'scroll', x: 0, y: 0, scroll_x: 0, scroll_y: 1 },
    { type: 'type', text: 'hi' },
    { type: 'wait' },
  ];

  it('returns one output item per action when the computer exposes invoke', async () => {
    const comp = makeComputer();
    const tool = computerTool({ computer: comp });
    const calls = actions.map((a, i) => ({
      toolCall: {
        id: `id${i}`,
        type: 'computer_call',
        callId: `id${i}`,
        status: 'completed',
        action: a,
      } as protocol.ComputerUseCallItem,
      computer: tool,
    }));

    const result = await withTrace('test', () =>
      executeComputerActions(
        new Agent({ name: 'C' }),
        calls,
        new Runner({ tracingDisabled: true }),
        new RunContext(),
      ),
    );

    expect(result).toHaveLength(actions.length);
    expect(result.every((r) => r instanceof ToolCallOutputItem)).toBe(true);
  });

  it('returns empty output data when invoke resolves to null', async () => {
    // Typed as `any` on purpose: `invoke` resolving to null does not satisfy
    // the declared string result.
    const comp: any = {
      environment: 'mac',
      dimensions: [1, 1],
      invoke: vi.fn(async () => null),
    };
    const tool = computerTool({ computer: comp });
    const call = {
      toolCall: {
        id: 'id',
        type: 'computer_call',
        callId: 'id',
        status: 'completed',
        action: { type: 'click', x: 1, y: 1, button: 'left' },
      } as protocol.ComputerUseCallItem,
      computer: tool,
    };
    const res = await withTrace('test', () =>
      executeComputerActions(
        new Agent({ name: 'C' }),
        [call],
        new Runner({ tracingDisabled: true }),
        new RunContext(),
        // No-op logger stub passed as the fifth argument.
        { error: (_: string) => {} } as unknown as Logger,
      ),
    );

    expect(res[0]).toBeInstanceOf(ToolCallOutputItem);
    expect(res[0].type).toBe('tool_call_output_item');
    expect(res[0].rawItem.type).toBe('computer_call_result');
    expect((res[0].rawItem as any).output.data).toBe('');
  });
});

describe('executeHandoffCalls', () => {
it('executes single handoff', async () => {
const target = new Agent({ name: 'Target' });
Expand Down