Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -197,8 +197,8 @@ Pinch:
Swipe timing:
- `swipe` accepts optional `durationMs` (default `250`, range `16..10000`).
- Android uses requested swipe duration directly.
- iOS uses a safe normalized duration to avoid longpress side effects.
- `scrollintoview` accepts either plain text or a snapshot ref (`@eN`); ref mode uses geometry-based scrolling.
- iOS clamps swipe duration to a safe range (`16..60ms`) to avoid longpress side effects.
- `scrollintoview` accepts either plain text or a snapshot ref (`@eN`); ref mode uses best-effort geometry-based scrolling without post-scroll verification. Run `snapshot` again before follow-up `@ref` commands.

## Skills
Install the automation skills listed in [SKILL.md](skills/agent-device/SKILL.md).
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ final class RunnerTests: XCTestCase {
private let retryCooldown: TimeInterval = 0.2
private let postSnapshotInteractionDelay: TimeInterval = 0.2
private let firstInteractionAfterActivateDelay: TimeInterval = 0.25
private let scrollInteractionIdleTimeoutDefault: TimeInterval = 1.0
private let minRecordingFps = 1
private let maxRecordingFps = 120
private var needsPostSnapshotInteractionDelay = false
Expand Down Expand Up @@ -712,7 +713,9 @@ final class RunnerTests: XCTestCase {
return Response(ok: false, error: ErrorPayload(message: "drag requires x, y, x2, and y2"))
}
let holdDuration = min(max((command.durationMs ?? 60) / 1000.0, 0.016), 10.0)
dragAt(app: activeApp, x: x, y: y, x2: x2, y2: y2, holdDuration: holdDuration)
withTemporaryScrollIdleTimeoutIfSupported(activeApp) {
dragAt(app: activeApp, x: x, y: y, x2: x2, y2: y2, holdDuration: holdDuration)
}
return Response(ok: true, data: DataPayload(message: "dragged"))
case .dragSeries:
guard let x = command.x, let y = command.y, let x2 = command.x2, let y2 = command.y2 else {
Expand All @@ -725,12 +728,14 @@ final class RunnerTests: XCTestCase {
return Response(ok: false, error: ErrorPayload(message: "dragSeries pattern must be one-way or ping-pong"))
}
let holdDuration = min(max((command.durationMs ?? 60) / 1000.0, 0.016), 10.0)
runSeries(count: count, pauseMs: pauseMs) { idx in
let reverse = pattern == "ping-pong" && (idx % 2 == 1)
if reverse {
dragAt(app: activeApp, x: x2, y: y2, x2: x, y2: y, holdDuration: holdDuration)
} else {
dragAt(app: activeApp, x: x, y: y, x2: x2, y2: y2, holdDuration: holdDuration)
withTemporaryScrollIdleTimeoutIfSupported(activeApp) {
runSeries(count: count, pauseMs: pauseMs) { idx in
let reverse = pattern == "ping-pong" && (idx % 2 == 1)
if reverse {
dragAt(app: activeApp, x: x2, y: y2, x2: x, y2: y, holdDuration: holdDuration)
} else {
dragAt(app: activeApp, x: x, y: y, x2: x2, y2: y2, holdDuration: holdDuration)
}
}
}
return Response(ok: true, data: DataPayload(message: "drag series"))
Expand All @@ -756,7 +761,9 @@ final class RunnerTests: XCTestCase {
guard let direction = command.direction else {
return Response(ok: false, error: ErrorPayload(message: "swipe requires direction"))
}
swipe(app: activeApp, direction: direction)
withTemporaryScrollIdleTimeoutIfSupported(activeApp) {
swipe(app: activeApp, direction: direction)
}
return Response(ok: true, data: DataPayload(message: "swiped"))
case .findText:
guard let text = command.text else {
Expand Down Expand Up @@ -884,6 +891,38 @@ final class RunnerTests: XCTestCase {
return target
}

private func withTemporaryScrollIdleTimeoutIfSupported(
_ target: XCUIApplication,
operation: () -> Void
) {
let setter = NSSelectorFromString("setWaitForIdleTimeout:")
guard target.responds(to: setter) else {
operation()
return
}
let previous = target.value(forKey: "waitForIdleTimeout") as? NSNumber
target.setValue(resolveScrollInteractionIdleTimeout(), forKey: "waitForIdleTimeout")
defer {
if let previous {
target.setValue(previous.doubleValue, forKey: "waitForIdleTimeout")
}
}
operation()
}

private func resolveScrollInteractionIdleTimeout() -> TimeInterval {
guard
let raw = ProcessInfo.processInfo.environment["AGENT_DEVICE_IOS_INTERACTION_IDLE_TIMEOUT"],
!raw.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty
else {
return scrollInteractionIdleTimeoutDefault
}
guard let parsed = Double(raw), parsed >= 0 else {
return scrollInteractionIdleTimeoutDefault
}
return min(parsed, 30)
}

private func shouldRetryCommand(_ command: Command) -> Bool {
if isEnvTruthy("AGENT_DEVICE_RUNNER_DISABLE_READONLY_RETRY") {
return false
Expand Down
7 changes: 6 additions & 1 deletion src/core/dispatch.ts
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ export async function dispatchCommand(

const requestedDurationMs = positionals[4] ? Number(positionals[4]) : 250;
const durationMs = requireIntInRange(requestedDurationMs, 'durationMs', 16, 10_000);
const effectiveDurationMs = device.platform === 'ios' ? 60 : durationMs;
const effectiveDurationMs = device.platform === 'ios' ? clampIosSwipeDuration(durationMs) : durationMs;
const count = requireIntInRange(context?.count ?? 1, 'count', 1, 200);
const pauseMs = requireIntInRange(context?.pauseMs ?? 0, 'pause-ms', 0, 10_000);
const pattern = context?.pattern ?? 'one-way';
Expand Down Expand Up @@ -511,6 +511,11 @@ function requireIntInRange(value: number, name: string, min: number, max: number
return value;
}

function clampIosSwipeDuration(durationMs: number): number {
// Keep iOS swipes stable while allowing explicit fast durations for scroll-heavy flows.
return Math.min(60, Math.max(16, Math.round(durationMs)));
}

export function shouldUseIosTapSeries(
device: DeviceInfo,
count: number,
Expand Down
11 changes: 11 additions & 0 deletions src/daemon/__tests__/scroll-planner.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ test('buildScrollIntoViewPlan computes downward content scroll when target is be
assert.ok(plan);
assert.equal(plan?.direction, 'down');
assert.ok((plan?.count ?? 0) > 1);
assert.equal(plan?.x, 80);
assert.equal(plan?.startY, 726);
assert.equal(plan?.endY, 118);
});

test('buildScrollIntoViewPlan returns null when already in safe viewport band', () => {
Expand All @@ -45,3 +48,11 @@ test('buildScrollIntoViewPlan returns null when already in safe viewport band',
assert.equal(plan, null);
assert.equal(isRectWithinSafeViewportBand(targetRect, viewportRect), true);
});

test('buildScrollIntoViewPlan keeps swipe lane inside viewport when target center is out of bounds', () => {
const targetRect = { x: 1000, y: 2100, width: 120, height: 40 };
const viewportRect = { x: 0, y: 0, width: 390, height: 844 };
const plan = buildScrollIntoViewPlan(targetRect, viewportRect);
assert.ok(plan);
assert.equal(plan?.x, 351);
});
25 changes: 5 additions & 20 deletions src/daemon/handlers/__tests__/interaction.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,6 @@ test('scrollintoview @ref dispatches geometry-based swipe series', async () => {
positionals: string[];
context: Record<string, unknown> | undefined;
}> = [];
let snapshotCallCount = 0;
const response = await handleInteractionCommands({
req: {
token: 't',
Expand All @@ -227,24 +226,13 @@ test('scrollintoview @ref dispatches geometry-based swipe series', async () => {
sessionStore,
contextFromFlags,
dispatch: async (_device, command, positionals, _out, context) => {
if (command === 'snapshot') {
snapshotCallCount += 1;
return {
nodes: [
{ index: 0, type: 'Application', rect: { x: 0, y: 0, width: 390, height: 844 } },
{ index: 1, type: 'XCUIElementTypeStaticText', label: 'Far item', rect: { x: 20, y: 320, width: 120, height: 40 } },
],
backend: 'xctest',
};
}
dispatchCalls.push({ command, positionals, context: context as Record<string, unknown> | undefined });
return { ok: true };
},
});

assert.ok(response);
assert.equal(response.ok, true);
assert.equal(snapshotCallCount, 1);
assert.equal(dispatchCalls.length, 1);
assert.equal(dispatchCalls[0]?.command, 'swipe');
assert.equal(dispatchCalls[0]?.positionals.length, 5);
Expand All @@ -259,8 +247,6 @@ test('scrollintoview @ref dispatches geometry-based swipe series', async () => {
assert.equal(stored?.actions[0]?.command, 'scrollintoview');
const result = (stored?.actions[0]?.result ?? {}) as Record<string, unknown>;
assert.equal(result.ref, 'e2');
assert.equal(result.strategy, 'ref-geometry');
assert.equal(result.verified, true);
});

test('scrollintoview @ref returns immediately when target is already in viewport safe band', async () => {
Expand Down Expand Up @@ -313,7 +299,7 @@ test('scrollintoview @ref returns immediately when target is already in viewport
}
});

test('scrollintoview @ref fails if target remains outside viewport after scroll', async () => {
test('scrollintoview @ref does not run post-scroll verification snapshot', async () => {
const sessionStore = makeSessionStore();
const sessionName = 'default';
const session = makeSession(sessionName);
Expand All @@ -335,6 +321,7 @@ test('scrollintoview @ref fails if target remains outside viewport after scroll'
backend: 'xctest',
};
sessionStore.set(sessionName, session);
let snapshotCallCount = 0;

const response = await handleInteractionCommands({
req: {
Expand All @@ -349,6 +336,7 @@ test('scrollintoview @ref fails if target remains outside viewport after scroll'
contextFromFlags,
dispatch: async (_device, command) => {
if (command === 'snapshot') {
snapshotCallCount += 1;
return {
nodes: [
{ index: 0, type: 'Application', rect: { x: 0, y: 0, width: 390, height: 844 } },
Expand All @@ -362,9 +350,6 @@ test('scrollintoview @ref fails if target remains outside viewport after scroll'
});

assert.ok(response);
assert.equal(response.ok, false);
if (!response.ok) {
assert.equal(response.error?.code, 'COMMAND_FAILED');
assert.match(response.error?.message ?? '', /outside viewport/i);
}
assert.equal(response.ok, true);
assert.equal(snapshotCallCount, 0);
});
92 changes: 4 additions & 88 deletions src/daemon/handlers/interaction.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import {
splitSelectorFromArgs,
} from '../selectors.ts';
import { withDiagnosticTimer } from '../../utils/diagnostics.ts';
import { buildScrollIntoViewPlan, isRectWithinSafeViewportBand, resolveViewportRect } from '../scroll-planner.ts';
import { buildScrollIntoViewPlan, resolveViewportRect } from '../scroll-planner.ts';

type ContextFromFlags = (
flags: CommandFlags | undefined,
Expand Down Expand Up @@ -598,14 +598,14 @@ export async function handleInteractionCommands(params: {
command,
positionals: req.positionals ?? [],
flags: req.flags ?? {},
result: { ref, attempts: 0, alreadyVisible: true, strategy: 'ref-geometry', refLabel, selectorChain },
result: { ref, attempts: 0, alreadyVisible: true, refLabel, selectorChain },
});
return { ok: true, data: { ref, attempts: 0, alreadyVisible: true, strategy: 'ref-geometry' } };
return { ok: true, data: { ref, attempts: 0, alreadyVisible: true } };
}
const data = await dispatch(
session.device,
'swipe',
[String(plan.x), String(plan.startY), String(plan.x), String(plan.endY), '60'],
[String(plan.x), String(plan.startY), String(plan.x), String(plan.endY), '16'],
req.flags?.out,
{
...contextFromFlags(req.flags, session.appBundleId, session.trace?.outPath),
Expand All @@ -614,15 +614,6 @@ export async function handleInteractionCommands(params: {
pattern: 'one-way',
},
);
const verification = await verifyRefTargetInViewport({
session,
flags: req.flags,
sessionStore,
contextFromFlags,
dispatch,
selectorChain,
});
if (!verification.ok) return verification.response;
sessionStore.recordAction(session, {
command,
positionals: req.positionals ?? [],
Expand All @@ -632,8 +623,6 @@ export async function handleInteractionCommands(params: {
ref,
attempts: plan.count,
direction: plan.direction,
strategy: 'ref-geometry',
verified: true,
refLabel,
selectorChain,
},
Expand All @@ -645,8 +634,6 @@ export async function handleInteractionCommands(params: {
ref,
attempts: plan.count,
direction: plan.direction,
strategy: 'ref-geometry',
verified: true,
},
};
}
Expand Down Expand Up @@ -761,74 +748,3 @@ function resolveRefTarget(params: {
}
return { ok: true, target: { ref, node, snapshotNodes: session.snapshot.nodes } };
}

async function verifyRefTargetInViewport(params: {
session: SessionState;
flags: CommandFlags | undefined;
sessionStore: SessionStore;
contextFromFlags: ContextFromFlags;
dispatch: typeof dispatchCommand;
selectorChain: string[];
}): Promise<{ ok: true } | { ok: false; response: DaemonResponse }> {
const { session, flags, sessionStore, contextFromFlags, dispatch, selectorChain } = params;
if (selectorChain.length === 0) {
return {
ok: false,
response: { ok: false, error: { code: 'COMMAND_FAILED', message: 'scrollintoview verification selector is empty' } },
};
}
let chainExpression = '';
try {
chainExpression = selectorChain.join(' || ');
parseSelectorChain(chainExpression);
} catch {
return {
ok: false,
response: { ok: false, error: { code: 'COMMAND_FAILED', message: 'scrollintoview verification selector is invalid' } },
};
}
const snapshot = await captureSnapshotForSession(
session,
flags,
sessionStore,
contextFromFlags,
{ interactiveOnly: true },
dispatch,
);
const chain = parseSelectorChain(chainExpression);
const resolved = resolveSelectorChain(snapshot.nodes, chain, {
platform: session.device.platform,
requireRect: true,
requireUnique: false,
disambiguateAmbiguous: true,
});
if (!resolved?.node.rect) {
return {
ok: false,
response: {
ok: false,
error: { code: 'COMMAND_FAILED', message: 'scrollintoview target could not be verified after scrolling' },
},
};
}
const viewportRect = resolveViewportRect(snapshot.nodes, resolved.node.rect);
if (!viewportRect) {
return {
ok: false,
response: {
ok: false,
error: { code: 'COMMAND_FAILED', message: 'scrollintoview could not infer viewport during verification' },
},
};
}
if (!isRectWithinSafeViewportBand(resolved.node.rect, viewportRect)) {
return {
ok: false,
response: {
ok: false,
error: { code: 'COMMAND_FAILED', message: 'scrollintoview target is still outside viewport after scrolling' },
},
};
}
return { ok: true };
}
17 changes: 13 additions & 4 deletions src/daemon/scroll-planner.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,22 +38,27 @@ export function resolveViewportRect(nodes: RawSnapshotNode[], targetRect: Rect):

export function buildScrollIntoViewPlan(targetRect: Rect, viewportRect: Rect): ScrollIntoViewPlan | null {
const viewportHeight = Math.max(1, viewportRect.height);
const viewportWidth = Math.max(1, viewportRect.width);
const viewportTop = viewportRect.y;
const viewportBottom = viewportRect.y + viewportHeight;
const viewportLeft = viewportRect.x;
const viewportRight = viewportRect.x + viewportWidth;
const safeTop = viewportTop + viewportHeight * 0.25;
const safeBottom = viewportBottom - viewportHeight * 0.25;
const lanePaddingPx = Math.max(8, viewportWidth * 0.1);
const targetCenterY = targetRect.y + targetRect.height / 2;
const targetCenterX = targetRect.x + targetRect.width / 2;

if (targetCenterY >= safeTop && targetCenterY <= safeBottom) {
return null;
}

const x = Math.round(viewportRect.x + viewportRect.width / 2);
const dragUpStartY = Math.round(viewportTop + viewportHeight * 0.78);
const dragUpEndY = Math.round(viewportTop + viewportHeight * 0.22);
const x = Math.round(clamp(targetCenterX, viewportLeft + lanePaddingPx, viewportRight - lanePaddingPx));
const dragUpStartY = Math.round(viewportTop + viewportHeight * 0.86);
const dragUpEndY = Math.round(viewportTop + viewportHeight * 0.14);
const dragDownStartY = dragUpEndY;
const dragDownEndY = dragUpStartY;
const swipeStepPx = Math.max(1, Math.abs(dragUpStartY - dragUpEndY) * 0.9);
const swipeStepPx = Math.max(1, Math.abs(dragUpStartY - dragUpEndY));

if (targetCenterY > safeBottom) {
const delta = targetCenterY - safeBottom;
Expand Down Expand Up @@ -111,3 +116,7 @@ function pickLargestRect(rects: Rect[]): Rect | null {
function clampInt(value: number, min: number, max: number): number {
return Math.min(max, Math.max(min, Math.round(value)));
}

function clamp(value: number, min: number, max: number): number {
return Math.min(max, Math.max(min, value));
}
Loading
Loading