diff --git a/README.md b/README.md index 67d52bdb..1760d774 100644 --- a/README.md +++ b/README.md @@ -34,14 +34,15 @@ npx agent-device open SampleApp ## Quick Start Use refs for agent-driven exploration and normal automation flows. +Use `press` as the canonical tap command; `click` is an equivalent alias. ```bash agent-device open Contacts --platform ios # creates session on iOS Simulator agent-device snapshot -agent-device click @e5 +agent-device press @e5 agent-device fill @e6 "John" agent-device fill @e7 "Doe" -agent-device click @e3 +agent-device press @e3 agent-device close ``` @@ -56,7 +57,7 @@ Basic flow: ```bash agent-device open SampleApp agent-device snapshot -agent-device click @e7 +agent-device press @e7 agent-device fill @e8 "hello" agent-device close SampleApp ``` @@ -73,19 +74,22 @@ agent-device trace stop ./trace.log Coordinates: - All coordinate-based commands (`press`, `long-press`, `swipe`, `focus`, `fill`) use device coordinates with origin at top-left. - X increases to the right, Y increases downward. +- `press` is the canonical tap command. +- `click` is an equivalent alias and accepts the same targets (`x y`, `@ref`, selector) and flags. 
Gesture series examples: ```bash agent-device press 300 500 --count 12 --interval-ms 45 agent-device press 300 500 --count 6 --hold-ms 120 --interval-ms 30 --jitter-px 2 +agent-device press @e5 --count 5 --double-tap agent-device swipe 540 1500 540 500 120 --count 8 --pause-ms 30 --pattern ping-pong ``` ## Command Index - `boot`, `open`, `close`, `reinstall`, `home`, `back`, `app-switcher` - `snapshot`, `find`, `get` -- `click`, `focus`, `type`, `fill`, `press`, `long-press`, `swipe`, `scroll`, `scrollintoview`, `pinch`, `is` +- `press` (alias: `click`), `focus`, `type`, `fill`, `long-press`, `swipe`, `scroll`, `scrollintoview`, `pinch`, `is` - `alert`, `wait`, `screenshot` - `trace start`, `trace stop` - `settings wifi|airplane|location on|off` @@ -110,6 +114,7 @@ Flags: - `--interval-ms ` delay between `press` iterations - `--hold-ms ` hold duration per `press` iteration - `--jitter-px ` deterministic coordinate jitter for `press` +- `--double-tap` use a double-tap gesture per `press`/`click` iteration (cannot be combined with `--hold-ms` or `--jitter-px`) - `--pause-ms ` delay between `swipe` iterations - `--pattern one-way|ping-pong` repeat pattern for `swipe` - `--verbose` for daemon and runner logs diff --git a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests.swift b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests.swift index b5e36118..0a6c2599 100644 --- a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests.swift +++ b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests.swift @@ -219,6 +219,7 @@ final class RunnerTests: XCTestCase { let normalizedBundleId = command.appBundleId? .trimmingCharacters(in: .whitespacesAndNewlines) let requestedBundleId = (normalizedBundleId?.isEmpty == true) ? 
nil : normalizedBundleId + let switchedApp: Bool if let bundleId = requestedBundleId, currentBundleId != bundleId { let target = XCUIApplication(bundleIdentifier: bundleId) NSLog("AGENT_DEVICE_RUNNER_ACTIVATE bundle=%@ state=%d", bundleId, target.state.rawValue) @@ -226,13 +227,19 @@ final class RunnerTests: XCTestCase { target.activate() currentApp = target currentBundleId = bundleId + switchedApp = true } else if requestedBundleId == nil { // Do not reuse stale bundle targets when the caller does not explicitly request one. currentApp = nil currentBundleId = nil + switchedApp = false + } else { + switchedApp = false } let activeApp = currentApp ?? app - _ = activeApp.waitForExistence(timeout: 5) + if switchedApp { + _ = activeApp.waitForExistence(timeout: 5) + } switch command.command { case .shutdown: @@ -250,6 +257,23 @@ final class RunnerTests: XCTestCase { return Response(ok: true, data: DataPayload(message: "tapped")) } return Response(ok: false, error: ErrorPayload(message: "tap requires text or x/y")) + case .tapSeries: + guard let x = command.x, let y = command.y else { + return Response(ok: false, error: ErrorPayload(message: "tapSeries requires x and y")) + } + let count = max(Int(command.count ?? 1), 1) + let intervalMs = max(command.intervalMs ?? 0, 0) + let doubleTap = command.doubleTap ?? false + if doubleTap { + runSeries(count: count, pauseMs: intervalMs) { _ in + doubleTapAt(app: activeApp, x: x, y: y) + } + return Response(ok: true, data: DataPayload(message: "tap series")) + } + runSeries(count: count, pauseMs: intervalMs) { _ in + tapAt(app: activeApp, x: x, y: y) + } + return Response(ok: true, data: DataPayload(message: "tap series")) case .longPress: guard let x = command.x, let y = command.y else { return Response(ok: false, error: ErrorPayload(message: "longPress requires x and y")) @@ -264,6 +288,26 @@ final class RunnerTests: XCTestCase { let holdDuration = min(max((command.durationMs ?? 
60) / 1000.0, 0.016), 10.0) dragAt(app: activeApp, x: x, y: y, x2: x2, y2: y2, holdDuration: holdDuration) return Response(ok: true, data: DataPayload(message: "dragged")) + case .dragSeries: + guard let x = command.x, let y = command.y, let x2 = command.x2, let y2 = command.y2 else { + return Response(ok: false, error: ErrorPayload(message: "dragSeries requires x, y, x2, and y2")) + } + let count = max(Int(command.count ?? 1), 1) + let pauseMs = max(command.pauseMs ?? 0, 0) + let pattern = command.pattern ?? "one-way" + if pattern != "one-way" && pattern != "ping-pong" { + return Response(ok: false, error: ErrorPayload(message: "dragSeries pattern must be one-way or ping-pong")) + } + let holdDuration = min(max((command.durationMs ?? 60) / 1000.0, 0.016), 10.0) + runSeries(count: count, pauseMs: pauseMs) { idx in + let reverse = pattern == "ping-pong" && (idx % 2 == 1) + if reverse { + dragAt(app: activeApp, x: x2, y: y2, x2: x, y2: y, holdDuration: holdDuration) + } else { + dragAt(app: activeApp, x: x, y: y, x2: x2, y2: y2, holdDuration: holdDuration) + } + } + return Response(ok: true, data: DataPayload(message: "drag series")) case .type: guard let text = command.text else { return Response(ok: false, error: ErrorPayload(message: "type requires text")) @@ -443,6 +487,12 @@ final class RunnerTests: XCTestCase { coordinate.tap() } + private func doubleTapAt(app: XCUIApplication, x: Double, y: Double) { + let origin = app.coordinate(withNormalizedOffset: CGVector(dx: 0, dy: 0)) + let coordinate = origin.withOffset(CGVector(dx: x, dy: y)) + coordinate.doubleTap() + } + private func longPressAt(app: XCUIApplication, x: Double, y: Double, duration: TimeInterval) { let origin = app.coordinate(withNormalizedOffset: CGVector(dx: 0, dy: 0)) let coordinate = origin.withOffset(CGVector(dx: x, dy: y)) @@ -463,6 +513,17 @@ final class RunnerTests: XCTestCase { start.press(forDuration: holdDuration, thenDragTo: end) } + private func runSeries(count: Int, pauseMs: Double, 
operation: (Int) -> Void) { + let total = max(count, 1) + let pause = max(pauseMs, 0) + for idx in 0..<total { + operation(idx) + if idx < total - 1 && pause > 0 { + Thread.sleep(forTimeInterval: pause / 1000.0) + } + } + } + private func swipe(app: XCUIApplication, direction: SwipeDirection) { let target = app.windows.firstMatch.exists ? app.windows.firstMatch : app let start = target.coordinate(withNormalizedOffset: CGVector(dx: 0.5, dy: 0.2)) @@ -982,8 +1043,10 @@ private func resolveRunnerPort() -> UInt16 { enum CommandType: String, Codable { case tap + case tapSeries case longPress case drag + case dragSeries case type case swipe case findText @@ -1012,6 +1075,11 @@ struct Command: Codable { let action: String? let x: Double? let y: Double? + let count: Double? + let intervalMs: Double? + let doubleTap: Bool? + let pauseMs: Double? + let pattern: String? let x2: Double? let y2: Double? let durationMs: Double? diff --git a/skills/agent-device/SKILL.md b/skills/agent-device/SKILL.md index ff54d6df..7442d437 100644 --- a/skills/agent-device/SKILL.md +++ b/skills/agent-device/SKILL.md @@ -12,7 +12,7 @@ For agent-driven exploration: use refs. For deterministic replay scripts: use se ```bash agent-device open Settings --platform ios agent-device snapshot -i -agent-device click @e3 +agent-device press @e3 agent-device wait text "Camera" agent-device alert wait 10000 agent-device fill @e5 "test" @@ -29,7 +29,7 @@ npx -y agent-device 1. Open app or deep link: `open [app|url] [url]` (`open` handles target selection + boot/activation in the normal flow) 2. Snapshot: `snapshot` to get refs from accessibility tree -3. Interact using refs (`click @ref`, `fill @ref "text"`) +3. Interact using refs (`press @ref`, `fill @ref "text"`; `click` is an alias of `press`) 4. Re-snapshot after navigation/UI changes 5. 
Close session when done @@ -109,13 +109,15 @@ agent-device appstate ### Interactions (use @refs from snapshot) ```bash -agent-device click @e1 +agent-device press @e1 # Canonical tap command (`click` is an alias) agent-device focus @e2 agent-device fill @e2 "text" # Clear then type (Android: verifies value and retries once on mismatch) agent-device type "text" # Type into focused field without clearing agent-device press 300 500 # Tap by coordinates agent-device press 300 500 --count 12 --interval-ms 45 agent-device press 300 500 --count 6 --hold-ms 120 --interval-ms 30 --jitter-px 2 +agent-device press @e1 --count 5 # Repeat taps on the same target +agent-device press @e1 --count 5 --double-tap # Use double-tap gesture per iteration agent-device swipe 540 1500 540 500 120 agent-device swipe 540 1500 540 500 120 --count 8 --pause-ms 30 --pattern ping-pong agent-device long-press 300 500 800 # Long press (where supported) @@ -178,7 +180,10 @@ agent-device apps --platform android --user-installed ## Best practices -- `press` supports gesture series controls: `--count`, `--interval-ms`, `--hold-ms`, `--jitter-px`. +- `press` is the canonical tap command; `click` is an alias with the same behavior. +- `press` (and `click`) accepts `x y`, `@ref`, and selector targets. +- `press`/`click` support gesture series controls: `--count`, `--interval-ms`, `--hold-ms`, `--jitter-px`, `--double-tap`. +- `--double-tap` cannot be combined with `--hold-ms` or `--jitter-px`. - `swipe` supports coordinate + timing controls and repeat patterns: `swipe x1 y1 x2 y2 [durationMs] --count --pause-ms --pattern`. - `swipe` timing is platform-safe: Android uses requested duration; iOS uses normalized safe timing to avoid long-press side effects. - Pinch (`pinch [x y]`) is iOS simulator-only; scale > 1 zooms in, < 1 zooms out. 
diff --git a/skills/agent-device/references/snapshot-refs.md b/skills/agent-device/references/snapshot-refs.md index 7dd7e4e1..e9c448c5 100644 --- a/skills/agent-device/references/snapshot-refs.md +++ b/skills/agent-device/references/snapshot-refs.md @@ -3,6 +3,7 @@ ## Purpose Refs are useful for discovery/debugging. For deterministic scripts, use selectors. +For tap interactions, `press` is canonical; `click` is an equivalent alias. ## Snapshot @@ -24,14 +25,14 @@ App: com.apple.Preferences ## Using refs (discovery/debug) ```bash -agent-device click @e2 +agent-device press @e2 agent-device fill @e5 "test" ``` ## Using selectors (deterministic) ```bash -agent-device click 'id="camera_row" || label="Camera" role=button' +agent-device press 'id="camera_row" || label="Camera" role=button' agent-device fill 'id="search_input" editable=true' "test" agent-device is visible 'id="camera_settings_anchor"' ``` diff --git a/src/cli.ts b/src/cli.ts index 05777a51..66075af3 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -146,12 +146,12 @@ export async function runCli(argv: string[], deps: CliDeps = DEFAULT_CLI_DEPS): if (logTailStopper) logTailStopper(); return; } - if (command === 'click') { + if (command === 'click' || command === 'press') { const ref = (response.data as any)?.ref ?? 
''; const x = (response.data as any)?.x; const y = (response.data as any)?.y; if (ref && typeof x === 'number' && typeof y === 'number') { - process.stdout.write(`Clicked @${ref} (${x}, ${y})\n`); + process.stdout.write(`Tapped @${ref} (${x}, ${y})\n`); } if (logTailStopper) logTailStopper(); return; diff --git a/src/core/__tests__/dispatch-press.test.ts b/src/core/__tests__/dispatch-press.test.ts new file mode 100644 index 00000000..64885655 --- /dev/null +++ b/src/core/__tests__/dispatch-press.test.ts @@ -0,0 +1,43 @@ +import test from 'node:test'; +import assert from 'node:assert/strict'; +import { shouldUseIosDragSeries, shouldUseIosTapSeries } from '../dispatch.ts'; +import type { DeviceInfo } from '../../utils/device.ts'; + +const iosDevice: DeviceInfo = { + platform: 'ios', + id: 'ios-1', + name: 'iPhone 15', + kind: 'simulator', + booted: true, +}; + +const androidDevice: DeviceInfo = { + platform: 'android', + id: 'android-1', + name: 'Pixel', + kind: 'emulator', + booted: true, +}; + +test('shouldUseIosTapSeries enables fast path for repeated plain iOS taps', () => { + assert.equal(shouldUseIosTapSeries(iosDevice, 5, 0, 0), true); +}); + +test('shouldUseIosTapSeries disables fast path for single press or modified gestures', () => { + assert.equal(shouldUseIosTapSeries(iosDevice, 1, 0, 0), false); + assert.equal(shouldUseIosTapSeries(iosDevice, 5, 100, 0), false); + assert.equal(shouldUseIosTapSeries(iosDevice, 5, 0, 1), false); +}); + +test('shouldUseIosTapSeries disables fast path for non-iOS devices', () => { + assert.equal(shouldUseIosTapSeries(androidDevice, 5, 0, 0), false); +}); + +test('shouldUseIosDragSeries enables fast path for repeated iOS swipes', () => { + assert.equal(shouldUseIosDragSeries(iosDevice, 3), true); +}); + +test('shouldUseIosDragSeries disables fast path for single swipe and non-iOS', () => { + assert.equal(shouldUseIosDragSeries(iosDevice, 1), false); + assert.equal(shouldUseIosDragSeries(androidDevice, 3), false); +}); diff 
--git a/src/core/dispatch.ts b/src/core/dispatch.ts index 0703a6b9..922e7c68 100644 --- a/src/core/dispatch.ts +++ b/src/core/dispatch.ts @@ -74,6 +74,7 @@ export async function dispatchCommand( intervalMs?: number; holdMs?: number; jitterPx?: number; + doubleTap?: boolean; pauseMs?: number; pattern?: 'one-way' | 'ping-pong'; }, @@ -131,17 +132,45 @@ export async function dispatchCommand( const intervalMs = requireIntInRange(context?.intervalMs ?? 0, 'interval-ms', 0, 10_000); const holdMs = requireIntInRange(context?.holdMs ?? 0, 'hold-ms', 0, 10_000); const jitterPx = requireIntInRange(context?.jitterPx ?? 0, 'jitter-px', 0, 100); + const doubleTap = context?.doubleTap === true; - for (let index = 0; index < count; index += 1) { + if (doubleTap && holdMs > 0) { + throw new AppError('INVALID_ARGS', 'double-tap cannot be combined with hold-ms'); + } + if (doubleTap && jitterPx > 0) { + throw new AppError('INVALID_ARGS', 'double-tap cannot be combined with jitter-px'); + } + + if (shouldUseIosTapSeries(device, count, holdMs, jitterPx)) { + await runIosRunnerCommand( + device, + { + command: 'tapSeries', + x, + y, + count, + intervalMs, + doubleTap, + appBundleId: context?.appBundleId, + }, + { verbose: context?.verbose, logPath: context?.logPath, traceLogPath: context?.traceLogPath }, + ); + return { x, y, count, intervalMs, holdMs, jitterPx, doubleTap, timingMode: 'runner-series' }; + } + + await runRepeatedSeries(count, intervalMs, async (index) => { const [dx, dy] = computeDeterministicJitter(index, jitterPx); const targetX = x + dx; const targetY = y + dy; + if (doubleTap) { + await interactor.doubleTap(targetX, targetY); + return; + } if (holdMs > 0) await interactor.longPress(targetX, targetY, holdMs); else await interactor.tap(targetX, targetY); - if (index < count - 1 && intervalMs > 0) await sleep(intervalMs); - } + }); - return { x, y, count, intervalMs, holdMs, jitterPx }; + return { x, y, count, intervalMs, holdMs, jitterPx, doubleTap }; } case 'swipe': 
{ const x1 = Number(positionals[0]); @@ -162,12 +191,42 @@ export async function dispatchCommand( throw new AppError('INVALID_ARGS', `Invalid pattern: ${pattern}`); } - for (let index = 0; index < count; index += 1) { + if (shouldUseIosDragSeries(device, count)) { + await runIosRunnerCommand( + device, + { + command: 'dragSeries', + x: x1, + y: y1, + x2, + y2, + durationMs: effectiveDurationMs, + count, + pauseMs, + pattern, + appBundleId: context?.appBundleId, + }, + { verbose: context?.verbose, logPath: context?.logPath, traceLogPath: context?.traceLogPath }, + ); + return { + x1, + y1, + x2, + y2, + durationMs, + effectiveDurationMs, + timingMode: 'runner-series', + count, + pauseMs, + pattern, + }; + } + + await runRepeatedSeries(count, pauseMs, async (index) => { const reverse = pattern === 'ping-pong' && index % 2 === 1; if (reverse) await interactor.swipe(x2, y2, x1, y1, effectiveDurationMs); else await interactor.swipe(x1, y1, x2, y2, effectiveDurationMs); - if (index < count - 1 && pauseMs > 0) await sleep(pauseMs); - } + }); return { x1, @@ -357,12 +416,38 @@ function requireIntInRange(value: number, name: string, min: number, max: number return value; } +export function shouldUseIosTapSeries( + device: DeviceInfo, + count: number, + holdMs: number, + jitterPx: number, +): boolean { + return device.platform === 'ios' && count > 1 && holdMs === 0 && jitterPx === 0; +} + +export function shouldUseIosDragSeries(device: DeviceInfo, count: number): boolean { + return device.platform === 'ios' && count > 1; +} + function computeDeterministicJitter(index: number, jitterPx: number): [number, number] { if (jitterPx <= 0) return [0, 0]; const [dx, dy] = DETERMINISTIC_JITTER_PATTERN[index % DETERMINISTIC_JITTER_PATTERN.length]; return [dx * jitterPx, dy * jitterPx]; } +async function runRepeatedSeries( + count: number, + pauseMs: number, + operation: (index: number) => Promise, +): Promise { + for (let index = 0; index < count; index += 1) { + await 
operation(index); + if (index < count - 1 && pauseMs > 0) { + await sleep(pauseMs); + } + } +} + async function sleep(ms: number): Promise { await new Promise((resolve) => setTimeout(resolve, ms)); } diff --git a/src/daemon.ts b/src/daemon.ts index f72b710b..4a4a289c 100644 --- a/src/daemon.ts +++ b/src/daemon.ts @@ -55,15 +55,16 @@ async function handleRequest(req: DaemonRequest): Promise { return { ok: false, error: { code: 'UNAUTHORIZED', message: 'Invalid token' } }; } - const command = req.command; - const sessionName = resolveEffectiveSessionName(req, sessionStore); + const normalizedReq = normalizeAliasedCommands(req); + const command = normalizedReq.command; + const sessionName = resolveEffectiveSessionName(normalizedReq, sessionStore); const existingSession = sessionStore.get(sessionName); if (existingSession && !selectorValidationExemptCommands.has(command)) { - assertSessionSelectorMatches(existingSession, req.flags); + assertSessionSelectorMatches(existingSession, normalizedReq.flags); } const sessionResponse = await handleSessionCommands({ - req, + req: normalizedReq, sessionName, logPath, sessionStore, @@ -72,7 +73,7 @@ async function handleRequest(req: DaemonRequest): Promise { if (sessionResponse) return sessionResponse; const snapshotResponse = await handleSnapshotCommands({ - req, + req: normalizedReq, sessionName, logPath, sessionStore, @@ -80,14 +81,14 @@ async function handleRequest(req: DaemonRequest): Promise { if (snapshotResponse) return snapshotResponse; const recordTraceResponse = await handleRecordTraceCommands({ - req, + req: normalizedReq, sessionName, sessionStore, }); if (recordTraceResponse) return recordTraceResponse; const findResponse = await handleFindCommands({ - req, + req: normalizedReq, sessionName, logPath, sessionStore, @@ -96,7 +97,7 @@ async function handleRequest(req: DaemonRequest): Promise { if (findResponse) return findResponse; const interactionResponse = await handleInteractionCommands({ - req, + req: 
normalizedReq, sessionName, sessionStore, contextFromFlags, @@ -119,18 +120,23 @@ async function handleRequest(req: DaemonRequest): Promise { }; } - const data = await dispatchCommand(session.device, command, req.positionals ?? [], req.flags?.out, { - ...contextFromFlags(req.flags, session.appBundleId, session.trace?.outPath), + const data = await dispatchCommand(session.device, command, normalizedReq.positionals ?? [], normalizedReq.flags?.out, { + ...contextFromFlags(normalizedReq.flags, session.appBundleId, session.trace?.outPath), }); sessionStore.recordAction(session, { command, - positionals: req.positionals ?? [], - flags: req.flags ?? {}, + positionals: normalizedReq.positionals ?? [], + flags: normalizedReq.flags ?? {}, result: data ?? {}, }); return { ok: true, data: data ?? {} }; } +function normalizeAliasedCommands(req: DaemonRequest): DaemonRequest { + if (req.command !== 'click') return req; + return { ...req, command: 'press' }; +} + function writeInfo(port: number): void { if (!fs.existsSync(baseDir)) fs.mkdirSync(baseDir, { recursive: true }); fs.writeFileSync(logPath, ''); diff --git a/src/daemon/__tests__/session-store.test.ts b/src/daemon/__tests__/session-store.test.ts index e611970b..343fd47f 100644 --- a/src/daemon/__tests__/session-store.test.ts +++ b/src/daemon/__tests__/session-store.test.ts @@ -140,3 +140,63 @@ test('writeSessionLog persists open --relaunch in script output', () => { const script = fs.readFileSync(path.join(root, scriptFile!), 'utf8'); assert.match(script, /open "Settings" --relaunch/); }); + +test('writeSessionLog preserves interaction series flags for click/press/swipe', () => { + const root = fs.mkdtempSync(path.join(os.tmpdir(), 'agent-device-session-log-series-flags-')); + const store = new SessionStore(root); + const session = makeSession('default'); + store.recordAction(session, { + command: 'open', + positionals: ['Settings'], + flags: { platform: 'ios', saveScript: true }, + result: {}, + }); + 
store.recordAction(session, { + command: 'click', + positionals: ['id="continue_button"'], + flags: { + platform: 'ios', + count: 5, + intervalMs: 1, + holdMs: 2, + jitterPx: 3, + doubleTap: true, + }, + result: {}, + }); + store.recordAction(session, { + command: 'press', + positionals: ['201', '545'], + flags: { + platform: 'ios', + count: 4, + intervalMs: 8, + }, + result: {}, + }); + store.recordAction(session, { + command: 'swipe', + positionals: ['10', '20', '30', '40'], + flags: { + platform: 'ios', + count: 3, + pauseMs: 12, + pattern: 'ping-pong', + }, + result: {}, + }); + store.recordAction(session, { + command: 'close', + positionals: [], + flags: { platform: 'ios' }, + result: {}, + }); + + store.writeSessionLog(session); + const scriptFile = fs.readdirSync(root).find((file) => file.endsWith('.ad')); + assert.ok(scriptFile); + const script = fs.readFileSync(path.join(root, scriptFile!), 'utf8'); + assert.match(script, /click "id=\\"continue_button\\"" --count 5 --interval-ms 1 --hold-ms 2 --jitter-px 3 --double-tap/); + assert.match(script, /press 201 545 --count 4 --interval-ms 8/); + assert.match(script, /swipe 10 20 30 40 --count 3 --pause-ms 12 --pattern ping-pong/); +}); diff --git a/src/daemon/context.ts b/src/daemon/context.ts index 169d980f..5b7ee2ed 100644 --- a/src/daemon/context.ts +++ b/src/daemon/context.ts @@ -15,6 +15,7 @@ export type DaemonCommandContext = { intervalMs?: number; holdMs?: number; jitterPx?: number; + doubleTap?: boolean; pauseMs?: number; pattern?: 'one-way' | 'ping-pong'; }; @@ -40,6 +41,7 @@ export function contextFromFlags( intervalMs: flags?.intervalMs, holdMs: flags?.holdMs, jitterPx: flags?.jitterPx, + doubleTap: flags?.doubleTap, pauseMs: flags?.pauseMs, pattern: flags?.pattern, }; diff --git a/src/daemon/handlers/__tests__/interaction.test.ts b/src/daemon/handlers/__tests__/interaction.test.ts index 233baeed..04abf1d5 100644 --- a/src/daemon/handlers/__tests__/interaction.test.ts +++ 
b/src/daemon/handlers/__tests__/interaction.test.ts @@ -1,6 +1,42 @@ import test from 'node:test'; import assert from 'node:assert/strict'; +import fs from 'node:fs'; +import os from 'node:os'; +import path from 'node:path'; import { unsupportedRefSnapshotFlags } from '../interaction.ts'; +import { handleInteractionCommands } from '../interaction.ts'; +import { SessionStore } from '../../session-store.ts'; +import type { SessionState } from '../../types.ts'; +import type { CommandFlags } from '../../../core/dispatch.ts'; +import { attachRefs } from '../../../utils/snapshot.ts'; + +function makeSessionStore(): SessionStore { + const root = fs.mkdtempSync(path.join(os.tmpdir(), 'agent-device-interaction-handler-')); + return new SessionStore(path.join(root, 'sessions')); +} + +function makeSession(name: string): SessionState { + return { + name, + device: { + platform: 'ios', + id: 'sim-1', + name: 'iPhone 17 Pro', + kind: 'simulator', + booted: true, + }, + createdAt: Date.now(), + actions: [], + }; +} + +const contextFromFlags = (flags: CommandFlags | undefined) => ({ + count: flags?.count, + intervalMs: flags?.intervalMs, + holdMs: flags?.holdMs, + jitterPx: flags?.jitterPx, + doubleTap: flags?.doubleTap, +}); test('unsupportedRefSnapshotFlags returns unsupported snapshot flags for @ref flows', () => { const unsupported = unsupportedRefSnapshotFlags({ @@ -19,3 +55,133 @@ test('unsupportedRefSnapshotFlags returns empty when no ref-unsupported flags ar }); assert.deepEqual(unsupported, []); }); + +test('press coordinates dispatches press and records as press', async () => { + const sessionStore = makeSessionStore(); + const sessionName = 'default'; + sessionStore.set(sessionName, makeSession(sessionName)); + + const dispatchCalls: Array<{ command: string; positionals: string[]; context: Record | undefined }> = + []; + const response = await handleInteractionCommands({ + req: { + token: 't', + session: sessionName, + command: 'press', + positionals: ['100', '200'], + 
flags: { count: 3, intervalMs: 1, doubleTap: true }, + }, + sessionName, + sessionStore, + contextFromFlags, + dispatch: async (_device, command, positionals, _out, context) => { + dispatchCalls.push({ command, positionals, context: context as Record | undefined }); + return { ok: true }; + }, + }); + + assert.ok(response); + assert.equal(response.ok, true); + assert.equal(dispatchCalls.length, 1); + assert.equal(dispatchCalls[0]?.command, 'press'); + assert.deepEqual(dispatchCalls[0]?.positionals, ['100', '200']); + assert.equal(dispatchCalls[0]?.context?.count, 3); + assert.equal(dispatchCalls[0]?.context?.intervalMs, 1); + assert.equal(dispatchCalls[0]?.context?.doubleTap, true); + + const session = sessionStore.get(sessionName); + assert.ok(session); + assert.equal(session?.actions.length, 1); + assert.equal(session?.actions[0]?.command, 'press'); + assert.deepEqual(session?.actions[0]?.positionals, ['100', '200']); +}); + +test('press @ref resolves snapshot node and records press action', async () => { + const sessionStore = makeSessionStore(); + const sessionName = 'default'; + const session = makeSession(sessionName); + session.snapshot = { + nodes: attachRefs([ + { + index: 0, + type: 'XCUIElementTypeButton', + label: 'Continue', + identifier: 'auth_continue', + rect: { x: 10, y: 20, width: 100, height: 40 }, + enabled: true, + hittable: true, + }, + ]), + createdAt: Date.now(), + backend: 'xctest', + }; + sessionStore.set(sessionName, session); + + const dispatchCalls: Array<{ command: string; positionals: string[] }> = []; + const response = await handleInteractionCommands({ + req: { + token: 't', + session: sessionName, + command: 'press', + positionals: ['@e1'], + flags: {}, + }, + sessionName, + sessionStore, + contextFromFlags, + dispatch: async (_device, command, positionals) => { + dispatchCalls.push({ command, positionals }); + return { pressed: true }; + }, + }); + + assert.ok(response); + assert.equal(response.ok, true); + if (response.ok) { + 
assert.equal(response.data?.ref, 'e1'); + assert.equal(response.data?.x, 60); + assert.equal(response.data?.y, 40); + } + assert.equal(dispatchCalls.length, 1); + assert.equal(dispatchCalls[0]?.command, 'press'); + assert.deepEqual(dispatchCalls[0]?.positionals, ['60', '40']); + + const stored = sessionStore.get(sessionName); + assert.ok(stored); + assert.equal(stored?.actions.length, 1); + assert.equal(stored?.actions[0]?.command, 'press'); + const result = (stored?.actions[0]?.result ?? {}) as Record; + assert.equal(result.ref, 'e1'); + assert.ok(Array.isArray(result.selectorChain)); +}); + +test('press coordinates does not treat extra trailing args as selector', async () => { + const sessionStore = makeSessionStore(); + const sessionName = 'default'; + sessionStore.set(sessionName, makeSession(sessionName)); + + const dispatchCalls: Array<{ command: string; positionals: string[] }> = []; + const response = await handleInteractionCommands({ + req: { + token: 't', + session: sessionName, + command: 'press', + positionals: ['100', '200', 'extra'], + flags: { count: 2 }, + }, + sessionName, + sessionStore, + contextFromFlags, + dispatch: async (_device, command, positionals) => { + dispatchCalls.push({ command, positionals }); + return { ok: true }; + }, + }); + + assert.ok(response); + assert.equal(response.ok, true); + assert.equal(dispatchCalls.length, 1); + assert.equal(dispatchCalls[0]?.command, 'press'); + assert.deepEqual(dispatchCalls[0]?.positionals, ['100', '200']); + assert.equal(sessionStore.get(sessionName)?.actions.length, 1); +}); diff --git a/src/daemon/handlers/__tests__/replay-heal.test.ts b/src/daemon/handlers/__tests__/replay-heal.test.ts index 7c4c1459..ff1c6cd2 100644 --- a/src/daemon/handlers/__tests__/replay-heal.test.ts +++ b/src/daemon/handlers/__tests__/replay-heal.test.ts @@ -518,6 +518,87 @@ test('replay --update heals numeric get text drift when numeric candidate value assert.equal(invokeCalls.length, 2); }); +test('replay --update 
heals selector in press command and preserves press series flags', async () => { + const tempRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'agent-device-replay-heal-press-')); + const sessionsDir = path.join(tempRoot, 'sessions'); + const replayPath = path.join(tempRoot, 'replay.ad'); + const sessionStore = new SessionStore(sessionsDir); + const sessionName = 'heal-press-session'; + sessionStore.set(sessionName, makeSession(sessionName)); + fs.writeFileSync( + replayPath, + 'press "id=\\"old_continue\\" || label=\\"Continue\\"" --count 3 --interval-ms 1 --double-tap\n', + ); + + const invokeCalls: DaemonRequest[] = []; + const invoke = async (request: DaemonRequest): Promise => { + if (request.command !== 'press') { + return { ok: false, error: { code: 'INVALID_ARGS', message: `unexpected command ${request.command}` } }; + } + invokeCalls.push(request); + const selector = request.positionals?.[0] ?? ''; + if (selector.includes('old_continue')) { + return { ok: false, error: { code: 'COMMAND_FAILED', message: 'selector no longer exists' } }; + } + if (selector.includes('auth_continue')) { + return { ok: true, data: { pressed: true } }; + } + return { ok: false, error: { code: 'COMMAND_FAILED', message: 'unexpected selector' } }; + }; + + const dispatch = async (): Promise | void> => { + return { + nodes: [ + { + index: 0, + type: 'XCUIElementTypeButton', + label: 'Continue', + identifier: 'auth_continue', + rect: { x: 10, y: 10, width: 100, height: 44 }, + enabled: true, + hittable: true, + }, + ], + truncated: false, + backend: 'xctest', + }; + }; + + const response = await handleSessionCommands({ + req: { + token: 't', + session: sessionName, + command: 'replay', + positionals: [replayPath], + flags: { replayUpdate: true }, + }, + sessionName, + logPath: path.join(tempRoot, 'daemon.log'), + sessionStore, + invoke, + dispatch, + }); + + assert.ok(response); + assert.equal(response.ok, true, JSON.stringify(response)); + if (response.ok) { + 
assert.equal(response.data?.healed, 1); + assert.equal(response.data?.replayed, 1); + } + assert.equal(invokeCalls.length, 2); + assert.equal(invokeCalls[0]?.flags?.count, 3); + assert.equal(invokeCalls[0]?.flags?.intervalMs, 1); + assert.equal(invokeCalls[0]?.flags?.doubleTap, true); + const updatedLine = fs + .readFileSync(replayPath, 'utf8') + .split(/\r?\n/) + .find((line) => line.startsWith('press ')); + assert.ok(updatedLine); + const tokens = tokenizeReplayLine(updatedLine!); + assert.ok(tokens[1]?.includes('auth_continue')); + assert.deepEqual(tokens.slice(2), ['--count', '3', '--interval-ms', '1', '--double-tap']); +}); + test('replay rejects legacy JSON payload files', async () => { const tempRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'agent-device-replay-json-rejected-')); const sessionsDir = path.join(tempRoot, 'sessions'); diff --git a/src/daemon/handlers/__tests__/session.test.ts b/src/daemon/handlers/__tests__/session.test.ts index 94d8935d..3e423e56 100644 --- a/src/daemon/handlers/__tests__/session.test.ts +++ b/src/daemon/handlers/__tests__/session.test.ts @@ -818,3 +818,39 @@ test('replay parses open --relaunch flag and replays open with relaunch semantic assert.deepEqual(invoked[0]?.positionals, ['Settings']); assert.equal(invoked[0]?.flags?.relaunch, true); }); + +test('replay parses press series flags and passes them to invoke', async () => { + const sessionStore = makeSessionStore(); + const replayRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'agent-device-replay-press-series-')); + const replayPath = path.join(replayRoot, 'press-series.ad'); + fs.writeFileSync(replayPath, 'press 201 545 --count 5 --interval-ms 1 --hold-ms 2 --jitter-px 3 --double-tap\n'); + + const invoked: DaemonRequest[] = []; + const response = await handleSessionCommands({ + req: { + token: 't', + session: 'default', + command: 'replay', + positionals: [replayPath], + flags: {}, + }, + sessionName: 'default', + logPath: path.join(os.tmpdir(), 'daemon.log'), + 
sessionStore, + invoke: async (req) => { + invoked.push(req); + return { ok: true, data: {} }; + }, + }); + + assert.ok(response); + assert.equal(response?.ok, true); + assert.equal(invoked.length, 1); + assert.equal(invoked[0]?.command, 'press'); + assert.deepEqual(invoked[0]?.positionals, ['201', '545']); + assert.equal(invoked[0]?.flags?.count, 5); + assert.equal(invoked[0]?.flags?.intervalMs, 1); + assert.equal(invoked[0]?.flags?.holdMs, 2); + assert.equal(invoked[0]?.flags?.jitterPx, 3); + assert.equal(invoked[0]?.flags?.doubleTap, true); +}); diff --git a/src/daemon/handlers/interaction.ts b/src/daemon/handlers/interaction.ts index 4c0af616..3a3c3f64 100644 --- a/src/daemon/handlers/interaction.ts +++ b/src/daemon/handlers/interaction.ts @@ -27,11 +27,13 @@ export async function handleInteractionCommands(params: { sessionName: string; sessionStore: SessionStore; contextFromFlags: ContextFromFlags; + dispatch?: typeof dispatchCommand; }): Promise { const { req, sessionName, sessionStore, contextFromFlags } = params; + const dispatch = params.dispatch ?? dispatchCommand; const command = req.command; - if (command === 'click') { + if (command === 'press') { const session = sessionStore.get(sessionName); if (!session) { return { @@ -39,16 +41,40 @@ export async function handleInteractionCommands(params: { error: { code: 'SESSION_NOT_FOUND', message: 'No active session. Run open first.' }, }; } + const directCoordinates = parseCoordinateTarget(req.positionals ?? []); + if (directCoordinates) { + const data = await dispatch( + session.device, + 'press', + [String(directCoordinates.x), String(directCoordinates.y)], + req.flags?.out, + { + ...contextFromFlags(req.flags, session.appBundleId, session.trace?.outPath), + }, + ); + sessionStore.recordAction(session, { + command, + positionals: req.positionals ?? [String(directCoordinates.x), String(directCoordinates.y)], + flags: req.flags ?? {}, + result: data ?? 
{ x: directCoordinates.x, y: directCoordinates.y }, + }); + return { ok: true, data: data ?? { x: directCoordinates.x, y: directCoordinates.y } }; + } + + const selectorAction = 'click'; const refInput = req.positionals?.[0] ?? ''; if (refInput.startsWith('@')) { - const invalidRefFlagsResponse = refSnapshotFlagGuardResponse('click', req.flags); + const invalidRefFlagsResponse = refSnapshotFlagGuardResponse('press', req.flags); if (invalidRefFlagsResponse) return invalidRefFlagsResponse; if (!session.snapshot) { return { ok: false, error: { code: 'INVALID_ARGS', message: 'No snapshot in session. Run snapshot first.' } }; } const ref = normalizeRef(refInput); if (!ref) { - return { ok: false, error: { code: 'INVALID_ARGS', message: 'click requires a ref like @e2' } }; + return { + ok: false, + error: { code: 'INVALID_ARGS', message: `${command} requires a ref like @e2` }, + }; } let node = findNodeByRef(session.snapshot.nodes, ref); if (!node?.rect && req.positionals.length > 1) { @@ -64,9 +90,9 @@ export async function handleInteractionCommands(params: { }; } const refLabel = resolveRefLabel(node, session.snapshot.nodes); - const selectorChain = buildSelectorChainForNode(node, session.device.platform, { action: 'click' }); + const selectorChain = buildSelectorChainForNode(node, session.device.platform, { action: selectorAction }); const { x, y } = centerOfRect(node.rect); - await dispatchCommand(session.device, 'press', [String(x), String(y)], req.flags?.out, { + const data = await dispatch(session.device, 'press', [String(x), String(y)], req.flags?.out, { ...contextFromFlags(req.flags, session.appBundleId, session.trace?.outPath), }); sessionStore.recordAction(session, { @@ -75,20 +101,25 @@ export async function handleInteractionCommands(params: { flags: req.flags ?? {}, result: { ref, x, y, refLabel, selectorChain }, }); - return { ok: true, data: { ref, x, y } }; + return { ok: true, data: { ...(data ?? 
{}), ref, x, y } }; } const selectorExpression = (req.positionals ?? []).join(' ').trim(); if (!selectorExpression) { return { ok: false, - error: { code: 'INVALID_ARGS', message: 'click requires @ref or selector expression' }, + error: { code: 'INVALID_ARGS', message: `${command} requires @ref, selector expression, or x y coordinates` }, }; } const chain = parseSelectorChain(selectorExpression); - const snapshot = await captureSnapshotForSession(session, req.flags, sessionStore, contextFromFlags, { - interactiveOnly: true, - }); + const snapshot = await captureSnapshotForSession( + session, + req.flags, + sessionStore, + contextFromFlags, + { interactiveOnly: true }, + dispatch, + ); const resolved = resolveSelectorChain(snapshot.nodes, chain, { platform: session.device.platform, requireRect: true, @@ -105,10 +136,10 @@ export async function handleInteractionCommands(params: { }; } const { x, y } = centerOfRect(resolved.node.rect); - await dispatchCommand(session.device, 'press', [String(x), String(y)], req.flags?.out, { + const data = await dispatch(session.device, 'press', [String(x), String(y)], req.flags?.out, { ...contextFromFlags(req.flags, session.appBundleId, session.trace?.outPath), }); - const selectorChain = buildSelectorChainForNode(resolved.node, session.device.platform, { action: 'click' }); + const selectorChain = buildSelectorChainForNode(resolved.node, session.device.platform, { action: selectorAction }); const refLabel = resolveRefLabel(resolved.node, snapshot.nodes); sessionStore.recordAction(session, { command, @@ -122,7 +153,7 @@ export async function handleInteractionCommands(params: { refLabel, }, }); - return { ok: true, data: { selector: resolved.selector.raw, x, y } }; + return { ok: true, data: { ...(data ?? 
{}), selector: resolved.selector.raw, x, y } }; } if (command === 'fill') { @@ -157,7 +188,7 @@ export async function handleInteractionCommands(params: { const refLabel = resolveRefLabel(node, session.snapshot.nodes); const selectorChain = buildSelectorChainForNode(node, session.device.platform, { action: 'fill' }); const { x, y } = centerOfRect(node.rect); - const data = await dispatchCommand( + const data = await dispatch( session.device, 'fill', [String(x), String(y), text], @@ -196,9 +227,14 @@ export async function handleInteractionCommands(params: { return { ok: false, error: { code: 'INVALID_ARGS', message: 'fill requires text after selector' } }; } const chain = parseSelectorChain(selectorArgs.selectorExpression); - const snapshot = await captureSnapshotForSession(session, req.flags, sessionStore, contextFromFlags, { - interactiveOnly: true, - }); + const snapshot = await captureSnapshotForSession( + session, + req.flags, + sessionStore, + contextFromFlags, + { interactiveOnly: true }, + dispatch, + ); const resolved = resolveSelectorChain(snapshot.nodes, chain, { platform: session.device.platform, requireRect: true, @@ -221,7 +257,7 @@ export async function handleInteractionCommands(params: { ? 
`fill target ${resolved.selector.raw} resolved to "${nodeType}", attempting fill anyway.` : undefined; const { x, y } = centerOfRect(resolved.node.rect); - const data = await dispatchCommand(session.device, 'fill', [String(x), String(y), text], req.flags?.out, { + const data = await dispatch(session.device, 'fill', [String(x), String(y), text], req.flags?.out, { ...contextFromFlags(req.flags, session.appBundleId, session.trace?.outPath), }); const selectorChain = buildSelectorChainForNode(node, session.device.platform, { action: 'fill' }); @@ -309,9 +345,14 @@ export async function handleInteractionCommands(params: { }; } const chain = parseSelectorChain(selectorExpression); - const snapshot = await captureSnapshotForSession(session, req.flags, sessionStore, contextFromFlags, { - interactiveOnly: false, - }); + const snapshot = await captureSnapshotForSession( + session, + req.flags, + sessionStore, + contextFromFlags, + { interactiveOnly: false }, + dispatch, + ); const resolved = resolveSelectorChain(snapshot.nodes, chain, { platform: session.device.platform, requireRect: false, @@ -407,9 +448,14 @@ export async function handleInteractionCommands(params: { }; } const chain = parseSelectorChain(split.selectorExpression); - const snapshot = await captureSnapshotForSession(session, req.flags, sessionStore, contextFromFlags, { - interactiveOnly: false, - }); + const snapshot = await captureSnapshotForSession( + session, + req.flags, + sessionStore, + contextFromFlags, + { interactiveOnly: false }, + dispatch, + ); if (predicate === 'exists') { const matched = findSelectorChainMatch(snapshot.nodes, chain, { platform: session.device.platform, @@ -490,8 +536,9 @@ async function captureSnapshotForSession( sessionStore: SessionStore, contextFromFlags: ContextFromFlags, options: { interactiveOnly: boolean }, + dispatch: typeof dispatchCommand = dispatchCommand, ) { - const data = (await dispatchCommand(session.device, 'snapshot', [], flags?.out, { + const data = (await 
dispatch(session.device, 'snapshot', [], flags?.out, { ...contextFromFlags( { ...(flags ?? {}), @@ -525,7 +572,7 @@ const REF_UNSUPPORTED_FLAG_MAP: ReadonlyArray<[keyof CommandFlags, string]> = [ ]; function refSnapshotFlagGuardResponse( - command: 'click' | 'fill' | 'get', + command: 'press' | 'fill' | 'get', flags: CommandFlags | undefined, ): DaemonResponse | null { const unsupported = unsupportedRefSnapshotFlags(flags); @@ -539,6 +586,14 @@ function refSnapshotFlagGuardResponse( }; } +function parseCoordinateTarget(positionals: string[]): { x: number; y: number } | null { + if (positionals.length < 2) return null; + const x = Number(positionals[0]); + const y = Number(positionals[1]); + if (!Number.isFinite(x) || !Number.isFinite(y)) return null; + return { x, y }; +} + export function unsupportedRefSnapshotFlags(flags: CommandFlags | undefined): string[] { if (!flags) return []; const unsupported: string[] = []; diff --git a/src/daemon/handlers/session.ts b/src/daemon/handlers/session.ts index e6f61f2d..7298b32a 100644 --- a/src/daemon/handlers/session.ts +++ b/src/daemon/handlers/session.ts @@ -19,6 +19,13 @@ import { tryParseSelectorChain, } from '../selectors.ts'; import { inferFillText, uniqueStrings } from '../action-utils.ts'; +import { + appendScriptSeriesFlags, + formatScriptActionSummary, + formatScriptArg, + isClickLikeCommand, + parseReplaySeriesFlags, +} from '../script-utils.ts'; type ReinstallOps = { ios: (device: DeviceInfo, app: string, appPath: string) => Promise<{ bundleId: string }>; @@ -649,13 +656,7 @@ function withReplayFailureContext( } function formatReplayActionSummary(action: SessionAction): string { - const values = (action.positionals ?? 
[]).map((value) => { - const trimmed = value.trim(); - if (/^-?\d+(\.\d+)?$/.test(trimmed)) return trimmed; - if (trimmed.startsWith('@')) return trimmed; - return JSON.stringify(trimmed); - }); - return [action.command, ...values].join(' '); + return formatScriptActionSummary(action); } async function healReplayAction(params: { @@ -666,12 +667,12 @@ async function healReplayAction(params: { dispatch: typeof dispatchCommand; }): Promise { const { action, sessionName, logPath, sessionStore, dispatch } = params; - if (!['click', 'fill', 'get', 'is', 'wait'].includes(action.command)) return null; + if (!(isClickLikeCommand(action.command) || ['fill', 'get', 'is', 'wait'].includes(action.command))) return null; const session = sessionStore.get(sessionName); if (!session) return null; - const requiresRect = action.command === 'click' || action.command === 'fill'; + const requiresRect = isClickLikeCommand(action.command) || action.command === 'fill'; const allowDisambiguation = - action.command === 'click' || + isClickLikeCommand(action.command) || action.command === 'fill' || (action.command === 'get' && action.positionals?.[0] === 'text'); const snapshot = await captureSnapshotForReplay(session, action, logPath, requiresRect, dispatch, sessionStore); @@ -687,10 +688,10 @@ async function healReplayAction(params: { }); if (!resolved) continue; const selectorChain = buildSelectorChainForNode(resolved.node, session.device.platform, { - action: action.command === 'click' ? 'click' : action.command === 'fill' ? 'fill' : 'get', + action: isClickLikeCommand(action.command) ? 'click' : action.command === 'fill' ? 
'fill' : 'get', }); const selectorExpression = selectorChain.join(' || '); - if (action.command === 'click') { + if (isClickLikeCommand(action.command)) { return { ...action, positionals: [selectorExpression], @@ -790,7 +791,7 @@ function collectReplaySelectorCandidates(action: SessionAction): string[] { : []; result.push(...explicitChain); - if (action.command === 'click') { + if (isClickLikeCommand(action.command)) { const first = action.positionals?.[0] ?? ''; if (first && !first.startsWith('@')) { result.push(action.positionals.join(' ')); @@ -986,17 +987,25 @@ function parseReplayScriptLine(line: string): SessionAction | null { return action; } - if (command === 'click') { - if (args.length === 0) return action; - const target = args[0]; + if (isClickLikeCommand(command)) { + const parsed = parseReplaySeriesFlags(command, args); + Object.assign(action.flags, parsed.flags); + if (parsed.positionals.length === 0) return action; + const target = parsed.positionals[0]; if (target.startsWith('@')) { action.positionals = [target]; - if (args[1]) { - action.result = { refLabel: args[1] }; + if (parsed.positionals[1]) { + action.result = { refLabel: parsed.positionals[1] }; } return action; } - action.positionals = [args.join(' ')]; + const maybeX = parsed.positionals[0]; + const maybeY = parsed.positionals[1]; + if (isNumericToken(maybeX) && isNumericToken(maybeY) && parsed.positionals.length >= 2) { + action.positionals = [maybeX, maybeY]; + return action; + } + action.positionals = [parsed.positionals.join(' ')]; return action; } @@ -1037,10 +1046,22 @@ function parseReplayScriptLine(line: string): SessionAction | null { return action; } + if (command === 'swipe') { + const parsed = parseReplaySeriesFlags(command, args); + Object.assign(action.flags, parsed.flags); + action.positionals = parsed.positionals; + return action; + } + action.positionals = args; return action; } +function isNumericToken(token: string | undefined): token is string { + if (!token) return 
false; + return !Number.isNaN(Number(token)); +} + function tokenizeReplayLine(line: string): string[] { const tokens: string[] = []; let cursor = 0; @@ -1104,14 +1125,14 @@ function formatReplayActionLine(action: SessionAction): string { parts.push('-d', String(action.flags.snapshotDepth)); } if (action.flags?.snapshotScope) { - parts.push('-s', formatReplayArg(action.flags.snapshotScope)); + parts.push('-s', formatScriptArg(action.flags.snapshotScope)); } if (action.flags?.snapshotRaw) parts.push('--raw'); return parts.join(' '); } if (action.command === 'open') { for (const positional of action.positionals ?? []) { - parts.push(formatReplayArg(positional)); + parts.push(formatScriptArg(positional)); } if (action.flags?.relaunch) { parts.push('--relaunch'); @@ -1119,14 +1140,8 @@ function formatReplayActionLine(action: SessionAction): string { return parts.join(' '); } for (const positional of action.positionals ?? []) { - parts.push(formatReplayArg(positional)); + parts.push(formatScriptArg(positional)); } + appendScriptSeriesFlags(parts, action); return parts.join(' '); } - -function formatReplayArg(value: string): string { - const trimmed = value.trim(); - if (trimmed.startsWith('@')) return trimmed; - if (/^-?\d+(\.\d+)?$/.test(trimmed)) return trimmed; - return JSON.stringify(trimmed); -} diff --git a/src/daemon/script-utils.ts b/src/daemon/script-utils.ts new file mode 100644 index 00000000..f8ee8362 --- /dev/null +++ b/src/daemon/script-utils.ts @@ -0,0 +1,100 @@ +import type { SessionAction } from './types.ts'; + +const NUMERIC_ARG_RE = /^-?\d+(\.\d+)?$/; + +const CLICK_LIKE_NUMERIC_FLAG_MAP = new Map([ + ['--count', 'count'], + ['--interval-ms', 'intervalMs'], + ['--hold-ms', 'holdMs'], + ['--jitter-px', 'jitterPx'], +]); + +const SWIPE_NUMERIC_FLAG_MAP = new Map([ + ['--count', 'count'], + ['--pause-ms', 'pauseMs'], +]); + +export function isClickLikeCommand(command: string): command is 'click' | 'press' { + return command === 'click' || command === 
'press'; +} + +export function formatScriptArg(value: string): string { + const trimmed = value.trim(); + if (trimmed.startsWith('@')) return trimmed; + if (NUMERIC_ARG_RE.test(trimmed)) return trimmed; + return JSON.stringify(trimmed); +} + +export function formatScriptActionSummary(action: SessionAction): string { + const values = (action.positionals ?? []).map((value) => formatScriptArg(value)); + return [action.command, ...values].join(' '); +} + +export function appendScriptSeriesFlags(parts: string[], action: Pick): void { + const flags = action.flags ?? {}; + if (isClickLikeCommand(action.command)) { + if (typeof flags.count === 'number') parts.push('--count', String(flags.count)); + if (typeof flags.intervalMs === 'number') parts.push('--interval-ms', String(flags.intervalMs)); + if (typeof flags.holdMs === 'number') parts.push('--hold-ms', String(flags.holdMs)); + if (typeof flags.jitterPx === 'number') parts.push('--jitter-px', String(flags.jitterPx)); + if (flags.doubleTap === true) parts.push('--double-tap'); + return; + } + if (action.command === 'swipe') { + if (typeof flags.count === 'number') parts.push('--count', String(flags.count)); + if (typeof flags.pauseMs === 'number') parts.push('--pause-ms', String(flags.pauseMs)); + if (flags.pattern === 'one-way' || flags.pattern === 'ping-pong') { + parts.push('--pattern', flags.pattern); + } + } +} + +export function parseReplaySeriesFlags(command: string, args: string[]): { positionals: string[]; flags: SessionAction['flags'] } { + const positionals: string[] = []; + const flags: SessionAction['flags'] = {}; + + const numericFlagMap = isClickLikeCommand(command) + ? CLICK_LIKE_NUMERIC_FLAG_MAP + : command === 'swipe' + ? 
SWIPE_NUMERIC_FLAG_MAP + : undefined; + + for (let index = 0; index < args.length; index += 1) { + const token = args[index]; + + if (isClickLikeCommand(command) && token === '--double-tap') { + flags.doubleTap = true; + continue; + } + + const numericKey = numericFlagMap?.get(token); + if (numericKey && index + 1 < args.length) { + const parsed = parseNonNegativeIntToken(args[index + 1]); + if (parsed !== null) { + flags[numericKey] = parsed; + } + index += 1; + continue; + } + + if (command === 'swipe' && token === '--pattern' && index + 1 < args.length) { + const pattern = args[index + 1]; + if (pattern === 'one-way' || pattern === 'ping-pong') { + flags.pattern = pattern; + } + index += 1; + continue; + } + + positionals.push(token); + } + + return { positionals, flags }; +} + +function parseNonNegativeIntToken(token: string | undefined): number | null { + if (!token) return null; + const value = Number(token); + if (!Number.isFinite(value) || value < 0) return null; + return Math.floor(value); +} diff --git a/src/daemon/session-store.ts b/src/daemon/session-store.ts index 4a73abbd..d93d7fcd 100644 --- a/src/daemon/session-store.ts +++ b/src/daemon/session-store.ts @@ -4,6 +4,7 @@ import path from 'node:path'; import type { CommandFlags } from '../core/dispatch.ts'; import type { SessionAction, SessionState } from './types.ts'; import { inferFillText } from './action-utils.ts'; +import { appendScriptSeriesFlags, formatScriptArg, isClickLikeCommand } from './script-utils.ts'; export class SessionStore { private readonly sessions = new Map(); @@ -109,9 +110,12 @@ export class SessionStore { action.result?.selectorChain.every((entry) => typeof entry === 'string') ? 
(action.result.selectorChain as string[]) : []; - if (selectorChain.length > 0 && (action.command === 'click' || action.command === 'fill' || action.command === 'get')) { + if ( + selectorChain.length > 0 && + (isClickLikeCommand(action.command) || action.command === 'fill' || action.command === 'get') + ) { const selectorExpr = selectorChain.join(' || '); - if (action.command === 'click') { + if (isClickLikeCommand(action.command)) { optimized.push({ ...action, positionals: [selectorExpr], @@ -139,7 +143,7 @@ export class SessionStore { } } } - if (action.command === 'click' || action.command === 'fill' || action.command === 'get') { + if (isClickLikeCommand(action.command) || action.command === 'fill' || action.command === 'get') { const refLabel = action.result?.refLabel; if (typeof refLabel === 'string' && refLabel.trim().length > 0) { optimized.push({ @@ -179,6 +183,13 @@ function sanitizeFlags(flags: CommandFlags | undefined): SessionAction['flags'] relaunch, saveScript, noRecord, + count, + intervalMs, + holdMs, + jitterPx, + doubleTap, + pauseMs, + pattern, } = flags; return { platform, @@ -195,6 +206,13 @@ function sanitizeFlags(flags: CommandFlags | undefined): SessionAction['flags'] relaunch, saveScript, noRecord, + count, + intervalMs, + holdMs, + jitterPx, + doubleTap, + pauseMs, + pattern, }; } @@ -213,30 +231,36 @@ function formatScript(session: SessionState, actions: SessionAction[]): string { function formatActionLine(action: SessionAction): string { const parts: string[] = [action.command]; - if (action.command === 'click') { - const ref = action.positionals?.[0]; - if (ref) { - parts.push(formatArg(ref)); - if (ref.startsWith('@')) { + if (isClickLikeCommand(action.command)) { + const first = action.positionals?.[0]; + if (first) { + if (first.startsWith('@')) { + parts.push(formatScriptArg(first)); const refLabel = action.result?.refLabel; if (typeof refLabel === 'string' && refLabel.trim().length > 0) { - parts.push(formatArg(refLabel)); + 
parts.push(formatScriptArg(refLabel)); } + appendScriptSeriesFlags(parts, action); + return parts.join(' '); + } + if (action.positionals.length === 1) { + parts.push(formatScriptArg(first)); + appendScriptSeriesFlags(parts, action); + return parts.join(' '); } - return parts.join(' '); } } if (action.command === 'fill') { const ref = action.positionals?.[0]; if (ref && ref.startsWith('@')) { - parts.push(formatArg(ref)); + parts.push(formatScriptArg(ref)); const refLabel = action.result?.refLabel; const text = action.positionals.slice(1).join(' '); if (typeof refLabel === 'string' && refLabel.trim().length > 0) { - parts.push(formatArg(refLabel)); + parts.push(formatScriptArg(refLabel)); } if (text) { - parts.push(formatArg(text)); + parts.push(formatScriptArg(text)); } return parts.join(' '); } @@ -245,12 +269,12 @@ function formatActionLine(action: SessionAction): string { const sub = action.positionals?.[0]; const ref = action.positionals?.[1]; if (sub && ref) { - parts.push(formatArg(sub)); - parts.push(formatArg(ref)); + parts.push(formatScriptArg(sub)); + parts.push(formatScriptArg(ref)); if (ref.startsWith('@')) { const refLabel = action.result?.refLabel; if (typeof refLabel === 'string' && refLabel.trim().length > 0) { - parts.push(formatArg(refLabel)); + parts.push(formatScriptArg(refLabel)); } } return parts.join(' '); @@ -263,14 +287,14 @@ function formatActionLine(action: SessionAction): string { parts.push('-d', String(action.flags.snapshotDepth)); } if (action.flags?.snapshotScope) { - parts.push('-s', formatArg(action.flags.snapshotScope)); + parts.push('-s', formatScriptArg(action.flags.snapshotScope)); } if (action.flags?.snapshotRaw) parts.push('--raw'); return parts.join(' '); } if (action.command === 'open') { for (const positional of action.positionals ?? 
[]) { - parts.push(formatArg(positional)); + parts.push(formatScriptArg(positional)); } if (action.flags?.relaunch) { parts.push('--relaunch'); @@ -278,14 +302,8 @@ function formatActionLine(action: SessionAction): string { return parts.join(' '); } for (const positional of action.positionals ?? []) { - parts.push(formatArg(positional)); + parts.push(formatScriptArg(positional)); } + appendScriptSeriesFlags(parts, action); return parts.join(' '); } - -function formatArg(value: string): string { - const trimmed = value.trim(); - if (trimmed.startsWith('@')) return trimmed; - if (/^-?\d+(\.\d+)?$/.test(trimmed)) return trimmed; - return JSON.stringify(trimmed); -} diff --git a/src/platforms/ios/runner-client.ts b/src/platforms/ios/runner-client.ts index 2261f5ea..65c1db55 100644 --- a/src/platforms/ios/runner-client.ts +++ b/src/platforms/ios/runner-client.ts @@ -15,8 +15,10 @@ import { resolveTimeoutMs, resolveTimeoutSeconds } from '../../utils/timeouts.ts export type RunnerCommand = { command: | 'tap' + | 'tapSeries' | 'longPress' | 'drag' + | 'dragSeries' | 'type' | 'swipe' | 'findText' @@ -33,6 +35,11 @@ export type RunnerCommand = { action?: 'get' | 'accept' | 'dismiss'; x?: number; y?: number; + count?: number; + intervalMs?: number; + doubleTap?: boolean; + pauseMs?: number; + pattern?: 'one-way' | 'ping-pong'; x2?: number; y2?: number; durationMs?: number; diff --git a/src/utils/__tests__/args.test.ts b/src/utils/__tests__/args.test.ts index 69d55cb5..a2c62563 100644 --- a/src/utils/__tests__/args.test.ts +++ b/src/utils/__tests__/args.test.ts @@ -68,6 +68,31 @@ test('parseArgs recognizes press series flags', () => { assert.equal(parsed.flags.jitterPx, 3); }); +test('parseArgs recognizes press selector + snapshot flags', () => { + const parsed = parseArgs(['press', '@e2', '--depth', '3', '--scope', 'Sign In', '--raw'], { strictFlags: true }); + assert.equal(parsed.command, 'press'); + assert.deepEqual(parsed.positionals, ['@e2']); + 
assert.equal(parsed.flags.snapshotDepth, 3); + assert.equal(parsed.flags.snapshotScope, 'Sign In'); + assert.equal(parsed.flags.snapshotRaw, true); +}); + +test('parseArgs recognizes click series flags', () => { + const parsed = parseArgs(['click', '@e5', '--count', '4', '--interval-ms', '10'], { strictFlags: true }); + assert.equal(parsed.command, 'click'); + assert.deepEqual(parsed.positionals, ['@e5']); + assert.equal(parsed.flags.count, 4); + assert.equal(parsed.flags.intervalMs, 10); +}); + +test('parseArgs recognizes double-tap flag for repeated press', () => { + const parsed = parseArgs(['press', '201', '545', '--count', '5', '--double-tap'], { strictFlags: true }); + assert.equal(parsed.command, 'press'); + assert.deepEqual(parsed.positionals, ['201', '545']); + assert.equal(parsed.flags.count, 5); + assert.equal(parsed.flags.doubleTap, true); +}); + test('parseArgs recognizes swipe positional + pattern flags', () => { const parsed = parseArgs([ 'swipe', @@ -125,17 +150,17 @@ test('schema capability mappings match capability source-of-truth', () => { assert.deepEqual(getSchemaCapabilityKeys(), listCapabilityCommands()); }); -test('compat mode warns and strips unsupported pilot-command flags', () => { - const parsed = parseArgs(['press', '10', '20', '--depth', '2'], { strictFlags: false }); +test('compat mode warns and strips unsupported command flags', () => { + const parsed = parseArgs(['press', '10', '20', '--pause-ms', '2'], { strictFlags: false }); assert.equal(parsed.command, 'press'); - assert.equal(parsed.flags.snapshotDepth, undefined); + assert.equal(parsed.flags.pauseMs, undefined); assert.equal(parsed.warnings.length, 1); assert.match(parsed.warnings[0], /not supported for command press/); }); test('strict mode rejects unsupported pilot-command flags', () => { assert.throws( - () => parseArgs(['press', '10', '20', '--depth', '2'], { strictFlags: true }), + () => parseArgs(['press', '10', '20', '--pause-ms', '2'], { strictFlags: true }), (error) 
=> error instanceof AppError && error.code === 'INVALID_ARGS' && diff --git a/src/utils/command-schema.ts b/src/utils/command-schema.ts index c2f91aa4..f32ca7c1 100644 --- a/src/utils/command-schema.ts +++ b/src/utils/command-schema.ts @@ -17,6 +17,7 @@ export type CliFlags = { intervalMs?: number; holdMs?: number; jitterPx?: number; + doubleTap?: boolean; pauseMs?: number; pattern?: 'one-way' | 'ping-pong'; activity?: string; @@ -149,6 +150,13 @@ export const FLAG_DEFINITIONS: readonly FlagDefinition[] = [ usageLabel: '--jitter-px ', usageDescription: 'Deterministic coordinate jitter radius for press', }, + { + key: 'doubleTap', + names: ['--double-tap'], + type: 'boolean', + usageLabel: '--double-tap', + usageDescription: 'Use double-tap gesture per press iteration', + }, { key: 'pauseMs', names: ['--pause-ms'], @@ -369,10 +377,11 @@ export const COMMAND_SCHEMAS: Record = { allowedFlags: [], }, click: { - usageOverride: 'click <@ref|selector>', - description: 'Click element by snapshot ref or selector', + usageOverride: 'click ', + description: 'Tap/click by coordinates, snapshot ref, or selector', positionalArgs: ['target'], - allowedFlags: [...SELECTOR_SNAPSHOT_FLAGS], + allowsExtraPositionals: true, + allowedFlags: ['count', 'intervalMs', 'holdMs', 'jitterPx', 'doubleTap', ...SELECTOR_SNAPSHOT_FLAGS], }, get: { usageOverride: 'get text|attrs <@ref|selector>', @@ -387,9 +396,11 @@ export const COMMAND_SCHEMAS: Record = { skipCapabilityCheck: true, }, press: { - description: 'Tap/press at coordinates (supports repeated gesture series)', - positionalArgs: ['x', 'y'], - allowedFlags: ['count', 'intervalMs', 'holdMs', 'jitterPx'], + usageOverride: 'press ', + description: 'Tap/press by coordinates, snapshot ref, or selector (supports repeated series)', + positionalArgs: ['targetOrX', 'y?'], + allowsExtraPositionals: true, + allowedFlags: ['count', 'intervalMs', 'holdMs', 'jitterPx', 'doubleTap', ...SELECTOR_SNAPSHOT_FLAGS], }, 'long-press': { description: 'Long 
press (where supported)', diff --git a/src/utils/interactors.ts b/src/utils/interactors.ts index ffa14a04..044db87e 100644 --- a/src/utils/interactors.ts +++ b/src/utils/interactors.ts @@ -34,6 +34,7 @@ export type Interactor = { openDevice(): Promise; close(app: string): Promise; tap(x: number, y: number): Promise; + doubleTap(x: number, y: number): Promise; swipe(x1: number, y1: number, x2: number, y2: number, durationMs?: number): Promise; longPress(x: number, y: number, durationMs?: number): Promise; focus(x: number, y: number): Promise; @@ -52,6 +53,10 @@ export function getInteractor(device: DeviceInfo, runnerContext: RunnerContext): openDevice: () => openAndroidDevice(device), close: (app) => closeAndroidApp(device, app), tap: (x, y) => pressAndroid(device, x, y), + doubleTap: async (x, y) => { + await pressAndroid(device, x, y); + await pressAndroid(device, x, y); + }, swipe: (x1, y1, x2, y2, durationMs) => swipeAndroid(device, x1, y1, x2, y2, durationMs), longPress: (x, y, durationMs) => longPressAndroid(device, x, y, durationMs), focus: (x, y) => focusAndroid(device, x, y), @@ -74,7 +79,10 @@ export function getInteractor(device: DeviceInfo, runnerContext: RunnerContext): } } -type IoRunnerOverrides = Pick; +type IoRunnerOverrides = Pick< + Interactor, + 'tap' | 'doubleTap' | 'swipe' | 'longPress' | 'focus' | 'type' | 'fill' | 'scroll' | 'scrollIntoView' +>; function iosRunnerOverrides(device: DeviceInfo, ctx: RunnerContext): IoRunnerOverrides { const runnerOpts = { verbose: ctx.verbose, logPath: ctx.logPath, traceLogPath: ctx.traceLogPath }; @@ -87,6 +95,13 @@ function iosRunnerOverrides(device: DeviceInfo, ctx: RunnerContext): IoRunnerOve runnerOpts, ); }, + doubleTap: async (x, y) => { + await runIosRunnerCommand( + device, + { command: 'tapSeries', x, y, count: 1, intervalMs: 0, doubleTap: true, appBundleId: ctx.appBundleId }, + runnerOpts, + ); + }, swipe: async (x1, y1, x2, y2, durationMs) => { await runIosRunnerCommand( device,