From 14f48f4e65f6b06fa229ea2b909494c0afbe6e89 Mon Sep 17 00:00:00 2001 From: Sam Maister Date: Fri, 19 Jun 2026 02:59:58 +0100 Subject: [PATCH 1/6] fix(tauri-plugin-webdriver): resolve pointerMove origin so click(options) hits the target element.click(options) routes through the W3C Actions API instead of the elementClick endpoint. The embedded driver's PointerMove ignored the action `origin`, so WebdriverIO's element-relative move (origin=element, x/y=0) was treated as absolute viewport coordinates and landed at (0,0), missing the target. Resolve viewport/pointer/element origins and add Executor::get_element_center (client/viewport coords). Adds an e2e regression test (the repro from #423). Fixes #423 Co-Authored-By: Claude Opus 4.8 (1M context) --- e2e/test/tauri/actions.spec.ts | 39 ++++++++++++ .../src/platform/executor.rs | 37 +++++++++++ .../src/server/handlers/actions.rs | 61 +++++++++++++++++-- 3 files changed, 133 insertions(+), 4 deletions(-) create mode 100644 e2e/test/tauri/actions.spec.ts diff --git a/e2e/test/tauri/actions.spec.ts b/e2e/test/tauri/actions.spec.ts new file mode 100644 index 000000000..76c77a262 --- /dev/null +++ b/e2e/test/tauri/actions.spec.ts @@ -0,0 +1,39 @@ +import { browser, expect } from '@wdio/globals'; +import '@wdio/native-types'; + +/** + * `element.click(options)` routes through the W3C Actions API (performActions), + * not the elementClick endpoint that bare `element.click()` uses. Both must land + * on the same element. + */ +describe('W3C Actions API (click with options)', () => { + it('click({}) lands on the same target as bare click()', async () => { + const counter = await browser.$('#counter'); + const increment = await browser.$('#increment-button'); + const reset = await browser.$('#reset-button'); + + await reset.click(); + await expect(counter).toHaveText('0'); + + // Bare click() uses the elementClick endpoint — already worked. 
+ await increment.click(); + await expect(counter).toHaveText('1'); + + // click({}) routes through performActions with an element origin; it must + // land on the button, not viewport (0,0). + await increment.click({}); + await expect(counter).toHaveText('2'); + }); + + it('click({ button: "left" }) lands on the target', async () => { + const counter = await browser.$('#counter'); + const increment = await browser.$('#increment-button'); + const reset = await browser.$('#reset-button'); + + await reset.click(); + await expect(counter).toHaveText('0'); + + await increment.click({ button: 'left' }); + await expect(counter).toHaveText('1'); + }); +}); diff --git a/packages/tauri-plugin-webdriver/src/platform/executor.rs b/packages/tauri-plugin-webdriver/src/platform/executor.rs index fcb5609d7..3c1100034 100644 --- a/packages/tauri-plugin-webdriver/src/platform/executor.rs +++ b/packages/tauri-plugin-webdriver/src/platform/executor.rs @@ -447,6 +447,43 @@ pub trait PlatformExecutor: Send + Sync { Ok(ElementRect::default()) } + /// Get an element's in-view center point in **client (viewport)** coordinates, + /// scrolling it into view first. Unlike [`Executor::get_element_rect`], this + /// does not add scroll offsets: pointer events dispatch against viewport + /// coordinates (`clientX`/`clientY`), so the center must be viewport-relative. 
+ async fn get_element_center(&self, js_var: &str) -> Result<(i32, i32), WebDriverErrorResponse> { + let script = format!( + r"(function() {{ + var el = window.{js_var}; + if (!el || !el.isConnected) {{ + throw new Error('stale element reference'); + }} + el.scrollIntoView({{ block: 'center', inline: 'center' }}); + var r = el.getBoundingClientRect(); + return {{ + x: Math.round(r.left + r.width / 2), + y: Math.round(r.top + r.height / 2) + }}; + }})()" + ); + let result = self.evaluate_js(&script).await?; + + let value = result + .get("value") + .cloned() + .ok_or_else(|| WebDriverErrorResponse::unknown_error("element center script returned no value"))?; + + #[derive(serde::Deserialize)] + struct Center { + x: i32, + y: i32, + } + let center: Center = serde_json::from_value(value).map_err(|err| { + WebDriverErrorResponse::unknown_error(&format!("could not read element center: {err}")) + })?; + Ok((center.x, center.y)) + } + /// Check if element is displayed async fn is_element_displayed(&self, js_var: &str) -> Result { let script = format!( diff --git a/packages/tauri-plugin-webdriver/src/server/handlers/actions.rs b/packages/tauri-plugin-webdriver/src/server/handlers/actions.rs index 80c7e4200..4ed26a2f1 100644 --- a/packages/tauri-plugin-webdriver/src/server/handlers/actions.rs +++ b/packages/tauri-plugin-webdriver/src/server/handlers/actions.rs @@ -1,3 +1,4 @@ +use std::collections::HashMap; use std::sync::Arc; use axum::extract::{Path, State}; @@ -6,7 +7,7 @@ use serde::Deserialize; use tauri::Runtime; use crate::platform::{ModifierState, PointerEventType}; -use crate::server::response::{WebDriverResponse, WebDriverResult}; +use crate::server::response::{WebDriverErrorResponse, WebDriverResponse, WebDriverResult}; use crate::server::AppState; #[derive(Debug, Deserialize)] @@ -65,11 +66,31 @@ pub enum PointerAction { x: i32, y: i32, duration: Option<u64>, + #[serde(default)] + origin: Option<Origin>, }, #[serde(rename = "pause")] Pause { duration: Option }, } +/// W3C JSON 
key identifying a web element reference. +const ELEMENT_KEY: &str = "element-6066-11e4-a52e-4f735466cecf"; + +/// Coordinate origin for a `pointerMove`. Per the WebDriver Actions spec the +/// `origin` is either the string `"viewport"` (the default — x/y are absolute +/// viewport coordinates) or `"pointer"` (x/y are relative to the current pointer +/// position), or an element reference object +/// `{ "element-6066-11e4-a52e-4f735466cecf": "" }` (x/y are offsets from the +/// element's in-view center point). WebdriverIO sends the element form for +/// `element.click(options)` with x/y defaulting to 0, so this must resolve to the +/// element's center rather than viewport (0,0). +#[derive(Debug, Deserialize)] +#[serde(untagged)] +pub enum Origin { + Named(String), + Element(HashMap), +} + #[derive(Debug, Deserialize)] #[serde(tag = "type")] pub enum WheelAction { @@ -200,9 +221,41 @@ pub async fn perform( } } } - PointerAction::PointerMove { x, y, duration } => { - pointer_state.x = *x; - pointer_state.y = *y; + PointerAction::PointerMove { + x, + y, + duration, + origin, + } => { + let (target_x, target_y) = match origin { + // No origin or "viewport": x/y are absolute viewport coords. + None => (*x, *y), + Some(Origin::Named(name)) if name == "pointer" => { + (pointer_state.x + *x, pointer_state.y + *y) + } + Some(Origin::Named(_)) => (*x, *y), + Some(Origin::Element(refs)) => { + let element_id = refs.get(ELEMENT_KEY).ok_or_else(|| { + WebDriverErrorResponse::invalid_argument( + "pointerMove origin is missing a web element reference", + ) + })?; + let js_var = { + let sessions = state.sessions.read().await; + let session = sessions.get(&session_id)?; + session + .elements + .get(element_id) + .ok_or_else(WebDriverErrorResponse::no_such_element)? 
+ .js_ref + .clone() + }; + let (cx, cy) = executor.get_element_center(&js_var).await?; + (cx + *x, cy + *y) + } + }; + pointer_state.x = target_x; + pointer_state.y = target_y; if let Some(ms) = duration { if *ms > 0 { tokio::time::sleep(std::time::Duration::from_millis(*ms)).await; From 7a427cc8f54e96ddc021df841a302ce01f1522d3 Mon Sep 17 00:00:00 2001 From: Sam Maister Date: Fri, 19 Jun 2026 08:48:46 +0100 Subject: [PATCH 2/6] fix(tauri-plugin-webdriver): use instant scroll + floor for element center Address review feedback on get_element_center: scrollIntoView uses behavior 'instant' so getBoundingClientRect is not read mid-animation under scroll-behavior: smooth, and the center uses Math.floor to match the W3C "in-view center point" algorithm (avoids a 1px divergence at sub-pixel element boundaries). Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/tauri-plugin-webdriver/src/platform/executor.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/tauri-plugin-webdriver/src/platform/executor.rs b/packages/tauri-plugin-webdriver/src/platform/executor.rs index 3c1100034..b01fd3963 100644 --- a/packages/tauri-plugin-webdriver/src/platform/executor.rs +++ b/packages/tauri-plugin-webdriver/src/platform/executor.rs @@ -458,11 +458,11 @@ pub trait PlatformExecutor: Send + Sync { if (!el || !el.isConnected) {{ throw new Error('stale element reference'); }} - el.scrollIntoView({{ block: 'center', inline: 'center' }}); + el.scrollIntoView({{ behavior: 'instant', block: 'center', inline: 'center' }}); var r = el.getBoundingClientRect(); return {{ - x: Math.round(r.left + r.width / 2), - y: Math.round(r.top + r.height / 2) + x: Math.floor(r.left + r.width / 2), + y: Math.floor(r.top + r.height / 2) }}; }})()" ); From 47e2af5705608642ee4066b63da4fc3bd8341286 Mon Sep 17 00:00:00 2001 From: Sam Maister Date: Fri, 19 Jun 2026 09:13:54 +0100 Subject: [PATCH 3/6] fix(tauri-plugin-webdriver): persist pointer position across performActions 
calls Address review feedback: pointer_state was a local reinitialized to (0, 0) at the start of every perform() call, so an origin: "pointer" move in a later performActions call computed from (0, 0) instead of the pointer's actual position. Persist it in the session's ActionState (read at the start of perform, written back at the end) and reset it on release, matching how pressed keys/buttons are tracked. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../src/server/handlers/actions.rs | 18 ++++++++++++++++-- .../src/webdriver/session.rs | 4 ++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/packages/tauri-plugin-webdriver/src/server/handlers/actions.rs b/packages/tauri-plugin-webdriver/src/server/handlers/actions.rs index 4ed26a2f1..520fad3f2 100644 --- a/packages/tauri-plugin-webdriver/src/server/handlers/actions.rs +++ b/packages/tauri-plugin-webdriver/src/server/handlers/actions.rs @@ -130,18 +130,22 @@ pub async fn perform( Json(request): Json, ) -> WebDriverResult { // Get session info and executor first - let (current_window, timeouts, frame_context) = { + let (current_window, timeouts, frame_context, pointer_position) = { let sessions = state.sessions.read().await; let session = sessions.get(&session_id)?; ( session.current_window.clone(), session.timeouts.clone(), session.frame_context.clone(), + session.action_state.pointer_position, ) }; let executor = state.get_executor_for_window(&current_window, timeouts, frame_context)?; - let mut pointer_state = PointerState { x: 0, y: 0 }; + let mut pointer_state = PointerState { + x: pointer_position.0, + y: pointer_position.1, + }; let mut modifier_state = ModifierState::default(); for action_seq in &request.actions { @@ -319,6 +323,15 @@ pub async fn perform( } } + // Persist the final pointer position so a later performActions call with + // origin: "pointer" resolves relative to it instead of (0, 0). 
+ { + let mut sessions = state.sessions.write().await; + if let Ok(session) = sessions.get_mut(&session_id) { + session.action_state.pointer_position = (pointer_state.x, pointer_state.y); + } + } + Ok(WebDriverResponse::null()) } @@ -333,6 +346,7 @@ pub async fn release( let session = sessions.get_mut(&session_id)?; let pressed_keys: Vec = session.action_state.pressed_keys.drain().collect(); let pressed_buttons = std::mem::take(&mut session.action_state.pressed_buttons); + session.action_state.pointer_position = (0, 0); ( session.current_window.clone(), session.timeouts.clone(), diff --git a/packages/tauri-plugin-webdriver/src/webdriver/session.rs b/packages/tauri-plugin-webdriver/src/webdriver/session.rs index b4203d30f..fb7762d07 100644 --- a/packages/tauri-plugin-webdriver/src/webdriver/session.rs +++ b/packages/tauri-plugin-webdriver/src/webdriver/session.rs @@ -14,6 +14,10 @@ pub struct ActionState { pub pressed_keys: HashSet, /// Currently pressed pointer buttons by source ID pub pressed_buttons: HashMap>, + /// Last pointer position in viewport coordinates. Persisted across + /// `performActions` calls so an `origin: "pointer"` move resolves relative to + /// where the pointer actually is, not (0, 0) at the start of every call. + pub pointer_position: (i32, i32), } /// Session timeouts configuration From 7d2c7c965d68698c251ae798d6c8a49cee97fd26 Mon Sep 17 00:00:00 2001 From: Sam Maister Date: Fri, 19 Jun 2026 18:37:39 +0100 Subject: [PATCH 4/6] fix(tauri-plugin-webdriver): synthesize a click event on Actions-based clicks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit element.click(options) routes through the W3C Actions API, which the embedded driver dispatched as bare mousedown/mouseup MouseEvents. Manually dispatched mouse events don't make the browser synthesize a click, so element click handlers never fired — .click({}) moved to the right spot (after the origin fix) but still didn't actually click. 
Emit a click event after a primary-button press + release on the same position (a click, not a drag). Real WebDriver providers (official/crabnebula) already do this; this brings the embedded driver in line, fixing the e2e on the embedded provider. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../src/platform/executor.rs | 5 +++++ .../src/server/handlers/actions.rs | 20 +++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/packages/tauri-plugin-webdriver/src/platform/executor.rs b/packages/tauri-plugin-webdriver/src/platform/executor.rs index b01fd3963..86e7616fe 100644 --- a/packages/tauri-plugin-webdriver/src/platform/executor.rs +++ b/packages/tauri-plugin-webdriver/src/platform/executor.rs @@ -47,6 +47,7 @@ pub enum PointerEventType { Down, Up, Move, + Click, } /// Cookie data @@ -1389,6 +1390,10 @@ pub trait PlatformExecutor: Send + Sync { PointerEventType::Down => "mousedown", PointerEventType::Up => "mouseup", PointerEventType::Move => "mousemove", + // Manually dispatched mousedown/mouseup do NOT make the browser + // synthesize a click, so element click handlers never fire. The + // actions handler emits this explicitly after a same-spot down+up. + PointerEventType::Click => "click", }; let buttons = if matches!(event_type, PointerEventType::Down) { diff --git a/packages/tauri-plugin-webdriver/src/server/handlers/actions.rs b/packages/tauri-plugin-webdriver/src/server/handlers/actions.rs index 520fad3f2..cd672f967 100644 --- a/packages/tauri-plugin-webdriver/src/server/handlers/actions.rs +++ b/packages/tauri-plugin-webdriver/src/server/handlers/actions.rs @@ -146,6 +146,9 @@ pub async fn perform( x: pointer_position.0, y: pointer_position.1, }; + // Position of the last primary-button press, used to synthesize a `click` + // when the matching release lands on the same spot (a click, not a drag). 
+ let mut primary_down_pos: Option<(i32, i32)> = None; let mut modifier_state = ModifierState::default(); for action_seq in &request.actions { @@ -195,6 +198,9 @@ pub async fn perform( *button, ) .await?; + if *button == 0 { + primary_down_pos = Some((pointer_state.x, pointer_state.y)); + } // Track pressed button let mut sessions = state.sessions.write().await; if let Ok(session) = sessions.get_mut(&session_id) { @@ -215,6 +221,20 @@ pub async fn perform( *button, ) .await?; + // A primary press + release on the same spot is a + // click; emit the click event the browser would + // synthesize for real input so element handlers fire. + if *button == 0 && primary_down_pos == Some((pointer_state.x, pointer_state.y)) { + executor + .dispatch_pointer_event( + PointerEventType::Click, + pointer_state.x, + pointer_state.y, + *button, + ) + .await?; + } + primary_down_pos = None; // Remove from tracked buttons let mut sessions = state.sessions.write().await; if let Ok(session) = sessions.get_mut(&session_id) { From 36c05d95c85be86f6085579d5362d38cc957d5aa Mon Sep 17 00:00:00 2001 From: Sam Maister Date: Fri, 19 Jun 2026 20:56:52 +0100 Subject: [PATCH 5/6] fix(tauri-plugin-webdriver): only clear primary-down state on a primary release Address review feedback: primary_down_pos was reset on every PointerUp regardless of button, so a non-primary release between a primary down and up (button 0 down -> button 1 up -> button 0 up) would drop the press state and suppress the synthesized click. Gate both the click synthesis and the reset on the primary button. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .../src/server/handlers/actions.rs | 25 +++++++++++-------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/packages/tauri-plugin-webdriver/src/server/handlers/actions.rs b/packages/tauri-plugin-webdriver/src/server/handlers/actions.rs index cd672f967..0fa511208 100644 --- a/packages/tauri-plugin-webdriver/src/server/handlers/actions.rs +++ b/packages/tauri-plugin-webdriver/src/server/handlers/actions.rs @@ -224,17 +224,22 @@ pub async fn perform( // A primary press + release on the same spot is a // click; emit the click event the browser would // synthesize for real input so element handlers fire. - if *button == 0 && primary_down_pos == Some((pointer_state.x, pointer_state.y)) { - executor - .dispatch_pointer_event( - PointerEventType::Click, - pointer_state.x, - pointer_state.y, - *button, - ) - .await?; + // Only the primary button's release consumes/clears + // the press state — a non-primary release in between + // must not drop it. + if *button == 0 { + if primary_down_pos == Some((pointer_state.x, pointer_state.y)) { + executor + .dispatch_pointer_event( + PointerEventType::Click, + pointer_state.x, + pointer_state.y, + *button, + ) + .await?; + } + primary_down_pos = None; } - primary_down_pos = None; // Remove from tracked buttons let mut sessions = state.sessions.write().await; if let Ok(session) = sessions.get_mut(&session_id) { From a548fafe9b117e3ba7f1493d1a5d8c26af97b59f Mon Sep 17 00:00:00 2001 From: Sam Maister Date: Fri, 19 Jun 2026 21:53:32 +0100 Subject: [PATCH 6/6] fix(tauri-plugin-webdriver): reject unrecognised named pointerMove origins The PointerMove origin match treated any named origin as viewport via a catch-all `Some(Origin::Named(_)) => (*x, *y)`. The W3C Actions spec defines only "viewport" and "pointer" as named origins, so give "viewport" its own arm and return invalid argument for anything else instead of silently falling through to viewport behaviour. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .../src/server/handlers/actions.rs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/packages/tauri-plugin-webdriver/src/server/handlers/actions.rs b/packages/tauri-plugin-webdriver/src/server/handlers/actions.rs index 0fa511208..5f177c13c 100644 --- a/packages/tauri-plugin-webdriver/src/server/handlers/actions.rs +++ b/packages/tauri-plugin-webdriver/src/server/handlers/actions.rs @@ -257,12 +257,19 @@ pub async fn perform( origin, } => { let (target_x, target_y) = match origin { - // No origin or "viewport": x/y are absolute viewport coords. + // No origin (the default) or "viewport": x/y are absolute viewport coords. None => (*x, *y), + Some(Origin::Named(name)) if name == "viewport" => (*x, *y), Some(Origin::Named(name)) if name == "pointer" => { (pointer_state.x + *x, pointer_state.y + *y) } - Some(Origin::Named(_)) => (*x, *y), + // The spec defines only "viewport" and "pointer" as named origins; + // reject anything else rather than silently treating it as viewport. + Some(Origin::Named(name)) => { + return Err(WebDriverErrorResponse::invalid_argument(&format!( + "pointerMove origin '{name}' is not a recognised named origin (expected 'viewport' or 'pointer')" + ))); + } Some(Origin::Element(refs)) => { let element_id = refs.get(ELEMENT_KEY).ok_or_else(|| { WebDriverErrorResponse::invalid_argument(