diff --git a/e2e/test/tauri/actions.spec.ts b/e2e/test/tauri/actions.spec.ts
new file mode 100644
index 000000000..76c77a262
--- /dev/null
+++ b/e2e/test/tauri/actions.spec.ts
@@ -0,0 +1,39 @@
+import { browser, expect } from '@wdio/globals';
+import '@wdio/native-types';
+
+/**
+ * `element.click(options)` routes through the W3C Actions API (performActions),
+ * not the elementClick endpoint that bare `element.click()` uses. Both must land
+ * on the same element.
+ */
+describe('W3C Actions API (click with options)', () => {
+  it('click({}) lands on the same target as bare click()', async () => {
+    const counter = await browser.$('#counter');
+    const increment = await browser.$('#increment-button');
+    const reset = await browser.$('#reset-button');
+
+    await reset.click();
+    await expect(counter).toHaveText('0');
+
+    // Bare click() uses the elementClick endpoint — already worked.
+    await increment.click();
+    await expect(counter).toHaveText('1');
+
+    // click({}) routes through performActions with an element origin; it must
+    // land on the button, not viewport (0,0).
+    await increment.click({});
+    await expect(counter).toHaveText('2');
+  });
+
+  it('click({ button: "left" }) lands on the target', async () => {
+    const counter = await browser.$('#counter');
+    const increment = await browser.$('#increment-button');
+    const reset = await browser.$('#reset-button');
+
+    await reset.click();
+    await expect(counter).toHaveText('0');
+
+    await increment.click({ button: 'left' });
+    await expect(counter).toHaveText('1');
+  });
+});
diff --git a/packages/tauri-plugin-webdriver/src/platform/executor.rs b/packages/tauri-plugin-webdriver/src/platform/executor.rs
index fcb5609d7..86e7616fe 100644
--- a/packages/tauri-plugin-webdriver/src/platform/executor.rs
+++ b/packages/tauri-plugin-webdriver/src/platform/executor.rs
@@ -47,6 +47,7 @@ pub enum PointerEventType {
     Down,
     Up,
     Move,
+    Click,
 }
 
 /// Cookie data
@@ -447,6 +448,43 @@ pub trait PlatformExecutor: Send + Sync {
         Ok(ElementRect::default())
     }
 
+    /// Get an element's in-view center point in **client (viewport)** coordinates,
+    /// scrolling it into view first. Unlike [`PlatformExecutor::get_element_rect`], this
+    /// does not add scroll offsets: pointer events dispatch against viewport
+    /// coordinates (`clientX`/`clientY`), so the center must be viewport-relative.
+    async fn get_element_center(&self, js_var: &str) -> Result<(i32, i32), WebDriverErrorResponse> {
+        let script = format!(
+            r"(function() {{
+                var el = window.{js_var};
+                if (!el || !el.isConnected) {{
+                    throw new Error('stale element reference');
+                }}
+                el.scrollIntoView({{ behavior: 'instant', block: 'center', inline: 'center' }});
+                var r = el.getBoundingClientRect();
+                return {{
+                    x: Math.floor(r.left + r.width / 2),
+                    y: Math.floor(r.top + r.height / 2)
+                }};
+            }})()"
+        );
+        let result = self.evaluate_js(&script).await?;
+
+        let value = result
+            .get("value")
+            .cloned()
+            .ok_or_else(|| WebDriverErrorResponse::unknown_error("element center script returned no value"))?;
+
+        #[derive(serde::Deserialize)]
+        struct Center {
+            x: f64, // f64: a bridge may report `150.0`, which serde_json rejects for i32
+            y: f64,
+        }
+        let center: Center = serde_json::from_value(value).map_err(|err| {
+            WebDriverErrorResponse::unknown_error(&format!("could not read element center: {err}"))
+        })?;
+        Ok((center.x as i32, center.y as i32)) // already floored in JS; `as` saturates
+    }
+
     /// Check if element is displayed
     async fn is_element_displayed(&self, js_var: &str) -> Result {
         let script = format!(
@@ -1352,6 +1390,10 @@ pub trait PlatformExecutor: Send + Sync {
             PointerEventType::Down => "mousedown",
             PointerEventType::Up => "mouseup",
             PointerEventType::Move => "mousemove",
+            // Manually dispatched mousedown/mouseup do NOT make the browser
+            // synthesize a click, so element click handlers never fire. The
+            // actions handler emits this explicitly after a same-spot down+up.
+            PointerEventType::Click => "click",
         };
 
         let buttons = if matches!(event_type, PointerEventType::Down) {
diff --git a/packages/tauri-plugin-webdriver/src/server/handlers/actions.rs b/packages/tauri-plugin-webdriver/src/server/handlers/actions.rs
index 80c7e4200..5f177c13c 100644
--- a/packages/tauri-plugin-webdriver/src/server/handlers/actions.rs
+++ b/packages/tauri-plugin-webdriver/src/server/handlers/actions.rs
@@ -1,3 +1,4 @@
+use std::collections::HashMap;
 use std::sync::Arc;
 
 use axum::extract::{Path, State};
@@ -6,7 +7,7 @@ use serde::Deserialize;
 use tauri::Runtime;
 
 use crate::platform::{ModifierState, PointerEventType};
-use crate::server::response::{WebDriverResponse, WebDriverResult};
+use crate::server::response::{WebDriverErrorResponse, WebDriverResponse, WebDriverResult};
 use crate::server::AppState;
 
 #[derive(Debug, Deserialize)]
@@ -65,11 +66,31 @@ pub enum PointerAction {
         x: i32,
         y: i32,
         duration: Option<u64>,
+        #[serde(default)]
+        origin: Option<Origin>,
     },
     #[serde(rename = "pause")]
     Pause { duration: Option },
 }
 
+/// W3C JSON key identifying a web element reference.
+const ELEMENT_KEY: &str = "element-6066-11e4-a52e-4f735466cecf";
+
+/// Coordinate origin for a `pointerMove`. Per the WebDriver Actions spec the
+/// `origin` is either the string `"viewport"` (the default — x/y are absolute
+/// viewport coordinates) or `"pointer"` (x/y are relative to the current pointer
+/// position), or an element reference object
+/// `{ "element-6066-11e4-a52e-4f735466cecf": "<element-id>" }` (x/y are offsets from the
+/// element's in-view center point). WebdriverIO sends the element form for
+/// `element.click(options)` with x/y defaulting to 0, so this must resolve to the
+/// element's center rather than viewport (0,0).
+#[derive(Debug, Deserialize)]
+#[serde(untagged)]
+pub enum Origin {
+    Named(String),
+    Element(HashMap<String, String>),
+}
+
 #[derive(Debug, Deserialize)]
 #[serde(tag = "type")]
 pub enum WheelAction {
@@ -109,18 +130,25 @@ pub async fn perform(
     Json(request): Json,
 ) -> WebDriverResult {
     // Get session info and executor first
-    let (current_window, timeouts, frame_context) = {
+    let (current_window, timeouts, frame_context, pointer_position) = {
         let sessions = state.sessions.read().await;
         let session = sessions.get(&session_id)?;
         (
             session.current_window.clone(),
             session.timeouts.clone(),
             session.frame_context.clone(),
+            session.action_state.pointer_position,
         )
     };
     let executor = state.get_executor_for_window(&current_window, timeouts, frame_context)?;
 
-    let mut pointer_state = PointerState { x: 0, y: 0 };
+    let mut pointer_state = PointerState {
+        x: pointer_position.0,
+        y: pointer_position.1,
+    };
+    // Position of the last primary-button press, used to synthesize a `click`
+    // when the matching release lands on the same spot (a click, not a drag).
+    let mut primary_down_pos: Option<(i32, i32)> = None;
     let mut modifier_state = ModifierState::default();
 
     for action_seq in &request.actions {
@@ -170,6 +198,9 @@ pub async fn perform(
                                     *button,
                                 )
                                 .await?;
+                            if *button == 0 {
+                                primary_down_pos = Some((pointer_state.x, pointer_state.y));
+                            }
                             // Track pressed button
                             let mut sessions = state.sessions.write().await;
                             if let Ok(session) = sessions.get_mut(&session_id) {
@@ -190,6 +221,25 @@ pub async fn perform(
                                     *button,
                                 )
                                 .await?;
+                            // A primary press + release on the same spot is a
+                            // click; emit the click event the browser would
+                            // synthesize for real input so element handlers fire.
+                            // Only the primary button's release consumes/clears
+                            // the press state — a non-primary release in between
+                            // must not drop it.
+                            if *button == 0 {
+                                if primary_down_pos == Some((pointer_state.x, pointer_state.y)) {
+                                    executor
+                                        .dispatch_pointer_event(
+                                            PointerEventType::Click,
+                                            pointer_state.x,
+                                            pointer_state.y,
+                                            *button,
+                                        )
+                                        .await?;
+                                }
+                                primary_down_pos = None;
+                            }
                             // Remove from tracked buttons
                             let mut sessions = state.sessions.write().await;
                             if let Ok(session) = sessions.get_mut(&session_id) {
@@ -200,9 +250,53 @@ pub async fn perform(
                                 }
                             }
                         }
-                        PointerAction::PointerMove { x, y, duration } => {
-                            pointer_state.x = *x;
-                            pointer_state.y = *y;
+                        PointerAction::PointerMove {
+                            x,
+                            y,
+                            duration,
+                            origin,
+                        } => {
+                            let (target_x, target_y) = match origin {
+                                // No origin (the default) or "viewport": x/y are absolute viewport coords.
+                                None => (*x, *y),
+                                Some(Origin::Named(name)) if name == "viewport" => (*x, *y),
+                                Some(Origin::Named(name)) if name == "pointer" => {
+                                    // Saturate: x/y are client-supplied, and an overflowing
+                                    // add would panic in debug builds (wrap in release).
+                                    (
+                                        pointer_state.x.saturating_add(*x),
+                                        pointer_state.y.saturating_add(*y),
+                                    )
+                                }
+                                // The spec defines only "viewport" and "pointer" as named origins;
+                                // reject anything else rather than silently treating it as viewport.
+                                Some(Origin::Named(name)) => {
+                                    return Err(WebDriverErrorResponse::invalid_argument(&format!(
+                                        "pointerMove origin '{name}' is not a recognised named origin (expected 'viewport' or 'pointer')"
+                                    )));
+                                }
+                                Some(Origin::Element(refs)) => {
+                                    let element_id = refs.get(ELEMENT_KEY).ok_or_else(|| {
+                                        WebDriverErrorResponse::invalid_argument(
+                                            "pointerMove origin is missing a web element reference",
+                                        )
+                                    })?;
+                                    let js_var = {
+                                        let sessions = state.sessions.read().await;
+                                        let session = sessions.get(&session_id)?;
+                                        session
+                                            .elements
+                                            .get(element_id)
+                                            .ok_or_else(WebDriverErrorResponse::no_such_element)?
+                                            .js_ref
+                                            .clone()
+                                    };
+                                    let (cx, cy) = executor.get_element_center(&js_var).await?;
+                                    (cx.saturating_add(*x), cy.saturating_add(*y))
+                                }
+                            };
+                            pointer_state.x = target_x;
+                            pointer_state.y = target_y;
                             if let Some(ms) = duration {
                                 if *ms > 0 {
                                     tokio::time::sleep(std::time::Duration::from_millis(*ms)).await;
@@ -266,6 +360,15 @@ pub async fn perform(
         }
     }
 
+    // Persist the final pointer position so a later performActions call with
+    // origin: "pointer" resolves relative to it instead of (0, 0).
+    {
+        let mut sessions = state.sessions.write().await;
+        if let Ok(session) = sessions.get_mut(&session_id) {
+            session.action_state.pointer_position = (pointer_state.x, pointer_state.y);
+        }
+    }
+
     Ok(WebDriverResponse::null())
 }
 
@@ -280,6 +383,7 @@ pub async fn release(
         let session = sessions.get_mut(&session_id)?;
         let pressed_keys: Vec = session.action_state.pressed_keys.drain().collect();
         let pressed_buttons = std::mem::take(&mut session.action_state.pressed_buttons);
+        session.action_state.pointer_position = (0, 0);
         (
             session.current_window.clone(),
             session.timeouts.clone(),
diff --git a/packages/tauri-plugin-webdriver/src/webdriver/session.rs b/packages/tauri-plugin-webdriver/src/webdriver/session.rs
index b4203d30f..fb7762d07 100644
--- a/packages/tauri-plugin-webdriver/src/webdriver/session.rs
+++ b/packages/tauri-plugin-webdriver/src/webdriver/session.rs
@@ -14,6 +14,10 @@ pub struct ActionState {
     pub pressed_keys: HashSet,
     /// Currently pressed pointer buttons by source ID
     pub pressed_buttons: HashMap>,
+    /// Last pointer position in viewport coordinates. Persisted across
+    /// `performActions` calls so an `origin: "pointer"` move resolves relative to
+    /// where the pointer actually is, not (0, 0) at the start of every call.
+    pub pointer_position: (i32, i32),
 }
 
 /// Session timeouts configuration