diff --git a/CHANGELOG.md b/CHANGELOG.md index c5014065..54ab8111 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Security + +- Validate synthetic ID format on inbound values from the `x-synthetic-id` header and `synthetic_id` cookie; values that do not match the expected format (`64-hex-hmac.6-alphanumeric-suffix`) are discarded and a fresh ID is generated rather than forwarded to response headers, cookies, or third-party APIs + ### Added - Implemented basic authentication for configurable endpoint paths (#73) diff --git a/crates/common/src/integrations/registry.rs b/crates/common/src/integrations/registry.rs index 158a1e29..a4a15b63 100644 --- a/crates/common/src/integrations/registry.rs +++ b/crates/common/src/integrations/registry.rs @@ -1310,8 +1310,15 @@ mod tests { let registry = IntegrationRegistry::from_routes(routes); let mut req = Request::get("https://test.example.com/integrations/test/synthetic"); - // Pre-existing cookie - req.set_header(header::COOKIE, "synthetic_id=existing_id_12345"); + // Pre-existing cookie with a valid-format synthetic ID + req.set_header( + header::COOKIE, + format!( + "{}={}", + crate::constants::COOKIE_SYNTHETIC_ID, + crate::test_support::tests::VALID_SYNTHETIC_ID + ), + ); let result = futures::executor::block_on(registry.handle_proxy( &Method::GET, diff --git a/crates/common/src/proxy.rs b/crates/common/src/proxy.rs index 99db6328..9b13884d 100644 --- a/crates/common/src/proxy.rs +++ b/crates/common/src/proxy.rs @@ -1291,7 +1291,8 @@ mod tests { sig ), ); - req.set_header(crate::constants::HEADER_X_SYNTHETIC_ID, "synthetic-123"); + let valid_synthetic_id = crate::test_support::tests::VALID_SYNTHETIC_ID; + req.set_header(crate::constants::HEADER_X_SYNTHETIC_ID, valid_synthetic_id); let resp = handle_first_party_click(&settings, req) .await @@ -1309,7 +1310,7 @@ mod tests { assert_eq!(pairs.remove("foo").as_deref(), Some("1")); assert_eq!( pairs.remove("synthetic_id").as_deref(), - Some("synthetic-123") + Some(valid_synthetic_id) ); assert!(pairs.is_empty()); } diff --git a/crates/common/src/synthetic.rs b/crates/common/src/synthetic.rs index 853c7141..22bd3eec 100644 --- a/crates/common/src/synthetic.rs +++ b/crates/common/src/synthetic.rs @@ -25,6 +25,35 @@ type HmacSha256 = Hmac; const ALPHANUMERIC_CHARSET: &[u8] = b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; +/// Expected byte length of a valid synthetic ID: 64 hex chars + '.' + 6 alphanumeric chars. +const SYNTHETIC_ID_LEN: usize = 71; + +/// Validates that `value` matches the canonical synthetic ID format. +/// +/// The format is `.` where `` is exactly 64 **lowercase** hex +/// characters (HMAC-SHA256 output via [`hex::encode`]) and `` is exactly +/// 6 ASCII alphanumeric characters. Uppercase hex is rejected — the generator +/// never produces it and intermediaries that normalise case would produce an ID +/// that no longer matches its HMAC. +/// +/// The total length is checked first so that oversized attacker-supplied +/// strings are rejected in O(1) before any character scanning occurs. +fn is_valid_synthetic_id(value: &str) -> bool { + if value.len() != SYNTHETIC_ID_LEN { + return false; + } + match value.split_once('.') { + Some((hmac_part, suffix_part)) => { + hmac_part.len() == 64 + && hmac_part + .bytes() + .all(|b| matches!(b, b'0'..=b'9' | b'a'..=b'f')) + && suffix_part.bytes().all(|b| b.is_ascii_alphanumeric()) + } + None => false, + } +} + /// Normalizes an IP address for stable synthetic ID generation. /// /// For IPv6 addresses, masks to /64 prefix to handle Privacy Extensions @@ -96,7 +125,7 @@ pub fn generate_synthetic_id( message: "Failed to render synthetic ID template".to_string(), })?; - log::info!("Input string for fresh ID: {} {}", input_string, data); + log::debug!("Generating fresh synthetic ID from template inputs"); let mut mac = HmacSha256::new_from_slice(settings.synthetic.secret_key.as_bytes()) .change_context(TrustedServerError::SyntheticId { @@ -109,39 +138,60 @@ pub fn generate_synthetic_id( let random_suffix = generate_random_suffix(6); let synthetic_id = format!("{}.{}", hmac_hash, random_suffix); - log::info!("Generated fresh ID: {}", synthetic_id); + debug_assert!( + is_valid_synthetic_id(&synthetic_id), + "should generate a synthetic ID matching the expected format" + ); + + log::debug!("Generated fresh synthetic ID"); Ok(synthetic_id) } -/// Gets or creates a synthetic ID from the request. +/// Reads a validated synthetic ID from the request, if one is present. +/// +/// Checks the `x-synthetic-id` header first, then the `synthetic_id` cookie. +/// Values that do not match the canonical format (`<64-hex>.<6-alphanumeric>`) +/// are discarded and a warning is logged — the raw invalid value is never +/// included in log output. /// -/// Attempts to retrieve an existing synthetic ID from: -/// 1. The `x-synthetic-id` header -/// 2. The `synthetic_id` cookie +/// Note: a non-UTF-8 `x-synthetic-id` header value is silently discarded and +/// the cookie is checked next, whereas a non-UTF-8 `Cookie` header propagates +/// as an error. /// -/// If neither exists, generates a new synthetic ID. +/// Returns `Ok(None)` when no valid ID is found, allowing the caller to +/// generate a fresh one. /// /// # Errors /// -/// - [`TrustedServerError::Template`] if template rendering fails during generation -/// - [`TrustedServerError::SyntheticId`] if ID generation fails +/// - [`TrustedServerError::InvalidHeaderValue`] if the Cookie header contains invalid UTF-8 pub fn get_synthetic_id(req: &Request) -> Result, Report> { - if let Some(synthetic_id) = req + if let Some(raw) = req .get_header(HEADER_X_SYNTHETIC_ID) .and_then(|h| h.to_str().ok()) { - let id = synthetic_id.to_string(); - log::info!("Using existing Synthetic ID from header: {}", id); - return Ok(Some(id)); + if is_valid_synthetic_id(raw) { + log::info!("Using existing synthetic ID from header"); + return Ok(Some(raw.to_string())); + } + log::warn!( + "Rejecting synthetic ID from header: invalid format (len={})", + raw.len() + ); } match handle_request_cookies(req)? { Some(jar) => { if let Some(cookie) = jar.get(COOKIE_SYNTHETIC_ID) { - let id = cookie.value().to_string(); - log::info!("Using existing Trusted Server ID from cookie: {}", id); - return Ok(Some(id)); + let raw = cookie.value(); + if is_valid_synthetic_id(raw) { + log::info!("Using existing synthetic ID from cookie"); + return Ok(Some(raw.to_string())); + } + log::warn!( + "Rejecting synthetic ID from cookie: invalid format (len={})", + raw.len() + ); } } None => { @@ -152,17 +202,17 @@ pub fn get_synthetic_id(req: &Request) -> Result, Report bool { - let mut parts = value.split('.'); - let hmac_part = match parts.next() { - Some(part) => part, - None => return false, - }; - let suffix_part = match parts.next() { - Some(part) => part, - None => return false, - }; - if parts.next().is_some() { - return false; - } - if hmac_part.len() != 64 || suffix_part.len() != 6 { - return false; - } - if !hmac_part.chars().all(|c| c.is_ascii_hexdigit()) { - return false; - } - if !suffix_part.chars().all(|c| c.is_ascii_alphanumeric()) { - return false; - } - true - } - #[test] fn test_generate_synthetic_id() { let settings: Settings = create_test_settings(); @@ -263,60 +288,88 @@ mod tests { let synthetic_id = generate_synthetic_id(&settings, &req).expect("should generate synthetic ID"); - log::info!("Generated synthetic ID: {}", synthetic_id); assert!( - is_synthetic_id_format(&synthetic_id), + is_valid_synthetic_id(&synthetic_id), "should match synthetic ID format" ); } #[test] - fn test_is_synthetic_id_format_accepts_valid_value() { - let value = format!("{}.{}", "a".repeat(64), "Ab12z9"); + fn test_is_valid_synthetic_id_accepts_valid_value() { assert!( - is_synthetic_id_format(&value), - "should accept a valid synthetic ID format" + is_valid_synthetic_id(VALID_SYNTHETIC_ID), + "should accept a well-formed synthetic ID" ); } #[test] - fn test_is_synthetic_id_format_rejects_invalid_values() { + fn test_is_valid_synthetic_id_rejects_invalid_values() { let missing_suffix = "a".repeat(64); assert!( - !is_synthetic_id_format(&missing_suffix), + !is_valid_synthetic_id(&missing_suffix), "should reject missing suffix" ); let invalid_hex = format!("{}.{}", "a".repeat(63) + "g", "Ab12z9"); assert!( - !is_synthetic_id_format(&invalid_hex), + !is_valid_synthetic_id(&invalid_hex), "should reject non-hex HMAC content" ); let invalid_suffix = format!("{}.{}", "a".repeat(64), "ab-129"); assert!( - !is_synthetic_id_format(&invalid_suffix), + !is_valid_synthetic_id(&invalid_suffix), "should reject non-alphanumeric suffix" ); + // 74 bytes — caught by the length guard before any scan. let extra_segment = format!("{}.{}.{}", "a".repeat(64), "Ab12z9", "zz"); assert!( - !is_synthetic_id_format(&extra_segment), + !is_valid_synthetic_id(&extra_segment), "should reject extra segments" ); + + // 71 bytes, dot at position 64 (correct), but suffix contains a dot — caught by + // the suffix alphanumeric scan, not the length guard. + let dot_in_suffix = format!("{}.Ab12.z", "a".repeat(64)); + assert!( + !is_valid_synthetic_id(&dot_in_suffix), + "should reject dot within suffix" + ); + + let uppercase_hex = format!("{}.{}", "A".repeat(64), "Ab12z9"); + assert!( + !is_valid_synthetic_id(&uppercase_hex), + "should reject uppercase hex in HMAC part" + ); + + let oversized = "a".repeat(1000); + assert!( + !is_valid_synthetic_id(&oversized), + "should reject oversized input" + ); + + assert!(!is_valid_synthetic_id(""), "should reject empty string"); } #[test] fn test_get_synthetic_id_with_header() { let settings = create_test_settings(); - let req = create_test_request(vec![(HEADER_X_SYNTHETIC_ID, "existing_synthetic_id")]); + let req = create_test_request(vec![(HEADER_X_SYNTHETIC_ID, VALID_SYNTHETIC_ID)]); let synthetic_id = get_synthetic_id(&req).expect("should get synthetic ID"); - assert_eq!(synthetic_id, Some("existing_synthetic_id".to_string())); + assert_eq!( + synthetic_id, + Some(VALID_SYNTHETIC_ID.to_string()), + "should return the valid header ID" + ); let synthetic_id = get_or_generate_synthetic_id(&settings, &req) .expect("should reuse header synthetic ID"); - assert_eq!(synthetic_id, "existing_synthetic_id"); + assert_eq!( + synthetic_id, VALID_SYNTHETIC_ID, + "should reuse the valid header ID" + ); } #[test] @@ -324,22 +377,107 @@ mod tests { let settings = create_test_settings(); let req = create_test_request(vec![( header::COOKIE, - &format!("{}=existing_cookie_id", COOKIE_SYNTHETIC_ID), + &format!("{}={}", COOKIE_SYNTHETIC_ID, VALID_SYNTHETIC_ID), )]); let synthetic_id = get_synthetic_id(&req).expect("should get synthetic ID"); - assert_eq!(synthetic_id, Some("existing_cookie_id".to_string())); + assert_eq!( + synthetic_id, + Some(VALID_SYNTHETIC_ID.to_string()), + "should return the valid cookie ID" + ); let synthetic_id = get_or_generate_synthetic_id(&settings, &req) .expect("should reuse cookie synthetic ID"); - assert_eq!(synthetic_id, "existing_cookie_id"); + assert_eq!( + synthetic_id, VALID_SYNTHETIC_ID, + "should reuse the valid cookie ID" + ); + } + + #[test] + fn test_get_synthetic_id_rejects_invalid_header() { + let req = create_test_request(vec![(HEADER_X_SYNTHETIC_ID, "not-a-valid-id")]); + + let synthetic_id = get_synthetic_id(&req).expect("should not error on invalid header ID"); + assert!( + synthetic_id.is_none(), + "should discard invalid synthetic ID from header" + ); + } + + #[test] + fn test_get_synthetic_id_rejects_invalid_cookie() { + let req = create_test_request(vec![( + header::COOKIE, + &format!("{}=not-a-valid-id", COOKIE_SYNTHETIC_ID), + )]); + + let synthetic_id = get_synthetic_id(&req).expect("should not error on invalid cookie ID"); + assert!( + synthetic_id.is_none(), + "should discard invalid synthetic ID from cookie" + ); + } + + #[test] + fn test_get_synthetic_id_invalid_header_falls_through_to_valid_cookie() { + let req = create_test_request(vec![ + (HEADER_X_SYNTHETIC_ID, "not-a-valid-id"), + ( + header::COOKIE, + &format!("{}={}", COOKIE_SYNTHETIC_ID, VALID_SYNTHETIC_ID), + ), + ]); + + let synthetic_id = get_synthetic_id(&req).expect("should not error when cookie is valid"); + assert_eq!( + synthetic_id, + Some(VALID_SYNTHETIC_ID.to_string()), + "should fall through to valid cookie when header ID is invalid" + ); + } + + #[test] + fn test_get_synthetic_id_header_takes_precedence_over_cookie() { + let cookie_id = "b2a1c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0c1d2e3f4a5b6c7d8e9f0b1a2.Zx98y7"; + let req = create_test_request(vec![ + (HEADER_X_SYNTHETIC_ID, VALID_SYNTHETIC_ID), + ( + header::COOKIE, + &format!("{}={}", COOKIE_SYNTHETIC_ID, cookie_id), + ), + ]); + let result = get_synthetic_id(&req).expect("should succeed"); + assert_eq!( + result, + Some(VALID_SYNTHETIC_ID.to_string()), + "should prefer header over cookie" + ); } #[test] fn test_get_synthetic_id_none() { let req = create_test_request(vec![]); let synthetic_id = get_synthetic_id(&req).expect("should handle missing ID"); - assert!(synthetic_id.is_none()); + assert!( + synthetic_id.is_none(), + "should return None when no ID present" + ); + } + + #[test] + fn test_get_or_generate_synthetic_id_generates_when_invalid_header() { + let settings = create_test_settings(); + // A string that is clearly not a valid synthetic ID (wrong format, wrong length) + let req = create_test_request(vec![(HEADER_X_SYNTHETIC_ID, "totally-invalid-id-value")]); + + let synthetic_id = get_or_generate_synthetic_id(&settings, &req) + .expect("should generate when header ID is invalid"); + assert!( + is_valid_synthetic_id(&synthetic_id), + "should generate a fresh valid ID when inbound ID is invalid" + ); } #[test] @@ -349,6 +487,9 @@ mod tests { let synthetic_id = get_or_generate_synthetic_id(&settings, &req) .expect("should get or generate synthetic ID"); - assert!(!synthetic_id.is_empty()); + assert!( + is_valid_synthetic_id(&synthetic_id), + "should generate a valid synthetic ID" + ); } } diff --git a/crates/common/src/test_support.rs b/crates/common/src/test_support.rs index 5e0d8f97..e3d73fa4 100644 --- a/crates/common/src/test_support.rs +++ b/crates/common/src/test_support.rs @@ -2,6 +2,10 @@ pub mod tests { use crate::settings::Settings; + /// A well-formed synthetic ID for use in tests: 64 lowercase hex chars + `'.'` + 6 alphanumeric. + pub const VALID_SYNTHETIC_ID: &str = + "a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0c1d2e3f4a5b6c7d8e9f0a1b2.Ab12z9"; + #[must_use] pub fn crate_test_settings_str() -> String { r#" diff --git a/docs/guide/synthetic-ids.md b/docs/guide/synthetic-ids.md index 45914a90..b8a8b69f 100644 --- a/docs/guide/synthetic-ids.md +++ b/docs/guide/synthetic-ids.md @@ -12,10 +12,12 @@ Trusted Server surfaces the current synthetic ID via response headers and a firs ### HMAC-Based Generation -Synthetic IDs use HMAC (Hash-based Message Authentication Code) to generate a deterministic base from a configurable template, then append a short random suffix. +Synthetic IDs use HMAC (Hash-based Message Authentication Code) to generate a base from a configurable template, then append a short random alphanumeric suffix. The HMAC base is deterministic for a given set of template inputs; templates that include the built-in `{{random_uuid}}` variable produce a unique ID on every generation. **Format**: `64-hex-hmac`.`6-alphanumeric-suffix` +Inbound IDs (from the `x-synthetic-id` header or `synthetic_id` cookie) are validated against this format before use. Values that do not match — including oversized strings, special characters, or wrong structure — are discarded and a fresh ID is generated in their place. + **IP normalization**: IPv6 addresses are normalized to a /64 prefix before templating. ## Configuration