diff --git a/Cargo.lock b/Cargo.lock index c01df71..cf08dcf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -909,8 +909,7 @@ dependencies = [ [[package]] name = "multistore" version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cfdf7fb1fa70d7a75a57e322825fa61da83bfaef5a6e705ad871b8bad46db55b" +source = "git+https://github.com/developmentseed/multistore?rev=81b24ec1afad4c947a972b8c7f834806d7c0205e#81b24ec1afad4c947a972b8c7f834806d7c0205e" dependencies = [ "async-trait", "base64", @@ -936,8 +935,7 @@ dependencies = [ [[package]] name = "multistore-cf-workers" version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "941b094b16c87594cdda4952e52c1a70bc19ca8a89985fc8d63a82b9b7feb6a9" +source = "git+https://github.com/developmentseed/multistore?rev=81b24ec1afad4c947a972b8c7f834806d7c0205e#81b24ec1afad4c947a972b8c7f834806d7c0205e" dependencies = [ "async-trait", "bytes", @@ -961,13 +959,13 @@ dependencies = [ [[package]] name = "multistore-oidc-provider" version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "249fb5386150184f591170ddfb0916ae9fe1861c3523d8ae84b638cad22f5e76" +source = "git+https://github.com/developmentseed/multistore?rev=81b24ec1afad4c947a972b8c7f834806d7c0205e#81b24ec1afad4c947a972b8c7f834806d7c0205e" dependencies = [ "async-trait", "base64", "chrono", "multistore", + "quick-xml 0.37.5", "rsa", "serde", "serde_json", @@ -980,8 +978,7 @@ dependencies = [ [[package]] name = "multistore-path-mapping" version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa1f27fa70820affb7056520372cbf337fa7739056bf6f0e63b4b5870aeb5a8e" +source = "git+https://github.com/developmentseed/multistore?rev=81b24ec1afad4c947a972b8c7f834806d7c0205e#81b24ec1afad4c947a972b8c7f834806d7c0205e" dependencies = [ "multistore", "percent-encoding", @@ -991,8 +988,7 @@ dependencies = [ [[package]] name = "multistore-sts" version = 
"0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d147130ee410a7f429ab4d8fa1be0f391be43682d3582bf0393e184d705a722a" +source = "git+https://github.com/developmentseed/multistore?rev=81b24ec1afad4c947a972b8c7f834806d7c0205e#81b24ec1afad4c947a972b8c7f834806d7c0205e" dependencies = [ "aes-gcm", "async-trait", diff --git a/Cargo.toml b/Cargo.toml index 0725c5b..583339c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,6 +18,10 @@ path = "tests/routing.rs" name = "pagination" path = "tests/pagination.rs" +[[test]] +name = "backend_auth" +path = "tests/backend_auth.rs" + [dependencies] # Multistore multistore = { version = "0.4.0", features = ["azure"] } @@ -55,3 +59,15 @@ web-sys = { version = "0.3", features = [ ] } worker = { version = "=0.7.4", features = ["http"] } worker-macros = { version = "=0.7.4", features = ["http"] } + +# Track multistore ahead of a crates.io release. Pinned to an exact `rev` (not a +# branch) so builds are reproducible and `cargo update` can't float to a newer +# commit. The rev is on the `feat/shareable-credential-cache` branch, which adds +# `Clone` to `OidcCredentialProvider` so the worker can keep one warm credential +# cache across requests. Drop this patch and bump the versions above once it ships. 
+[patch.crates-io] +multistore = { git = "https://github.com/developmentseed/multistore", rev = "81b24ec1afad4c947a972b8c7f834806d7c0205e" } +multistore-oidc-provider = { git = "https://github.com/developmentseed/multistore", rev = "81b24ec1afad4c947a972b8c7f834806d7c0205e" } +multistore-path-mapping = { git = "https://github.com/developmentseed/multistore", rev = "81b24ec1afad4c947a972b8c7f834806d7c0205e" } +multistore-sts = { git = "https://github.com/developmentseed/multistore", rev = "81b24ec1afad4c947a972b8c7f834806d7c0205e" } +multistore-cf-workers = { git = "https://github.com/developmentseed/multistore", rev = "81b24ec1afad4c947a972b8c7f834806d7c0205e" } diff --git a/src/backend_auth.rs b/src/backend_auth.rs new file mode 100644 index 0000000..4f9c54c --- /dev/null +++ b/src/backend_auth.rs @@ -0,0 +1,123 @@ +//! Per-connection backend authentication: the model the Source API reports for a +//! data connection, and its translation into multistore `backend_options`. +//! +//! Kept in its own module — free of wasm-only deps — so it can be unit-tested on +//! native targets despite the crate's `[lib] test = false`. See +//! `tests/backend_auth.rs`. + +use multistore::error::ProxyError; +use serde::Deserialize; +use std::collections::HashMap; + +/// `aud` claim for the proxy's AWS `AssumeRoleWithWebIdentity` assertions. AWS's +/// fixed web-identity convention — the value the customer registers their IAM +/// OIDC provider with and conditions the role trust policy on — so it is constant +/// across connections. Applied at the OIDC backend-auth provider (see `lib.rs`). +pub(crate) const AWS_STS_AUDIENCE: &str = "sts.amazonaws.com"; + +/// Per-connection backend authentication, as reported by the Source API +/// (a sibling of `details` on the connection). +/// +/// Internally tagged on `type`; defaults to [`Unsigned`](BackendAuth::Unsigned) +/// when the field is omitted, so existing connections keep issuing unsigned +/// requests until a role is configured. 
Unknown `type`s (e.g. the app-side +/// GCP/Azure workload-identity variants) deserialize to +/// [`Unsupported`](BackendAuth::Unsupported) instead of failing deserialization. +/// +/// The AWS variant carries only `role_arn`; the audience is the fixed constant +/// [`AWS_STS_AUDIENCE`] set on the OIDC backend-auth provider, and session +/// duration / subject scope may be added later. +#[derive(Debug, Clone, Default, Deserialize, PartialEq, Eq)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum BackendAuth { + /// Public bucket — issue unsigned requests, no backend credentials. + #[default] + Unsigned, + /// Federate the proxy's OIDC identity into a customer-owned AWS role via + /// `AssumeRoleWithWebIdentity`, signing backend requests with the resulting + /// temporary credentials. (S3 only for now.) + S3WebIdentityRole { + /// ARN of the IAM role the proxy assumes for this connection. + role_arn: String, + }, + /// An authentication type this proxy build does not implement — e.g. the + /// Source API's `gcp_workload_identity` / `azure_workload_identity` variants, + /// scaffolded app-side but without proxy/multistore support yet. Captured via + /// `#[serde(other)]` so an unknown `type` deserializes gracefully; treated as + /// unsupported: `apply_backend_auth` fails closed rather than serving unsigned. + #[serde(other)] + Unsupported, +} + +impl BackendAuth { + /// Short, stable label for logs/spans (no secrets — the role ARN is not + /// included). + pub(crate) fn kind(&self) -> &'static str { + match self { + BackendAuth::Unsigned => "unsigned", + BackendAuth::S3WebIdentityRole { .. } => "s3_web_identity_role", + BackendAuth::Unsupported => "unsupported", + } + } +} + +/// Lenient `deserialize_with` for a connection's `authentication` field. +/// +/// A *present* value that doesn't parse as a known [`BackendAuth`] — unknown +/// `type`, missing `role_arn`, wrong shape — becomes +/// [`Unsupported`](BackendAuth::Unsupported), and `null` becomes +/// [`Unsigned`](BackendAuth::Unsigned). 
This keeps a single malformed `authentication` from +/// failing deserialization of the *entire* data-connection list, which the proxy +/// parses in one `serde_json::from_str`. An *absent* field is handled by +/// `#[serde(default)]` and never reaches this function. +pub(crate) fn deserialize_lenient<'de, D>(deserializer: D) -> Result +where + D: serde::Deserializer<'de>, +{ + let value = serde_json::Value::deserialize(deserializer)?; + if value.is_null() { + return Ok(BackendAuth::Unsigned); + } + Ok(serde_json::from_value(value).unwrap_or(BackendAuth::Unsupported)) +} + +/// Translate a connection's [`BackendAuth`] into multistore `backend_options`. +/// +/// - [`Unsigned`](BackendAuth::Unsigned) sets `skip_signature` so the proxy +/// issues an unsigned request to a public bucket. +/// - [`S3WebIdentityRole`](BackendAuth::S3WebIdentityRole) hands the role ARN and +/// a per-connection subject (`scv1:conn:{id}`) to multistore's OIDC backend-auth +/// middleware (wired in `lib.rs`), which mints the assertion (with the fixed AWS +/// audience set on the provider), exchanges it at AWS STS, and injects the +/// temporary credentials — clearing `skip_signature` so the request is signed. +/// - [`Unsupported`](BackendAuth::Unsupported) can't be fulfilled, so it **fails +/// closed** with [`ProxyError::BackendAuthError`] rather than silently serving +/// unsigned. 
+pub(crate) fn apply_backend_auth( + auth: &BackendAuth, + connection_id: &str, + options: &mut HashMap, +) -> Result<(), ProxyError> { + match auth { + BackendAuth::Unsigned => { + options.insert("skip_signature".to_string(), "true".to_string()); + } + BackendAuth::S3WebIdentityRole { role_arn } => { + options.insert("auth_type".to_string(), "oidc".to_string()); + options.insert("oidc_role_arn".to_string(), role_arn.clone()); + options.insert( + "oidc_subject".to_string(), + format!("scv1:conn:{connection_id}"), + ); + } + // Fail closed: a scheme we can't fulfill (the app-side GCP/Azure + // workload-identity variants, or a malformed `authentication`) must not + // fall back to unsigned — that could expose an anonymously-readable + // backend. Deny so the misconfiguration surfaces explicitly. + BackendAuth::Unsupported => { + return Err(ProxyError::BackendAuthError(format!( + "connection {connection_id}: unsupported backend authentication type" + ))); + } + } + Ok(()) +} diff --git a/src/lib.rs b/src/lib.rs index 9d9728c..76efbed 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,6 @@ mod analytics; mod auth; +mod backend_auth; mod cache; mod config; mod handlers; @@ -18,7 +19,9 @@ use multistore_cf_workers::{ collect_js_body, GatewayResponseExt, NoopCredentialRegistry, RequestParts, WorkerBackend, WorkerSubscriber, }; +use multistore_oidc_provider::backend_auth::{AwsBackendAuth, MaybeOidcAuth}; use multistore_oidc_provider::route_handler::OidcRouterExt; +use multistore_oidc_provider::{HttpExchange, OidcCredentialProvider, OidcProviderError}; use multistore_path_mapping::{MappedRegistry, PathMapping}; use multistore_sts::jwks::JwksCache; use multistore_sts::route_handler::StsRouterExt; @@ -51,6 +54,45 @@ fn jwks_cache() -> JwksCache { .clone() } +/// [`HttpExchange`] for outbound STS calls, backed by the shared reqwest client +/// (reqwest wraps `web_sys::fetch` on wasm). 
This is what lets the OIDC +/// backend-auth middleware POST `AssumeRoleWithWebIdentity` to AWS STS. +#[derive(Clone)] +struct FetchHttpExchange { + client: reqwest::Client, +} + +impl HttpExchange for FetchHttpExchange { + async fn post_form( + &self, + url: &str, + form: &[(&str, &str)], + ) -> std::result::Result { + let resp = self + .client + .post(url) + .form(form) + .send() + .await + .map_err(|e| OidcProviderError::HttpError(e.to_string()))?; + // Intentionally NOT checking the HTTP status / calling + // `error_for_status()`: AWS STS returns its `<ErrorResponse>` XML in the + // body on 4xx/5xx, and multistore's `parse_response` reads the error + // (code + message) out of that body. Discarding it on a non-2xx would + // lose the diagnostic and the precise ProxyError mapping. + resp.text() + .await + .map_err(|e| OidcProviderError::HttpError(e.to_string())) + } +} + +/// Isolate-shared OIDC credential provider for backend federation. The gateway +/// (and its middleware) are rebuilt per request, but the provider — and its +/// credential cache — must persist so the proxy doesn't re-mint a JWT and re-run +/// `AssumeRoleWithWebIdentity` on every request to the same role. Initialized +/// from the first request's signing config, which is constant for the isolate. 
+static OIDC_PROVIDER: OnceLock> = OnceLock::new(); + #[event(fetch)] async fn fetch(req: web_sys::Request, env: Env, ctx: Context) -> Result { console_error_panic_hook::set_once(); @@ -144,12 +186,34 @@ async fn fetch(req: web_sys::Request, env: Env, ctx: Context) -> Result= tracing::Level::DEBUG) .with_credential_resolver(config.session_token_key.clone()); diff --git a/src/registry.rs b/src/registry.rs index 5f7348f..aaa45aa 100644 --- a/src/registry.rs +++ b/src/registry.rs @@ -7,6 +7,8 @@ use multistore::types::{BucketConfig, ResolvedIdentity, S3Operation}; use serde::Deserialize; use std::collections::HashMap; +use crate::backend_auth::{apply_backend_auth, BackendAuth}; + /// Registry that resolves Source Cooperative products to multistore `BucketConfig`s /// by calling the Source Cooperative API. #[derive(Clone)] @@ -102,6 +104,7 @@ async fn resolve_product( account = %account, product = %product, backend_type = tracing::field::Empty, + auth_type = tracing::field::Empty, ); let _guard = span.enter(); @@ -182,10 +185,20 @@ async fn resolve_product( _ => {} } - // TODO: For authenticated users, provide real backend credentials so that - // write operations can be forwarded to the storage backend. Currently all - // requests use anonymous/unsigned access, so writes will fail at the backend. - backend_options.insert("skip_signature".to_string(), "true".to_string()); + // Backend authentication: unsigned (public) by default, or federate the + // proxy's OIDC identity into the connection's role. + // + // The confused-deputy guard is upstream: the subject-scoped Source API + // fetches above (get_or_fetch_product / get_or_fetch_data_connections, keyed + // on the caller's principal) only return products/connections this caller is + // authorized for — so reaching here means the caller is already cleared for + // this connection's backend. Federation does not re-authorize. 
+ span.record("auth_type", connection.authentication.kind()); + apply_backend_auth( + &connection.authentication, + &connection.data_connection_id, + &mut backend_options, + )?; // 5. Build prefix: connection.base_prefix + mirror.prefix let base_prefix = connection.details.base_prefix.as_deref().unwrap_or(""); @@ -272,6 +285,13 @@ pub struct SourceProductMirrorConfig { pub struct DataConnection { pub data_connection_id: String, pub details: DataConnectionDetails, + /// How the proxy authenticates to this connection's backend. A sibling of + /// `details`, matching the Source API's `DataConnection` shape. Absent → + /// [`BackendAuth::Unsigned`] (public bucket); a present-but-malformed value + /// becomes `Unsupported` rather than failing the whole list (see + /// [`deserialize_lenient`](crate::backend_auth::deserialize_lenient)). + #[serde(default, deserialize_with = "crate::backend_auth::deserialize_lenient")] + pub authentication: BackendAuth, } #[derive(Debug, Clone, Deserialize)] diff --git a/tests/backend_auth.rs b/tests/backend_auth.rs new file mode 100644 index 0000000..21c6848 --- /dev/null +++ b/tests/backend_auth.rs @@ -0,0 +1,133 @@ +//! Native unit tests for the wasm-free `backend_auth` module, included via +//! `#[path]` (the lib itself is `cdylib` with `test = false`). Mirrors the +//! pattern in `tests/pagination.rs`. 
+ +#[path = "../src/backend_auth.rs"] +mod backend_auth; + +use backend_auth::{apply_backend_auth, BackendAuth}; +use std::collections::HashMap; + +// ── deserialization ──────────────────────────────────────────────── + +#[test] +fn deserializes_unsigned() { + let a: BackendAuth = serde_json::from_str(r#"{"type":"unsigned"}"#).unwrap(); + assert_eq!(a, BackendAuth::Unsigned); +} + +#[test] +fn deserializes_web_identity_role() { + let a: BackendAuth = serde_json::from_str( + r#"{"type":"s3_web_identity_role","role_arn":"arn:aws:iam::1:role/r"}"#, + ) + .unwrap(); + assert_eq!( + a, + BackendAuth::S3WebIdentityRole { + role_arn: "arn:aws:iam::1:role/r".into() + } + ); +} + +// ── lenient field deserialization (one bad entry must not poison the list) ── + +#[derive(serde::Deserialize)] +struct Wrapper { + #[serde(default, deserialize_with = "backend_auth::deserialize_lenient")] + auth: BackendAuth, +} + +#[test] +fn lenient_absent_is_unsigned() { + let w: Wrapper = serde_json::from_str("{}").unwrap(); + assert_eq!(w.auth, BackendAuth::Unsigned); +} + +#[test] +fn lenient_null_is_unsigned() { + let w: Wrapper = serde_json::from_str(r#"{"auth":null}"#).unwrap(); + assert_eq!(w.auth, BackendAuth::Unsigned); +} + +#[test] +fn lenient_valid_role_parses() { + let w: Wrapper = serde_json::from_str( + r#"{"auth":{"type":"s3_web_identity_role","role_arn":"arn:aws:iam::1:role/r"}}"#, + ) + .unwrap(); + assert_eq!( + w.auth, + BackendAuth::S3WebIdentityRole { + role_arn: "arn:aws:iam::1:role/r".into() + } + ); +} + +#[test] +fn lenient_malformed_becomes_unsupported_not_error() { + // Missing role_arn, a wrong-typed value, and an unknown type all degrade to + // Unsupported instead of erroring — so they can't fail the whole list parse. 
+ for bad in [ + r#"{"auth":{"type":"s3_web_identity_role"}}"#, + r#"{"auth":"garbage"}"#, + r#"{"auth":{"type":"gcp_workload_identity","workload_identity_provider":"x"}}"#, + ] { + let w: Wrapper = serde_json::from_str(bad).unwrap(); + assert_eq!(w.auth, BackendAuth::Unsupported, "input: {bad}"); + } +} + +#[test] +fn unknown_type_deserializes_to_unsupported() { + // The app-side GCP/Azure variants this proxy build doesn't implement must not + // fail deserialization — `#[serde(other)]` catches them. + let a: BackendAuth = serde_json::from_str( + r#"{"type":"gcp_workload_identity","workload_identity_provider":"x","service_account":"y"}"#, + ) + .unwrap(); + assert_eq!(a, BackendAuth::Unsupported); +} + +// ── option translation ───────────────────────────────────────────── + +#[test] +fn unsigned_sets_skip_signature() { + let mut o = HashMap::new(); + apply_backend_auth(&BackendAuth::Unsigned, "conn-1", &mut o).unwrap(); + assert_eq!(o.get("skip_signature").map(String::as_str), Some("true")); + assert!(!o.contains_key("auth_type")); +} + +#[test] +fn web_identity_role_sets_oidc_options_and_keeps_signing() { + let mut o = HashMap::new(); + apply_backend_auth( + &BackendAuth::S3WebIdentityRole { + role_arn: "arn:aws:iam::1:role/r".into(), + }, + "conn-1", + &mut o, + ) + .unwrap(); + assert_eq!(o.get("auth_type").map(String::as_str), Some("oidc")); + assert_eq!( + o.get("oidc_role_arn").map(String::as_str), + Some("arn:aws:iam::1:role/r") + ); + assert_eq!( + o.get("oidc_subject").map(String::as_str), + Some("scv1:conn:conn-1") + ); + // Signing must stay ON for the federated path. + assert!(!o.contains_key("skip_signature")); +} + +#[test] +fn unsupported_fails_closed() { + let mut o = HashMap::new(); + let result = apply_backend_auth(&BackendAuth::Unsupported, "conn-1", &mut o); + assert!(result.is_err()); + // Must not have set unsigned (or any) options as a side effect. 
+ assert!(o.is_empty()); +} diff --git a/tests/test_federation.py b/tests/test_federation.py new file mode 100644 index 0000000..39bdb1d --- /dev/null +++ b/tests/test_federation.py @@ -0,0 +1,61 @@ +"""Federation smoke test for the Source Cooperative Data Proxy. + +Exercises the full federated backend path end-to-end: a request for a product +backed by an ``s3_web_identity_role`` data connection makes the proxy mint its +own OIDC assertion, assume the customer role via AWS STS +``AssumeRoleWithWebIdentity``, and sign the S3 read with the temporary +credentials. + +This needs real infrastructure that can't be stood up in unit tests — a deployed +proxy, a federated test product whose data connection carries a ``role_arn``, and +the customer-side IAM OIDC provider + role trust policy (conditioned on +``aud = sts.amazonaws.com`` and ``sub = scv1:conn:{connection_id}``). So it is +gated on env vars and SKIPS when they are unset. Set them in staging/preview CI +to activate it (it is discovered automatically by ``pytest tests/``): + + PROXY_URL base URL of the deployed proxy (shared with the other + integration tests; defaults to http://localhost:8787) + FEDERATION_TEST_ACCOUNT account id of the federated test product + FEDERATION_TEST_PRODUCT product id of the federated test product + FEDERATION_TEST_KEY an object key expected to be readable via federation +""" + +import os + +import pytest +import requests + +PROXY_URL = os.environ.get("PROXY_URL", "http://localhost:8787") +ACCOUNT = os.environ.get("FEDERATION_TEST_ACCOUNT") +PRODUCT = os.environ.get("FEDERATION_TEST_PRODUCT") +KEY = os.environ.get("FEDERATION_TEST_KEY") + +pytestmark = pytest.mark.skipif( + not (ACCOUNT and PRODUCT and KEY), + reason=( + "federation test target not configured " + "(set FEDERATION_TEST_ACCOUNT/PRODUCT/KEY against a deployed proxy)" + ), +) + + +def test_federated_object_is_served(): + """A private, federated product's object is served via AssumeRoleWithWebIdentity. 
+ + A 403/500 here means the proxy could not assume the role or sign the request + — typically a missing IAM OIDC provider, a trust-policy ``aud``/``sub`` + mismatch, or the API not surfacing the connection's ``role_arn`` to the proxy. + """ + resp = requests.get(f"{PROXY_URL}/{ACCOUNT}/{PRODUCT}/{KEY}") + assert resp.status_code == 200, ( + f"federated read failed ({resp.status_code}): {resp.text[:300]}" + ) + assert resp.content, "federated read returned an empty body" + + +def test_federated_listing_is_served(): + """Listing a federated product works (signed ListObjectsV2).""" + resp = requests.get(f"{PROXY_URL}/{ACCOUNT}/{PRODUCT}/") + assert resp.status_code == 200, ( + f"federated list failed ({resp.status_code}): {resp.text[:300]}" + )