diff --git a/CHANGELOG.md b/CHANGELOG.md index e016fa98..9be53f6c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Added +- Implemented header-based filtering for collections and geometry. Supports `X-Filter-Collections` (comma-separated collection IDs) and `X-Filter-Geometry` (GeoJSON) headers to restrict access to specific collections and geographic areas. Applies to `/collections`, `/collections/{id}`, `/collections/{id}/items`, `/collections/{id}/items/{id}`, and `/search` endpoints. Added optional `[geo]` extra with `shapely` dependency for geometry filtering on single item endpoints. [#563](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/563) + ### Changed ### Fixed diff --git a/README.md b/README.md index b050691b..69ad5e76 100644 --- a/README.md +++ b/README.md @@ -126,6 +126,7 @@ This project is built on the following technologies: STAC, stac-fastapi, FastAPI - [Managing Elasticsearch Indices](#managing-elasticsearch-indices) - [Snapshots](#snapshots) - [Reindexing](#reindexing) + - [Header Filtering](#header-filtering) - [Auth](#auth) - [Aggregation](#aggregation) - [Rate Limiting](#rate-limiting) @@ -1015,6 +1016,39 @@ pip install stac-fastapi-elasticsearch[redis] - This makes the modified Items with lowercase identifiers visible to users accessing my-collection in the STAC API - Using aliases allows you to switch between different index versions without changing the API endpoint +## Header Filtering + +SFEOS supports filtering API responses based on HTTP headers. This enables upstream proxies or gateways to restrict access to specific collections and geographic areas. 
+ +### Headers + +| Header | Format | Description | +|--------|--------|-------------| +| `X-Filter-Collections` | Comma-separated IDs | Restricts access to specified collections only | +| `X-Filter-Geometry` | GeoJSON geometry | Restricts access to items that intersect the specified geometry | + +### Affected Endpoints + +| Endpoint | Collection Filter | Geometry Filter | +|----------|:-----------------:|:---------------:| +| `GET /collections` | ✅ | - | +| `GET /collections/{id}` | ✅ (404 if denied) | - | +| `GET /collections/{id}/items` | ✅ | ✅ | +| `GET /collections/{id}/items/{id}` | ✅ (404 if denied) | ✅* (404 if denied) | +| `GET/POST /search` | ✅ | ✅ | + +*Requires optional `shapely` dependency. + +### Optional Dependency + +For geometry filtering on single item endpoints (`/collections/{id}/items/{id}`), install with the `geo` extra: + +```bash +pip install stac-fastapi-core[geo] +``` + +Without this dependency, the geometry check on single items is skipped with a logged warning, so the item is returned even if it lies outside the allowed geometry. + ## Auth - **Overview**: Authentication is an optional feature that can be enabled through Route Dependencies. 
diff --git a/stac_fastapi/core/pyproject.toml b/stac_fastapi/core/pyproject.toml index 6ad95056..118f9af8 100644 --- a/stac_fastapi/core/pyproject.toml +++ b/stac_fastapi/core/pyproject.toml @@ -49,6 +49,9 @@ redis = [ "redis~=6.4.0", "retry~=0.9.2", ] +geo = [ + "shapely>=2.0.0", +] [project.urls] Homepage = "https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch" diff --git a/stac_fastapi/core/stac_fastapi/core/core.py b/stac_fastapi/core/stac_fastapi/core/core.py index 18bea46f..b72c5e25 100644 --- a/stac_fastapi/core/stac_fastapi/core/core.py +++ b/stac_fastapi/core/stac_fastapi/core/core.py @@ -23,6 +23,10 @@ from stac_fastapi.core.base_database_logic import BaseDatabaseLogic from stac_fastapi.core.base_settings import ApiBaseSettings from stac_fastapi.core.datetime_utils import format_datetime_range +from stac_fastapi.core.header_filters import ( + parse_filter_collections, + parse_filter_geometry, +) from stac_fastapi.core.models.links import PagingLinks from stac_fastapi.core.queryables import ( QueryablesCache, @@ -449,6 +453,13 @@ async def all_collections( else: filtered_collections = collections + # Filter by header collections if present + header_collections = parse_filter_collections(request) + if header_collections is not None: + filtered_collections = [ + c for c in filtered_collections if c.get("id") in header_collections + ] + links = [ {"rel": Relations.root.value, "type": MimeTypes.json, "href": base_url}, {"rel": Relations.parent.value, "type": MimeTypes.json, "href": base_url}, @@ -580,6 +591,12 @@ async def get_collection( NotFoundError: If the collection with the given id cannot be found in the database. 
""" request = kwargs["request"] + + # Check if collection is allowed by header filter + header_collections = parse_filter_collections(request) + if header_collections is not None and collection_id not in header_collections: + raise HTTPException(status_code=404, detail="Collection not found") + collection = await self.database.find_collection(collection_id=collection_id) return self.collection_serializer.db_to_stac( collection=collection, @@ -665,11 +682,30 @@ async def get_item( Exception: If any error occurs while getting the item from the database. NotFoundError: If the item does not exist in the specified collection. """ - base_url = str(kwargs["request"].base_url) + request = kwargs["request"] + + # Check if collection is allowed by header filter + header_collections = parse_filter_collections(request) + if header_collections is not None and collection_id not in header_collections: + raise HTTPException(status_code=404, detail="Item not found") + + base_url = str(request.base_url) item = await self.database.get_one_item( item_id=item_id, collection_id=collection_id ) - return self.item_serializer.db_to_stac(item, base_url) + stac_item = self.item_serializer.db_to_stac(item, base_url) + + # Check if item geometry intersects with allowed geometry filter + header_geometry = parse_filter_geometry(request) + if header_geometry is not None: + item_geometry = stac_item.get("geometry") + if item_geometry: + from stac_fastapi.core.header_filters import geometry_intersects_filter + + if not geometry_intersects_filter(item_geometry, header_geometry): + raise HTTPException(status_code=404, detail="Item not found") + + return stac_item async def get_search( self, @@ -821,7 +857,14 @@ async def post_search( search=search, item_ids=search_request.ids ) - if search_request.collections: + # Apply collection filter from header or request + header_collections = parse_filter_collections(request) + if header_collections is not None: + # Use header collections (stac-auth-proxy 
def parse_filter_collections(request: Request) -> Optional[List[str]]:
    """Extract the allowed collection IDs from the X-Filter-Collections header.

    Args:
        request: FastAPI Request object whose headers are inspected.

    Returns:
        None when the header is absent. Otherwise the whitespace-trimmed,
        non-blank entries of the comma-separated header value, in order.
        A header that is present but blank yields an empty list.

    Example:
        Header "X-Filter-Collections: col-a,col-b,col-c" returns ["col-a", "col-b", "col-c"]
    """
    raw = request.headers.get(FILTER_COLLECTIONS_HEADER)
    if raw is None:
        return None

    # A present-but-blank header is distinct from a missing one: it yields an
    # empty list rather than None.
    if raw.strip() == "":
        return []

    collections = []
    for part in raw.split(","):
        trimmed = part.strip()
        if trimmed:
            collections.append(trimmed)

    logger.debug(f"Parsed filter collections from header: {collections}")
    return collections
def parse_filter_geometry(request: Request) -> Optional[Dict[str, Any]]:
    """Parse geometry filter from X-Filter-Geometry header.

    Args:
        request: FastAPI Request object.

    Returns:
        The parsed GeoJSON geometry dict if the header is present and holds a
        JSON object. None if the header is absent, blank, not valid JSON, or
        valid JSON that is not an object (e.g. a list, string or number).

    Example:
        Header 'X-Filter-Geometry: {"type":"Polygon","coordinates":[...]}'
        returns the parsed GeoJSON dict.
    """
    header_value = request.headers.get(FILTER_GEOMETRY_HEADER)

    if header_value is None or not header_value.strip():
        return None

    # Keep the try body to the one call that can raise JSONDecodeError.
    try:
        geometry = json.loads(header_value)
    except json.JSONDecodeError as e:
        logger.warning("Failed to parse geometry header: %s", e)
        return None

    # json.loads accepts any JSON value. Without this guard a header such as
    # "[1, 2]" or "42" would reach .get() below and raise AttributeError
    # instead of being handled like other malformed input.
    if not isinstance(geometry, dict):
        logger.warning(
            "Failed to parse geometry header: expected a JSON object, got %s",
            type(geometry).__name__,
        )
        return None

    logger.debug(
        "Parsed filter geometry from header: %s", geometry.get("type", "unknown")
    )
    return geometry
def geometry_intersects_filter(
    item_geometry: Dict[str, Any], filter_geometry: Dict[str, Any]
) -> bool:
    """Report whether an item's geometry intersects the header filter geometry.

    Args:
        item_geometry: GeoJSON geometry dict from the item.
        filter_geometry: GeoJSON geometry dict from header filter.

    Returns:
        The result of the shapely ``intersects`` predicate. Returns True
        without checking when shapely cannot be imported or when building or
        comparing the shapes raises.

    Note:
        This function fails open: a missing shapely install or an invalid
        geometry results in True (access allowed), with a warning logged,
        so deployments without shapely keep working.
    """
    # shapely is an optional dependency, so it is imported lazily here.
    try:
        from shapely.geometry import shape as to_shape
    except ImportError:
        logger.warning(
            "shapely not installed - geometry filter check skipped. "
            "Install shapely for full geometry filtering support."
        )
        return True  # Allow access if shapely not available

    try:
        intersects = to_shape(item_geometry).intersects(to_shape(filter_geometry))
    except Exception as e:
        logger.warning(f"Geometry intersection check failed: {e}")
        # On error, allow access (fail open)
        return True
    return intersects
@pytest_asyncio.fixture(scope="function")
async def multi_collection_ctx(txn_client, load_test_data):
    """Create three collections, each with one item, for header-filter tests.

    Yields:
        Dict with "collections" and "items" lists holding the created
        documents. All collections and items are deleted before the data is
        created and again after the test finishes.
    """
    await delete_collections_and_items(txn_client)

    # Create test collections
    collections = []
    for suffix in ["a", "b", "c"]:
        collection = load_test_data("test_collection.json").copy()
        collection["id"] = f"test-collection-{suffix}"
        await create_collection(txn_client, collection)
        collections.append(collection)

    # Create one item in each collection
    items = []
    for collection in collections:
        item = load_test_data("test_item.json").copy()
        item["id"] = f"test-item-{collection['id']}"
        item["collection"] = collection["id"]
        await create_item(txn_client, item)
        items.append(item)

    yield {"collections": collections, "items": items}

    await delete_collections_and_items(txn_client)


class TestHeaderFilteringSearch:
    """Tests for search endpoints with header filtering."""

    @pytest.mark.asyncio
    async def test_search_uses_header_collections(
        self, app_client, multi_collection_ctx
    ):
        """When X-Filter-Collections header is present, search only in those collections."""
        response = await app_client.get(
            "/search",
            headers={FILTER_COLLECTIONS_HEADER: "test-collection-a"},
        )
        assert response.status_code == 200
        features = response.json()["features"]

        # Comparing sets also fails on an empty result, which a bare
        # per-feature loop would silently accept.
        assert {f["collection"] for f in features} == {"test-collection-a"}

    @pytest.mark.asyncio
    async def test_search_header_multiple_collections(
        self, app_client, multi_collection_ctx
    ):
        """Header with multiple collections filters to those collections."""
        response = await app_client.get(
            "/search",
            headers={FILTER_COLLECTIONS_HEADER: "test-collection-a,test-collection-b"},
        )
        assert response.status_code == 200
        features = response.json()["features"]

        # The fixture puts one item in each collection, so both allowed
        # collections must appear and the excluded one must not.
        assert {f["collection"] for f in features} == {
            "test-collection-a",
            "test-collection-b",
        }

    @pytest.mark.asyncio
    async def test_search_no_header_returns_all(self, app_client, multi_collection_ctx):
        """Without header, search returns items from all collections."""
        response = await app_client.get("/search")
        assert response.status_code == 200
        data = response.json()

        # Should have items from all collections
        collections_in_response = {f["collection"] for f in data["features"]}
        assert "test-collection-a" in collections_in_response
        assert "test-collection-b" in collections_in_response
        assert "test-collection-c" in collections_in_response

    @pytest.mark.asyncio
    async def test_post_search_uses_header_collections(
        self, app_client, multi_collection_ctx
    ):
        """POST /search also respects the header."""
        response = await app_client.post(
            "/search",
            json={},
            headers={FILTER_COLLECTIONS_HEADER: "test-collection-b"},
        )
        assert response.status_code == 200
        features = response.json()["features"]

        # Set comparison rejects an empty result as well as foreign items.
        assert {f["collection"] for f in features} == {"test-collection-b"}
class TestHeaderFilteringCollections:
    """Tests for collections endpoints with header filtering."""

    @pytest.mark.asyncio
    async def test_all_collections_filtered_by_header(
        self, app_client, multi_collection_ctx
    ):
        """GET /collections only returns collections from header."""
        response = await app_client.get(
            "/collections",
            headers={FILTER_COLLECTIONS_HEADER: "test-collection-a,test-collection-c"},
        )
        assert response.status_code == 200
        data = response.json()

        collection_ids = [c["id"] for c in data["collections"]]
        assert "test-collection-a" in collection_ids
        assert "test-collection-c" in collection_ids
        assert "test-collection-b" not in collection_ids

    @pytest.mark.asyncio
    async def test_get_collection_allowed_by_header(
        self, app_client, multi_collection_ctx
    ):
        """GET /collections/{id} works when collection is in header."""
        response = await app_client.get(
            "/collections/test-collection-a",
            headers={FILTER_COLLECTIONS_HEADER: "test-collection-a,test-collection-b"},
        )
        assert response.status_code == 200
        assert response.json()["id"] == "test-collection-a"

    @pytest.mark.asyncio
    async def test_get_collection_denied_by_header(
        self, app_client, multi_collection_ctx
    ):
        """GET /collections/{id} returns 404 when collection is not in header."""
        # The collection exists; only the header excludes it.
        response = await app_client.get(
            "/collections/test-collection-c",
            headers={FILTER_COLLECTIONS_HEADER: "test-collection-a,test-collection-b"},
        )
        assert response.status_code == 404

    @pytest.mark.asyncio
    async def test_get_collection_no_header_allowed(
        self, app_client, multi_collection_ctx
    ):
        """GET /collections/{id} works without header."""
        response = await app_client.get("/collections/test-collection-a")
        assert response.status_code == 200
        assert response.json()["id"] == "test-collection-a"
class TestHeaderFilteringItems:
    """Tests for item endpoints with header filtering."""

    @pytest.mark.asyncio
    async def test_item_collection_uses_header(self, app_client, multi_collection_ctx):
        """GET /collections/{id}/items respects header."""
        response = await app_client.get(
            "/collections/test-collection-a/items",
            headers={FILTER_COLLECTIONS_HEADER: "test-collection-a"},
        )
        assert response.status_code == 200
        features = response.json()["features"]

        # The allowed collection holds exactly one fixture item; check it is
        # actually returned rather than only checking the status code.
        assert {f["collection"] for f in features} == {"test-collection-a"}

    @pytest.mark.asyncio
    async def test_get_item_with_header(self, app_client, multi_collection_ctx):
        """GET /collections/{id}/items/{item_id} works with header."""
        response = await app_client.get(
            "/collections/test-collection-a/items/test-item-test-collection-a",
            headers={FILTER_COLLECTIONS_HEADER: "test-collection-a"},
        )
        assert response.status_code == 200
        assert response.json()["id"] == "test-item-test-collection-a"

    @pytest.mark.asyncio
    async def test_get_item_denied_by_header(self, app_client, multi_collection_ctx):
        """GET /collections/{id}/items/{item_id} returns 404 when collection is not in header."""
        # The item exists; only the header excludes its collection.
        response = await app_client.get(
            "/collections/test-collection-a/items/test-item-test-collection-a",
            headers={FILTER_COLLECTIONS_HEADER: "test-collection-b"},
        )
        assert response.status_code == 404


class TestGeometryHeaderFiltering:
    """Tests for geometry header filtering."""

    @pytest.mark.asyncio
    async def test_search_with_geometry_header(self, app_client, ctx):
        """Search respects X-Filter-Geometry header."""
        # Geometry that intersects with test item
        geometry = {
            "type": "Polygon",
            "coordinates": [
                [
                    [149.0, -34.5],
                    [149.0, -32.0],
                    [151.5, -32.0],
                    [151.5, -34.5],
                    [149.0, -34.5],
                ]
            ],
        }

        response = await app_client.get(
            "/search",
            headers={FILTER_GEOMETRY_HEADER: json.dumps(geometry)},
        )
        assert response.status_code == 200
        # The intersecting item must survive the filter; a status check alone
        # would also pass if the filter dropped everything.
        assert len(response.json()["features"]) >= 1

    @pytest.mark.asyncio
    async def test_search_with_non_intersecting_geometry(self, app_client, ctx):
        """Search with non-intersecting geometry returns no items."""
        # Geometry that doesn't intersect with test item
        geometry = {
            "type": "Polygon",
            "coordinates": [
                [
                    [0.0, 0.0],
                    [0.0, 1.0],
                    [1.0, 1.0],
                    [1.0, 0.0],
                    [0.0, 0.0],
                ]
            ],
        }

        response = await app_client.get(
            "/search",
            headers={FILTER_GEOMETRY_HEADER: json.dumps(geometry)},
        )
        assert response.status_code == 200
        data = response.json()
        assert len(data["features"]) == 0