From c6002f172da54f3b8745c0851c9299620459ad38 Mon Sep 17 00:00:00 2001 From: Adrian Galvan Date: Wed, 11 Mar 2026 17:08:28 -0400 Subject: [PATCH 01/17] Add design spec for purpose-based data model Co-Authored-By: Claude Opus 4.6 --- ...6-03-11-purpose-based-data-model-design.md | 517 ++++++++++++++++++ 1 file changed, 517 insertions(+) create mode 100644 docs/superpowers/specs/2026-03-11-purpose-based-data-model-design.md diff --git a/docs/superpowers/specs/2026-03-11-purpose-based-data-model-design.md b/docs/superpowers/specs/2026-03-11-purpose-based-data-model-design.md new file mode 100644 index 00000000000..55117a72a3a --- /dev/null +++ b/docs/superpowers/specs/2026-03-11-purpose-based-data-model-design.md @@ -0,0 +1,517 @@ +# Purpose-Based Data Model Design Spec + +**Date:** 2026-03-11 +**PRD:** [PRD: Purpose-Based Data Model](https://ethyca.atlassian.net/wiki/spaces/PM/pages/4457660423/PRD+Purpose-Based+Data+Model) +**Scope:** Full roadmap architecture, Phase 1 implementation + +--- + +## Summary + +Introduce a purpose-based data model built on four new first-class entities (Data Purpose, Data Consumer, Data Producer, extended Dataset) that decouple data governance from the system-centric model. Phase 1 delivers schema, models, and CRUD APIs. Later phases add migration from existing PrivacyDeclarations, dual-write, downstream feature migration, and deprecation of the old model. 
+ +## Key Decisions + +| Decision | Choice | Rationale | +|----------|--------|-----------| +| OSS/Paid split | Models + migrations in fides, routes + services in fidesplus | Consistent with existing architecture | +| Data migration | Deferred to Phase 2 | Reduce Phase 1 risk; schema designed to support it | +| DataConsumer for systems | Facade over `ctl_systems`, no new table row | Avoid sync overhead; systems stay in `ctl_systems` | +| DataConsumer for groups/projects | New `data_consumer` table | Non-system types need their own persistence | +| DataPurpose identity | `FidesBase` (fides_key), flat | Taxonomy citizen without hierarchy complexity | +| Dataset purposes | Extend JSON schema with soft references at all levels | Consistent with existing collection/field JSON pattern | +| Dataset purpose routes | No new routes; purposes are part of existing dataset payload | Avoids over-granular API surface | +| DataProducer | Full CRUD in Phase 1 | Low coupling, straightforward to build | +| Consumer-Purpose join | Audited (created_at, updated_at, assigned_by) | Avoids future migration on a potentially large table | +| Approach | Split tables, unified API schema (Approach C) | No sync burden for systems; clean relational model per type; repository layer abstraction available later | + +--- + +## Data Model + +### New Tables (fides OSS) + +#### `data_purpose` + +Replaces PrivacyDeclaration as a standalone, reusable entity. Inherits from `FidesBase` for `fides_key`, `name`, `description`. No parent key / hierarchy. 
+ +| Column | Type | Constraints | +|--------|------|-------------| +| `id` | String (UUID) | PK | +| `fides_key` | String | Unique, Not Null, Indexed | +| `name` | String | Not Null | +| `description` | String | Nullable | +| `data_use` | String | Not Null, Indexed | +| `data_subject` | String | Nullable | +| `data_categories` | ARRAY(String) | server_default `{}` | +| `legal_basis_for_processing` | String | Nullable | +| `flexible_legal_basis_for_processing` | Boolean | server_default `true` | +| `special_category_legal_basis` | String | Nullable | +| `impact_assessment_location` | String | Nullable | +| `retention_period` | String | Nullable | +| `features` | ARRAY(String) | server_default `{}` | +| `created_at` | DateTime(tz) | Auto | +| `updated_at` | DateTime(tz) | Auto | + +Design notes: +- One data use per purpose (deliberate constraint per PRD) +- At most one data subject (0..1 for MVP) +- Data categories are optional (act as allowlist when specified) + +#### `data_consumer` + +Stores non-system consumers (groups, projects, custom types). System-type consumers are surfaced via a facade over `ctl_systems`. + +| Column | Type | Constraints | +|--------|------|-------------| +| `id` | String (UUID) | PK | +| `name` | String | Not Null | +| `description` | String | Nullable | +| `type` | String | Not Null, Indexed, CHECK `type != 'system'` | +| `external_id` | String | Nullable | +| `egress` | JSON | Nullable | +| `ingress` | JSON | Nullable | +| `data_shared_with_third_parties` | Boolean | server_default `false` | +| `third_parties` | String | Nullable | +| `shared_categories` | ARRAY(String) | server_default `{}` | +| `contact_email` | String | Nullable | +| `contact_slack_channel` | String | Nullable | +| `contact_details` | JSON | Nullable | +| `tags` | ARRAY(String) | server_default `{}` | +| `created_at` | DateTime(tz) | Auto | +| `updated_at` | DateTime(tz) | Auto | + +Type extensibility: `type` is a string, not a DB enum. 
Seed values: `group`, `project`. Customers register additional types via API. The CHECK constraint prevents `system` type rows (those go through the facade). + +#### `data_consumer_purpose` + +Join table: non-system consumer to purpose. Audited. + +| Column | Type | Constraints | +|--------|------|-------------| +| `id` | String (UUID) | PK | +| `data_consumer_id` | String (FK) | Not Null, references `data_consumer.id` | +| `data_purpose_id` | String (FK) | Not Null, references `data_purpose.id` | +| `assigned_by` | String (FK) | Nullable, references `fidesuser.id` | +| `created_at` | DateTime(tz) | Auto | +| `updated_at` | DateTime(tz) | Auto | + +Unique constraint on `(data_consumer_id, data_purpose_id)`. + +#### `system_purpose` + +Join table: system to purpose (via facade). Identical schema to `data_consumer_purpose` for future abstraction. + +| Column | Type | Constraints | +|--------|------|-------------| +| `id` | String (UUID) | PK | +| `system_id` | String (FK) | Not Null, references `ctl_systems.id` | +| `data_purpose_id` | String (FK) | Not Null, references `data_purpose.id` | +| `assigned_by` | String (FK) | Nullable, references `fidesuser.id` | +| `created_at` | DateTime(tz) | Auto | +| `updated_at` | DateTime(tz) | Auto | + +Unique constraint on `(system_id, data_purpose_id)`. + +#### `data_producer` + +Lightweight entity representing people/teams responsible for data registration and purpose assignment. + +| Column | Type | Constraints | +|--------|------|-------------| +| `id` | String (UUID) | PK | +| `name` | String | Not Null | +| `description` | String | Nullable | +| `external_id` | String | Nullable | +| `monitor_id` | String (FK) | Nullable, references `monitorconfig.id` | +| `contact_email` | String | Nullable | +| `contact_slack_channel` | String | Nullable | +| `contact_details` | JSON | Nullable | +| `created_at` | DateTime(tz) | Auto | +| `updated_at` | DateTime(tz) | Auto | + +#### `data_producer_member` + +Join table: producer to user. 
+ +| Column | Type | Constraints | +|--------|------|-------------| +| `id` | String (UUID) | PK | +| `data_producer_id` | String (FK) | Not Null, references `data_producer.id` | +| `user_id` | String (FK) | Not Null, references `fidesuser.id` | +| `created_at` | DateTime(tz) | Auto | +| `updated_at` | DateTime(tz) | Auto | + +Unique constraint on `(data_producer_id, user_id)`. + +#### `dataset_purpose` + +Join table: dataset to purpose. Audited. + +| Column | Type | Constraints | +|--------|------|-------------| +| `id` | String (UUID) | PK | +| `dataset_id` | String (FK) | Not Null, references `ctl_datasets.id` | +| `data_purpose_id` | String (FK) | Not Null, references `data_purpose.id` | +| `assigned_by` | String (FK) | Nullable, references `fidesuser.id` | +| `created_at` | DateTime(tz) | Auto | +| `updated_at` | DateTime(tz) | Auto | + +Unique constraint on `(dataset_id, data_purpose_id)`. + +### Extended Existing Tables + +#### `ctl_datasets` + +New column: + +| Column | Type | Constraints | +|--------|------|-------------| +| `data_producer_id` | String (FK) | Nullable, references `data_producer.id` | + +#### Collection/Field JSON Schema Extension + +The existing JSON blob for collections, fields, and sub-fields gains an optional `data_purposes` array at every level: + +```yaml +dataset: + fides_key: customer_analytics_db + data_purposes: ["customer_marketing"] # dataset level (join table) + collections: + - name: user_profiles + data_purposes: ["personalization"] # collection level (JSON) + fields: + - name: preferences + data_purposes: ["recommendation"] # field level (JSON) + fields: + - name: topics + data_purposes: ["content_curation"] # sub-field level (JSON) +``` + +Purposes are soft references (`fides_key` strings). Validated on write against the `data_purpose` table. + +**Additive inheritance:** Effective purposes at any level = own purposes + all ancestor purposes. No override or exclusion mechanism. 
+ +### Entity Relationship Diagram + +``` + +-----------------+ + | data_purpose | + | (FidesBase) | + | - fides_key | + | - data_use | + | - data_subject | + +--------+--------+ + | + +-----------+------+--------+--------+----------+ + | | | | | + system_purpose | data_consumer_purpose | dataset_purpose + (audited join) | (audited join) | (audited join) + | | | | | + +------+------+ | +------+-------+ | +------+------+ + | ctl_systems | | | data_consumer | | | ctl_datasets| + | (existing) | | | (group/proj) | | | (extended) | + +-------------+ | +--------------+ | +------+------+ + | | | + | | data_producer_id FK + | | | + | | +------+-------+ + | | | data_producer | + | | +------+-------+ + | | | + | | data_producer_member + | | (join table) + | | | + | | +------+------+ + | | | fidesuser | + | | +-------------+ + | | + Collection/Field JSON | + (soft data_purposes refs) ------+ +``` + +--- + +## Service Layer Architecture + +All services live in **fidesplus**. Models and migrations in **fides OSS**. + +### DataPurposeService + +- Standard CRUD on the `data_purpose` table +- Validates `data_use` references exist in the `DataUse` taxonomy +- Validates `data_subject` references exist in the `DataSubject` taxonomy +- Validates `fides_key` uniqueness +- On delete: checks for referencing consumers (both `system_purpose` and `data_consumer_purpose`), datasets (`dataset_purpose` + collection JSON), and blocks or cascades based on policy + +### DataConsumerService + +The core facade. Two internal code paths, one external interface. + +**Read path:** +- `get(id, type)`: if `type=system`, queries `ctl_systems` + `system_purpose`; otherwise queries `data_consumer` + `data_consumer_purpose` +- `list(filters)`: queries both sources, merges into unified `DataConsumerResponse` list. Supports filtering by type, purpose, tags. 
+- For system-type consumers, hydrates purpose associations from `system_purpose` and maps System fields into the `DataConsumerResponse` schema + +**Write path (system):** +- Purpose assignment/removal only. Writes to `system_purpose` join table. Does not modify `ctl_systems` directly. +- System creation/update continues through existing System endpoints. + +**Write path (group/project):** +- Full CRUD on `data_consumer` table +- Purpose assignment writes to `data_consumer_purpose` + +**Listing across types:** +- For paginated list endpoints, query both sources with matching filters, merge in-memory, sort, paginate. Acceptable for Phase 1 volumes. + +### DataProducerService + +- CRUD on `data_producer` table +- Member management: add/remove users via `data_producer_member` join table +- Dataset assignment: set/clear `data_producer_id` FK on `ctl_datasets` +- Optional monitor link: validate `monitor_id` references a valid `MonitorConfig` + +### Dataset Purpose Handling + +Not a separate service. Integrated into the existing dataset write path: +- On dataset create/update, if `data_purposes` is present at dataset level, validate and persist via `dataset_purpose` join table +- If `data_purposes` is present at collection/field/sub-field levels in the JSON, validate all `fides_key` references and persist in the JSON blob +- On dataset read, include `data_purposes` at each level as stored + +### Repository Layer + +For Phase 1, services interact with models directly (SQLAlchemy session). The two-path logic in `DataConsumerService` is contained within the service methods. + +Future refactor path (when complexity warrants it): +- `SystemConsumerRepository`: reads/writes `ctl_systems` + `system_purpose` +- `NonSystemConsumerRepository`: reads/writes `data_consumer` + `data_consumer_purpose` +- Both implement a shared `ConsumerRepositoryProtocol` + +### Cross-Cutting Concerns + +**Audit logging:** All mutations fire audit events. 
For system-type consumers, purpose changes log against the system. For non-system consumers, use the generic audit log. + +**Permissions:** New scope prefixes: +- `data_purpose:read`, `data_purpose:create`, `data_purpose:update`, `data_purpose:delete` +- `data_consumer:read`, `data_consumer:create`, `data_consumer:update`, `data_consumer:delete` +- `data_producer:read`, `data_producer:create`, `data_producer:update`, `data_producer:delete` + +System-type consumer purpose assignment requires `system:update` scope. + +--- + +## API Routes + +All routes in **fidesplus**, under `/api/v1/`. + +### Data Purpose + +| Method | Path | Description | +|--------|------|-------------| +| `POST` | `/data-purpose` | Create a new Data Purpose | +| `GET` | `/data-purpose` | List purposes (paginated, filterable by `data_use`, `data_subject`) | +| `GET` | `/data-purpose/{fides_key}` | Get a single purpose | +| `PUT` | `/data-purpose/{fides_key}` | Update a purpose | +| `DELETE` | `/data-purpose/{fides_key}` | Delete (blocked if in use, unless `?force=true`) | + +### Data Consumer + +| Method | Path | Description | +|--------|------|-------------| +| `GET` | `/data-consumer` | List all consumers (unified). Filters: `type`, `purpose_fides_key`, `tags` | +| `GET` | `/data-consumer/{id}` | Get a single consumer. For `type=system`, `id` is the system `id`. | +| `POST` | `/data-consumer` | Create a non-system consumer. Returns 400 if `type=system`. | +| `PUT` | `/data-consumer/{id}` | Update a non-system consumer. Returns 400 for system-type. | +| `DELETE` | `/data-consumer/{id}` | Delete a non-system consumer. System-type cannot be deleted here. 
| + +**Purpose assignment (works for all types):** + +| Method | Path | Description | +|--------|------|-------------| +| `PUT` | `/data-consumer/{id}/purpose` | Set the full list of purposes (replace semantics) | +| `POST` | `/data-consumer/{id}/purpose/{fides_key}` | Add a single purpose | +| `DELETE` | `/data-consumer/{id}/purpose/{fides_key}` | Remove a single purpose | + +Purpose assignment routes write to `system_purpose` or `data_consumer_purpose` based on type, transparently. + +### Data Producer + +| Method | Path | Description | +|--------|------|-------------| +| `POST` | `/data-producer` | Create a producer | +| `GET` | `/data-producer` | List producers (paginated) | +| `GET` | `/data-producer/{id}` | Get a single producer | +| `PUT` | `/data-producer/{id}` | Update a producer | +| `DELETE` | `/data-producer/{id}` | Delete (nullifies `data_producer_id` on datasets) | + +**Member management:** + +| Method | Path | Description | +|--------|------|-------------| +| `PUT` | `/data-producer/{id}/member` | Set the full member list (replace semantics) | +| `POST` | `/data-producer/{id}/member/{user_id}` | Add a member | +| `DELETE` | `/data-producer/{id}/member/{user_id}` | Remove a member | + +### Dataset (existing endpoints, extended payload) + +No new routes. Existing `POST /dataset` and `PUT /dataset/{fides_key}` accept: +- `data_purposes: string[]` at dataset level +- `data_purposes: string[]` within each collection, field, and sub-field in the JSON blob +- `data_producer_id: string | None` + +Existing `GET /dataset/{fides_key}` response extended with these fields. 
+ +### Unified Response Schema + +```python +class DataConsumerResponse(BaseModel): + id: str + name: str + description: str | None + type: str # "system", "group", "project", custom + external_id: str | None + purposes: list[DataPurposeResponse] + # System-type only (from ctl_systems): + system_fides_key: str | None + vendor_id: str | None + # Non-system only: + egress: dict | None + ingress: dict | None + tags: list[str] + contact_email: str | None + contact_slack_channel: str | None + contact_details: dict | None + created_at: datetime + updated_at: datetime +``` + +System-specific fields (`cookie_max_age_seconds`, etc.) are not duplicated. Clients needing full system detail use existing system endpoints via `system_fides_key`. + +--- + +## Migration & Backward Compatibility + +### Alembic Migration (Phase 1) + +Single migration file. Table creation order (respects FK dependencies): + +1. `data_purpose` (no FKs to new tables) +2. `data_producer` (FK to `monitorconfig`) +3. `data_consumer` (no FKs to new tables) +4. `data_consumer_purpose` (FKs to `data_consumer` + `data_purpose`) +5. `system_purpose` (FKs to `ctl_systems` + `data_purpose`) +6. `data_producer_member` (FKs to `data_producer` + `fidesuser`) +7. `dataset_purpose` (FKs to `ctl_datasets` + `data_purpose`) +8. ALTER `ctl_datasets`: add `data_producer_id` FK column + +Migration conventions: +- All new nullable columns use `nullable=True` +- Array columns use `server_default="{}"` +- Boolean columns use `server_default` with explicit values +- Partial indexes on `system_purpose(system_id)` and `data_consumer_purpose(data_consumer_id)` +- Index on `data_purpose(data_use)` and `data_purpose(fides_key)` + +Downgrade: drop tables in reverse order, remove `data_producer_id` from `ctl_datasets`. 
+ +### Backward Compatibility Guarantees + +- `ctl_systems` table untouched (no column adds/removes/renames) +- `privacydeclaration` table untouched +- All existing System and PrivacyDeclaration API endpoints continue to work identically +- `systemmanager` join table continues to function +- DSR traversal path (System > ConnectionConfig > DatasetConfig) unchanged +- Existing dataset payloads without `data_purposes` or `data_producer_id` continue to work +- Collection/field JSON blobs without `data_purposes` are valid (absence = empty list) +- No existing API response shapes change; new fields are additive only + +### Feature Flag + +fidesplus setting: `purpose_based_model_enabled: bool = False` + +- When `False`: new endpoints return 404, dataset purpose fields are ignored on write, stripped on read +- When `True`: full functionality available +- Allows deployment of the migration without exposing the feature until ready + +--- + +## Phased Roadmap + +### Phase 1: Schema + CRUD (implementation scope) + +**fides OSS:** +- Alembic migration: all new tables, `data_producer_id` FK on `ctl_datasets` +- SQLAlchemy models for all new tables +- Extend dataset/collection/field Pydantic schemas with optional `data_purposes` + +**fidesplus:** +- `DataPurposeService` + CRUD routes +- `DataConsumerService` (facade) + CRUD routes + purpose assignment routes +- `DataProducerService` + CRUD routes + member management routes +- Dataset service extension: validate and persist purpose references on dataset write + +**Not included:** Data migration from PrivacyDeclarations, dual-write, downstream feature updates. 
+ +### Phase 2: Dual-Write Bridge + +- System endpoint writes (create/update privacy declarations) intercepted to mirror into `system_purpose` +- Feature flag controls whether reads come from old model or new model +- Data migration script: backfill `data_purpose` rows from existing PrivacyDeclarations, create `system_purpose` associations +- Deduplication strategy decided at migration time + +### Phase 3: Downstream Feature Migration + +| Feature | Migration | +|---------|-----------| +| **Policy Evaluation** | Read purposes from `system_purpose` / `data_consumer_purpose` instead of `privacydeclaration` | +| **Datamap / Data Inventory** | Render DataConsumers (all types) with their purposes. System-type includes system metadata via facade. | +| **Consent / TCF** | `TCFPurposeOverride` references `data_purpose.fides_key`. Flexibility evaluated at purpose level. | +| **Privacy Requests (DSR)** | No change. Connection configs remain on System. DataConsumer(system) > System > ConnectionConfig unchanged. | +| **Discovery / Classification** | Classification proposes DataPurpose assignments. DataProducer members review. | +| **PBAC** | Built natively: compare DataConsumer purposes against Dataset effective purposes. | + +### Phase 4: Deprecation + +- PrivacyDeclaration endpoints marked deprecated +- Dual-write removed, old model becomes read-only +- Eventually: drop `privacydeclaration` table, remove bridge code + +--- + +## Testing Strategy + +### Unit Tests (fides OSS) + +- **Model tests:** CRUD on each new model, constraint validation (unique fides_key, CHECK constraint on `data_consumer.type`, unique join table entries) +- **Schema tests:** Pydantic validation for DataPurpose, DataConsumer, DataProducer schemas. Dataset schema extension with `data_purposes` at all levels. 
+- **Additive inheritance:** Compute effective purposes across dataset > collection > field > sub-field hierarchy + +### Integration Tests (fidesplus) + +- **DataPurposeService:** Create/read/update/delete. Validate `data_use` and `data_subject` references. Delete blocked when purpose is in use. +- **DataConsumerService facade:** + - List returns both system-type and non-system consumers in unified schema + - Purpose assignment to system-type writes to `system_purpose` + - Purpose assignment to group/project writes to `data_consumer_purpose` + - Create with `type=system` is rejected + - Get by ID resolves system-type from `ctl_systems` +- **DataProducerService:** CRUD, member add/remove, dataset assignment +- **Dataset purposes:** Full dataset payload with purposes at multiple levels, round-trip read/write, purpose validation failures + +### API Tests (fidesplus) + +- Route-level tests for each endpoint: auth/permissions, request validation, response shape +- Feature flag off: endpoints return 404 +- Cross-type consumer listing with pagination and filters + +--- + +## Open Questions (from PRD) + +| # | Question | Status | +|---|----------|--------| +| 1 | Should `data_subject` on Data Purpose be required or optional? | **Decided: Optional (0..1) for MVP** | +| 2 | Deduplication during migration? | **Deferred to Phase 2** | +| 3 | Should Consumer-Purpose join carry metadata? | **Decided: Yes (created_at, updated_at, assigned_by)** | +| 4 | How does `flexible_legal_basis_for_processing` map to new model? | **Deferred to Phase 3 (TCF migration)** | +| 5 | Data Producers assigned to collections or dataset only? | **Decided: Dataset level only (FK on ctl_datasets)** | +| 6 | Naming convention for auto-generated `fides_key` during migration? 
| **Deferred to Phase 2** | From 715495370eebdb4a83b47e67bbc41e4c33ebf2dd Mon Sep 17 00:00:00 2001 From: Adrian Galvan Date: Wed, 11 Mar 2026 17:17:30 -0400 Subject: [PATCH 02/17] Address spec review findings for purpose-based data model Clarify FidesBase inheritance pattern, add cascade/delete behavior, fix response schema conventions, document facade coercion, add comprehensive index specifications, and resolve GET-by-ID ambiguity. Co-Authored-By: Claude Opus 4.6 --- ...6-03-11-purpose-based-data-model-design.md | 96 +++++++++++++------ 1 file changed, 69 insertions(+), 27 deletions(-) diff --git a/docs/superpowers/specs/2026-03-11-purpose-based-data-model-design.md b/docs/superpowers/specs/2026-03-11-purpose-based-data-model-design.md index 55117a72a3a..61958419261 100644 --- a/docs/superpowers/specs/2026-03-11-purpose-based-data-model-design.md +++ b/docs/superpowers/specs/2026-03-11-purpose-based-data-model-design.md @@ -33,14 +33,16 @@ Introduce a purpose-based data model built on four new first-class entities (Dat #### `data_purpose` -Replaces PrivacyDeclaration as a standalone, reusable entity. Inherits from `FidesBase` for `fides_key`, `name`, `description`. No parent key / hierarchy. +Replaces PrivacyDeclaration as a standalone, reusable entity. Inherits from both `Base` and `FidesBase` (like `System` and `Dataset`), giving it `id` (UUID PK from `Base`) plus `fides_key`, `name`, `description`, `organization_fides_key`, and `tags` (from `FidesBase`). No parent key / hierarchy. `fides_key` is used as the unique PK in `FidesBase`, but `id` is the actual PK used by join table FKs. This matches the existing dual-key pattern used by `System`, `Dataset`, etc. 
| Column | Type | Constraints | |--------|------|-------------| -| `id` | String (UUID) | PK | -| `fides_key` | String | Unique, Not Null, Indexed | -| `name` | String | Not Null | -| `description` | String | Nullable | +| `id` | String (UUID) | PK (from `Base`) | +| `fides_key` | String | Unique, Not Null, Indexed (from `FidesBase`) | +| `name` | String | Not Null (from `FidesBase`) | +| `description` | String | Nullable (from `FidesBase`) | +| `organization_fides_key` | String | Nullable (from `FidesBase`, default "default_organization") | +| `tags` | ARRAY(String) | Nullable (from `FidesBase`) | | `data_use` | String | Not Null, Indexed | | `data_subject` | String | Nullable | | `data_categories` | ARRAY(String) | server_default `{}` | @@ -55,8 +57,18 @@ Replaces PrivacyDeclaration as a standalone, reusable entity. Inherits from `Fid Design notes: - One data use per purpose (deliberate constraint per PRD) -- At most one data subject (0..1 for MVP) +- At most one data subject (0..1 for MVP). Note: existing `PrivacyDeclaration.data_subjects` is `ARRAY(String)`. Declarations with multiple subjects will need to be split into multiple purposes during Phase 2 migration. - Data categories are optional (act as allowlist when specified) +- `processes_special_category_data` from PrivacyDeclaration is intentionally omitted (derived: present when `special_category_legal_basis` is set) +- All join tables reference `data_purpose.id` (UUID), not `fides_key`. API routes use `fides_key` as the URL identifier; services perform the `fides_key` to `id` lookup internally. +- `flexible_legal_basis_for_processing` and `features` should be `NOT NULL` (matching PrivacyDeclaration pattern), with their `server_default` values. +- All models require explicit `__tablename__` overrides (e.g., `__tablename__ = "data_purpose"`) since the auto-generated names from class names would produce `datapurpose`, `dataconsumer`, etc. 
+ +**Facade field coercion (system-type consumers):** When mapping `ctl_systems` rows into `DataConsumerResponse`, the service must: +- Set `type` to `"system"` (hardcoded) +- Coalesce `tags` from `None` to `[]` (System's `tags` column has no server default) +- Map `egress`/`ingress` from System's JSON columns to `Optional[dict]` +- Populate `data_shared_with_third_parties`, `third_parties`, `shared_categories` from the system's privacy declarations where available #### `data_consumer` @@ -81,7 +93,9 @@ Stores non-system consumers (groups, projects, custom types). System-type consum | `created_at` | DateTime(tz) | Auto | | `updated_at` | DateTime(tz) | Auto | -Type extensibility: `type` is a string, not a DB enum. Seed values: `group`, `project`. Customers register additional types via API. The CHECK constraint prevents `system` type rows (those go through the facade). +Type extensibility: `type` is a free-form string, not a DB enum. There is no type registry table. Seed values (`group`, `project`) are documented conventions, not enforced. Customers can use any string value except `system`. The CHECK constraint prevents `system` type rows (those go through the facade). + +Note: `data_consumer` has no `fides_key`. Non-system consumers are identified by opaque UUID `id` only. System-type consumers exposed via the facade have a `system_fides_key` available in the response schema (from `ctl_systems.fides_key`). This is a deliberate difference: purposes are taxonomy-like (hence `fides_key`), consumers are not. 
#### `data_consumer_purpose` @@ -123,7 +137,7 @@ Lightweight entity representing people/teams responsible for data registration a | `name` | String | Not Null | | `description` | String | Nullable | | `external_id` | String | Nullable | -| `monitor_id` | String (FK) | Nullable, references `monitorconfig.id` | +| `monitor_id` | String (FK) | Nullable, references `monitorconfig.id` (UUID, not the `key` field) | | `contact_email` | String | Nullable | | `contact_slack_channel` | String | Nullable | | `contact_details` | JSON | Nullable | @@ -242,7 +256,7 @@ All services live in **fidesplus**. Models and migrations in **fides OSS**. - Validates `data_use` references exist in the `DataUse` taxonomy - Validates `data_subject` references exist in the `DataSubject` taxonomy - Validates `fides_key` uniqueness -- On delete: checks for referencing consumers (both `system_purpose` and `data_consumer_purpose`), datasets (`dataset_purpose` + collection JSON), and blocks or cascades based on policy +- On delete: blocked by DB-level ON DELETE RESTRICT if the purpose is referenced by any `system_purpose`, `data_consumer_purpose`, or `dataset_purpose` rows. The `?force=true` query param bypasses this by first removing all join table references, then deleting the purpose. Collection-level JSON soft references are not FK-enforced; the service scans and warns about orphaned references but does not block on them. ### DataConsumerService @@ -319,7 +333,7 @@ All routes in **fidesplus**, under `/api/v1/`. | Method | Path | Description | |--------|------|-------------| | `GET` | `/data-consumer` | List all consumers (unified). Filters: `type`, `purpose_fides_key`, `tags` | -| `GET` | `/data-consumer/{id}` | Get a single consumer. For `type=system`, `id` is the system `id`. | +| `GET` | `/data-consumer/{id}` | Get a single consumer. Requires `?type=system` query param for system lookups (uses system `id`). Without `type` param, looks up in `data_consumer` table only. 
| | `POST` | `/data-consumer` | Create a non-system consumer. Returns 400 if `type=system`. | | `PUT` | `/data-consumer/{id}` | Update a non-system consumer. Returns 400 for system-type. | | `DELETE` | `/data-consumer/{id}` | Delete a non-system consumer. System-type cannot be deleted here. | @@ -332,7 +346,7 @@ All routes in **fidesplus**, under `/api/v1/`. | `POST` | `/data-consumer/{id}/purpose/{fides_key}` | Add a single purpose | | `DELETE` | `/data-consumer/{id}/purpose/{fides_key}` | Remove a single purpose | -Purpose assignment routes write to `system_purpose` or `data_consumer_purpose` based on type, transparently. +Purpose assignment routes also require `?type=system` query param when operating on a system consumer. Without the param, the `{id}` is looked up in the `data_consumer` table only. This matches the GET-by-ID resolution strategy. ### Data Producer @@ -367,25 +381,31 @@ Existing `GET /dataset/{fides_key}` response extended with these fields. class DataConsumerResponse(BaseModel): id: str name: str - description: str | None - type: str # "system", "group", "project", custom - external_id: str | None - purposes: list[DataPurposeResponse] + description: Optional[str] + type: str # "system", "group", "project", custom + external_id: Optional[str] + purposes: List[DataPurposeResponse] # System-type only (from ctl_systems): - system_fides_key: str | None - vendor_id: str | None - # Non-system only: - egress: dict | None - ingress: dict | None - tags: list[str] - contact_email: str | None - contact_slack_channel: str | None - contact_details: dict | None + system_fides_key: Optional[str] + vendor_id: Optional[str] + # All types (from data_consumer table or ctl_systems+privacydeclaration): + egress: Optional[dict] + ingress: Optional[dict] + data_shared_with_third_parties: Optional[bool] + third_parties: Optional[str] + shared_categories: Optional[List[str]] + tags: List[str] + contact_email: Optional[str] + contact_slack_channel: Optional[str] + 
contact_details: Optional[dict] created_at: datetime updated_at: datetime ``` -System-specific fields (`cookie_max_age_seconds`, etc.) are not duplicated. Clients needing full system detail use existing system endpoints via `system_fides_key`. +Notes: +- Uses `Optional[X]` / `List[X]` (not `X | None` / `list[X]`) to match existing codebase Pydantic conventions. +- `data_shared_with_third_parties`, `third_parties`, `shared_categories` are included for all types. For system-type consumers, these are populated from `ctl_systems`/`privacydeclaration` data where available. +- System-specific fields (`cookie_max_age_seconds`, etc.) are not duplicated. Clients needing full system detail use existing system endpoints via `system_fides_key`. --- @@ -408,8 +428,30 @@ Migration conventions: - All new nullable columns use `nullable=True` - Array columns use `server_default="{}"` - Boolean columns use `server_default` with explicit values -- Partial indexes on `system_purpose(system_id)` and `data_consumer_purpose(data_consumer_id)` -- Index on `data_purpose(data_use)` and `data_purpose(fides_key)` + +Indexes: +- `data_purpose(fides_key)` — unique index (lookups by fides_key) +- `data_purpose(data_use)` — for filtering by data use +- `data_consumer(type)` — for filtering by type +- `system_purpose(system_id)` — for hydrating system consumers +- `system_purpose(data_purpose_id)` — for "find all consumers for a purpose" queries +- `data_consumer_purpose(data_consumer_id)` — for hydrating non-system consumers +- `data_consumer_purpose(data_purpose_id)` — for "find all consumers for a purpose" queries +- `dataset_purpose(dataset_id)` — for hydrating dataset purposes +- `dataset_purpose(data_purpose_id)` — for "find all datasets for a purpose" queries +- `data_producer_member(data_producer_id)` — for listing producer members +- `data_producer_member(user_id)` — for finding a user's producer memberships + +FK cascade/delete behavior: +- `system_purpose.system_id` ON DELETE CASCADE 
(matches existing `privacydeclaration` cascade-on-system-delete behavior) +- `system_purpose.data_purpose_id` ON DELETE RESTRICT (prevent deleting a purpose that's in use) +- `data_consumer_purpose.data_consumer_id` ON DELETE CASCADE (deleting a consumer removes its purpose links) +- `data_consumer_purpose.data_purpose_id` ON DELETE RESTRICT (prevent deleting a purpose that's in use) +- `dataset_purpose.dataset_id` ON DELETE CASCADE (deleting a dataset removes its purpose links) +- `dataset_purpose.data_purpose_id` ON DELETE RESTRICT (prevent deleting a purpose that's in use) +- `data_producer_member.data_producer_id` ON DELETE CASCADE (deleting a producer removes its member links) +- `data_producer_member.user_id` ON DELETE CASCADE (deleting a user removes their producer memberships) +- `ctl_datasets.data_producer_id` ON DELETE SET NULL (deleting a producer nullifies the FK on datasets) Downgrade: drop tables in reverse order, remove `data_producer_id` from `ctl_datasets`. From 21d625ff01f10e5157e85cf0cad6fca78c419262 Mon Sep 17 00:00:00 2001 From: Adrian Galvan Date: Wed, 11 Mar 2026 17:48:07 -0400 Subject: [PATCH 03/17] Remove dataset_purpose join table, use column + soft refs instead Dataset-level purposes stored as ARRAY(String) column on ctl_datasets. Collection/field/sub-field purposes remain as soft refs in JSON. All dataset purpose references are fides_key strings validated on write. No join table needed for dataset-to-purpose relationships. 
Co-Authored-By: Claude Opus 4.6 --- ...6-03-11-purpose-based-data-model-design.md | 98 +++++++++---------- 1 file changed, 44 insertions(+), 54 deletions(-) diff --git a/docs/superpowers/specs/2026-03-11-purpose-based-data-model-design.md b/docs/superpowers/specs/2026-03-11-purpose-based-data-model-design.md index 61958419261..d914066a763 100644 --- a/docs/superpowers/specs/2026-03-11-purpose-based-data-model-design.md +++ b/docs/superpowers/specs/2026-03-11-purpose-based-data-model-design.md @@ -60,7 +60,7 @@ Design notes: - At most one data subject (0..1 for MVP). Note: existing `PrivacyDeclaration.data_subjects` is `ARRAY(String)`. Declarations with multiple subjects will need to be split into multiple purposes during Phase 2 migration. - Data categories are optional (act as allowlist when specified) - `processes_special_category_data` from PrivacyDeclaration is intentionally omitted (derived: present when `special_category_legal_basis` is set) -- All join tables reference `data_purpose.id` (UUID), not `fides_key`. API routes use `fides_key` as the URL identifier; services perform the `fides_key` to `id` lookup internally. +- Consumer join tables (`system_purpose`, `data_consumer_purpose`) reference `data_purpose.id` (UUID), not `fides_key`. Dataset purposes are soft references by `fides_key` string. API routes use `fides_key` as the URL identifier; services perform the `fides_key` to `id` lookup internally for join table writes. - `flexible_legal_basis_for_processing` and `features` should be `NOT NULL` (matching PrivacyDeclaration pattern), with their `server_default` values. - All models require explicit `__tablename__` overrides (e.g., `__tablename__ = "data_purpose"`) since the auto-generated names from class names would produce `datapurpose`, `dataconsumer`, etc. @@ -158,31 +158,19 @@ Join table: producer to user. Unique constraint on `(data_producer_id, user_id)`. -#### `dataset_purpose` - -Join table: dataset to purpose. Audited. 
- -| Column | Type | Constraints | -|--------|------|-------------| -| `id` | String (UUID) | PK | -| `dataset_id` | String (FK) | Not Null, references `ctl_datasets.id` | -| `data_purpose_id` | String (FK) | Not Null, references `data_purpose.id` | -| `assigned_by` | String (FK) | Nullable, references `fidesuser.id` | -| `created_at` | DateTime(tz) | Auto | -| `updated_at` | DateTime(tz) | Auto | - -Unique constraint on `(dataset_id, data_purpose_id)`. - ### Extended Existing Tables #### `ctl_datasets` -New column: +New columns: | Column | Type | Constraints | |--------|------|-------------| +| `data_purposes` | ARRAY(String) | server_default `{}`, Nullable | | `data_producer_id` | String (FK) | Nullable, references `data_producer.id` | +`data_purposes` is an array of `fides_key` strings (soft references to `data_purpose`). Validated on write against the `data_purpose` table. This is the same pattern used at every level of the dataset hierarchy. + #### Collection/Field JSON Schema Extension The existing JSON blob for collections, fields, and sub-fields gains an optional `data_purposes` array at every level: @@ -190,7 +178,7 @@ The existing JSON blob for collections, fields, and sub-fields gains an optional ```yaml dataset: fides_key: customer_analytics_db - data_purposes: ["customer_marketing"] # dataset level (join table) + data_purposes: ["customer_marketing"] # dataset level (column on ctl_datasets) collections: - name: user_profiles data_purposes: ["personalization"] # collection level (JSON) @@ -202,7 +190,7 @@ dataset: data_purposes: ["content_curation"] # sub-field level (JSON) ``` -Purposes are soft references (`fides_key` strings). Validated on write against the `data_purpose` table. +All purpose references at every level (dataset, collection, field, sub-field) are soft references (`fides_key` strings). There is no `dataset_purpose` join table. Validated on write against the `data_purpose` table. 
**Additive inheritance:** Effective purposes at any level = own purposes + all ancestor purposes. No override or exclusion mechanism. @@ -217,31 +205,37 @@ Purposes are soft references (`fides_key` strings). Validated on write against t | - data_subject | +--------+--------+ | - +-----------+------+--------+--------+----------+ - | | | | | - system_purpose | data_consumer_purpose | dataset_purpose - (audited join) | (audited join) | (audited join) - | | | | | - +------+------+ | +------+-------+ | +------+------+ - | ctl_systems | | | data_consumer | | | ctl_datasets| - | (existing) | | | (group/proj) | | | (extended) | - +-------------+ | +--------------+ | +------+------+ - | | | - | | data_producer_id FK - | | | - | | +------+-------+ - | | | data_producer | - | | +------+-------+ - | | | - | | data_producer_member - | | (join table) - | | | - | | +------+------+ - | | | fidesuser | - | | +-------------+ + +-----------+------+--------+--------+ + | | | | + system_purpose | data_consumer_purpose | + (audited join) | (audited join) | + | | | | + +------+------+ | +------+-------+ | + | ctl_systems | | | data_consumer | | + | (existing) | | | (group/proj) | | + +-------------+ | +--------------+ | + | | + soft data_purposes refs | + (fides_key strings) | + | | + +------+------+ | + | ctl_datasets| | + | (extended) | | + | + collections/fields JSON | + +------+------+ | + | | + data_producer_id FK | + | | + +------+-------+ | + | data_producer | | + +------+-------+ | + | | + data_producer_member | + (join table) | | | - Collection/Field JSON | - (soft data_purposes refs) ------+ + +------+------+ | + | fidesuser | | + +-------------+ -----------------+ ``` --- @@ -256,7 +250,7 @@ All services live in **fidesplus**. Models and migrations in **fides OSS**. 
- Validates `data_use` references exist in the `DataUse` taxonomy - Validates `data_subject` references exist in the `DataSubject` taxonomy - Validates `fides_key` uniqueness -- On delete: blocked by DB-level ON DELETE RESTRICT if the purpose is referenced by any `system_purpose`, `data_consumer_purpose`, or `dataset_purpose` rows. The `?force=true` query param bypasses this by first removing all join table references, then deleting the purpose. Collection-level JSON soft references are not FK-enforced; the service scans and warns about orphaned references but does not block on them. +- On delete: blocked by DB-level ON DELETE RESTRICT if the purpose is referenced by any `system_purpose` or `data_consumer_purpose` rows. The `?force=true` query param bypasses this by first removing all join table references, then deleting the purpose. Dataset/collection/field-level purpose references are soft (string arrays, not FK-enforced); the service scans and warns about orphaned references but does not block on them. ### DataConsumerService @@ -288,8 +282,9 @@ The core facade. Two internal code paths, one external interface. ### Dataset Purpose Handling Not a separate service. Integrated into the existing dataset write path: -- On dataset create/update, if `data_purposes` is present at dataset level, validate and persist via `dataset_purpose` join table -- If `data_purposes` is present at collection/field/sub-field levels in the JSON, validate all `fides_key` references and persist in the JSON blob +- On dataset create/update, validate all `data_purposes` `fides_key` references at every level (dataset column, collection/field/sub-field JSON) against the `data_purpose` table +- Dataset-level purposes are persisted as the `data_purposes` ARRAY column on `ctl_datasets` +- Collection/field/sub-field-level purposes are persisted within the JSON blob - On dataset read, include `data_purposes` at each level as stored ### Repository Layer @@ -421,8 +416,7 @@ Single migration file. 
Table creation order (respects FK dependencies): 4. `data_consumer_purpose` (FKs to `data_consumer` + `data_purpose`) 5. `system_purpose` (FKs to `ctl_systems` + `data_purpose`) 6. `data_producer_member` (FKs to `data_producer` + `fidesuser`) -7. `dataset_purpose` (FKs to `ctl_datasets` + `data_purpose`) -8. ALTER `ctl_datasets`: add `data_producer_id` FK column +7. ALTER `ctl_datasets`: add `data_purposes` ARRAY column + `data_producer_id` FK column Migration conventions: - All new nullable columns use `nullable=True` @@ -437,8 +431,6 @@ Indexes: - `system_purpose(data_purpose_id)` — for "find all consumers for a purpose" queries - `data_consumer_purpose(data_consumer_id)` — for hydrating non-system consumers - `data_consumer_purpose(data_purpose_id)` — for "find all consumers for a purpose" queries -- `dataset_purpose(dataset_id)` — for hydrating dataset purposes -- `dataset_purpose(data_purpose_id)` — for "find all datasets for a purpose" queries - `data_producer_member(data_producer_id)` — for listing producer members - `data_producer_member(user_id)` — for finding a user's producer memberships @@ -447,13 +439,11 @@ FK cascade/delete behavior: - `system_purpose.data_purpose_id` ON DELETE RESTRICT (prevent deleting a purpose that's in use) - `data_consumer_purpose.data_consumer_id` ON DELETE CASCADE (deleting a consumer removes its purpose links) - `data_consumer_purpose.data_purpose_id` ON DELETE RESTRICT (prevent deleting a purpose that's in use) -- `dataset_purpose.dataset_id` ON DELETE CASCADE (deleting a dataset removes its purpose links) -- `dataset_purpose.data_purpose_id` ON DELETE RESTRICT (prevent deleting a purpose that's in use) - `data_producer_member.data_producer_id` ON DELETE CASCADE (deleting a producer removes its member links) - `data_producer_member.user_id` ON DELETE CASCADE (deleting a user removes their producer memberships) - `ctl_datasets.data_producer_id` ON DELETE SET NULL (deleting a producer nullifies the FK on datasets) -Downgrade: 
drop tables in reverse order, remove `data_producer_id` from `ctl_datasets`. +Downgrade: drop tables in reverse order, remove `data_purposes` and `data_producer_id` from `ctl_datasets`. ### Backward Compatibility Guarantees @@ -481,7 +471,7 @@ fidesplus setting: `purpose_based_model_enabled: bool = False` ### Phase 1: Schema + CRUD (implementation scope) **fides OSS:** -- Alembic migration: all new tables, `data_producer_id` FK on `ctl_datasets` +- Alembic migration: all new tables, `data_purposes` column + `data_producer_id` FK on `ctl_datasets` - SQLAlchemy models for all new tables - Extend dataset/collection/field Pydantic schemas with optional `data_purposes` From e273fba86f13ff55ddfd571263aa3abc500a74e2 Mon Sep 17 00:00:00 2001 From: Adrian Galvan Date: Wed, 11 Mar 2026 22:15:08 -0400 Subject: [PATCH 04/17] feat: add implementation plan for purpose-based data model 18-task plan across 4 chunks covering models/migration (fides OSS), services, API routes (fidesplus), and dataset integration. Reviewed and validated against the design spec. Co-Authored-By: Claude Opus 4.6 --- .../2026-03-11-purpose-based-data-model.md | 2198 +++++++++++++++++ 1 file changed, 2198 insertions(+) create mode 100644 docs/superpowers/plans/2026-03-11-purpose-based-data-model.md diff --git a/docs/superpowers/plans/2026-03-11-purpose-based-data-model.md b/docs/superpowers/plans/2026-03-11-purpose-based-data-model.md new file mode 100644 index 00000000000..e3a8b3ae4ad --- /dev/null +++ b/docs/superpowers/plans/2026-03-11-purpose-based-data-model.md @@ -0,0 +1,2198 @@ +# Purpose-Based Data Model Implementation Plan + +> **For agentic workers:** REQUIRED: Use superpowers:subagent-driven-development (if subagents available) or superpowers:executing-plans to implement this plan. Steps use checkbox (`- [ ]`) syntax for tracking. 
+ +**Goal:** Implement Phase 1 of the purpose-based data model: new tables, models, CRUD services, and API routes for Data Purpose, Data Consumer, Data Producer, and dataset purpose assignment. + +**Architecture:** Models and migrations in fides OSS, services and routes in fidesplus. DataConsumer uses a facade pattern: system-type consumers are read from `ctl_systems` + `system_purpose` join table; non-system consumers (group/project) use a new `data_consumer` table. All dataset purposes are soft `fides_key` string references. + +**Tech Stack:** Python 3, SQLAlchemy, Alembic, FastAPI, Pydantic v2, PostgreSQL, pytest + +**Spec:** `docs/superpowers/specs/2026-03-11-purpose-based-data-model-design.md` + +--- + +## File Structure + +### fides OSS (models, migrations, schemas, scopes) + +| Action | File | Responsibility | +|--------|------|---------------| +| Modify | `src/fides/api/models/sql_models.py` | Add DataPurpose model (alongside other FidesBase models), add Dataset columns, add System relationship | +| Create | `src/fides/api/models/data_consumer.py` | DataConsumer model, DataConsumerPurpose join table | +| Create | `src/fides/api/models/system_purpose.py` | SystemPurpose join table | +| Create | `src/fides/api/models/data_producer.py` | DataProducer model, DataProducerMember join table | +| Create | `src/fides/api/schemas/data_purpose.py` | DataPurpose Pydantic schemas (create, update, response) | +| Create | `src/fides/api/schemas/data_consumer.py` | DataConsumer Pydantic schemas | +| Create | `src/fides/api/schemas/data_producer.py` | DataProducer Pydantic schemas | +| Modify | `src/fides/api/db/base.py` | Import new models so Alembic sees them | +| Create | `src/fides/api/alembic/migrations/versions/xx_..._purpose_based_data_model.py` | Migration for all new tables + dataset columns | + +### fidesplus (services, routes, settings) + +| Action | File | Responsibility | +|--------|------|---------------| +| Create | `src/fidesplus/config/purpose_settings.py` 
| Feature flag settings class | +| Modify | `src/fidesplus/config/__init__.py` | Register PurposeSettings on FidesplusConfig | +| Create | `src/fidesplus/service/data_purpose/__init__.py` | Package init | +| Create | `src/fidesplus/service/data_purpose/data_purpose_service.py` | DataPurpose CRUD + validation | +| Create | `src/fidesplus/service/data_consumer/__init__.py` | Package init | +| Create | `src/fidesplus/service/data_consumer/data_consumer_service.py` | DataConsumer facade service | +| Create | `src/fidesplus/service/data_producer/__init__.py` | Package init | +| Create | `src/fidesplus/service/data_producer/data_producer_service.py` | DataProducer CRUD + member management | +| Create | `src/fidesplus/api/routes/data_purpose.py` | DataPurpose API routes | +| Create | `src/fidesplus/api/routes/data_consumer.py` | DataConsumer API routes + purpose assignment | +| Create | `src/fidesplus/api/routes/data_producer.py` | DataProducer API routes + member management | +| Modify | `src/fidesplus/api/plus_scope_registry.py` | Define scopes, add to SCOPE_DOCS and role mappings | +| Modify | `src/fidesplus/api/urn_registry.py` | Add URL path constants | +| Modify | `src/fidesplus/api/deps.py` | Add service factory functions for Depends() | +| Modify | Router registration file | Mount new routers on plus_router | + +### Tests + +| Action | File | Responsibility | +|--------|------|---------------| +| Create | `tests/ops/models/test_data_purpose.py` | DataPurpose model unit tests | +| Create | `tests/ops/models/test_data_consumer.py` | DataConsumer + join table model tests | +| Create | `tests/ops/models/test_system_purpose.py` | SystemPurpose join table tests | +| Create | `tests/ops/models/test_data_producer.py` | DataProducer + member model tests | +| Create | `tests/ops/models/test_dataset_purposes.py` | Dataset purpose column + JSON validation | +| Create | (fidesplus) `tests/ops/api/test_data_purpose_api.py` | DataPurpose API endpoint tests | +| Create | 
(fidesplus) `tests/ops/api/test_data_consumer_api.py` | DataConsumer API endpoint tests | +| Create | (fidesplus) `tests/ops/api/test_data_producer_api.py` | DataProducer API endpoint tests | +| Create | (fidesplus) `tests/ops/service/test_data_purpose_service.py` | DataPurposeService integration tests | +| Create | (fidesplus) `tests/ops/service/test_data_consumer_service.py` | DataConsumerService facade tests | +| Create | (fidesplus) `tests/ops/service/test_data_producer_service.py` | DataProducerService integration tests | + +--- + +## Chunk 1: Models and Migration (fides OSS) + +### Task 1: DataPurpose Model + +**Files:** +- Modify: `src/fides/api/models/sql_models.py` (add DataPurpose class) +- Test: `tests/ops/models/test_data_purpose.py` + +- [ ] **Step 1: Write the model test** + +```python +# tests/ops/models/test_data_purpose.py +import pytest +from sqlalchemy.orm import Session + +from fides.api.models.sql_models import DataPurpose + + +@pytest.mark.postgres +class TestDataPurposeModel: + def test_create_data_purpose(self, db: Session): + purpose = DataPurpose.create( + db=db, + data={ + "fides_key": "marketing_email", + "name": "Email Marketing", + "description": "Processing for email marketing campaigns", + "data_use": "marketing.advertising", + "data_subject": "customer", + "data_categories": ["user.contact.email"], + "legal_basis_for_processing": "Consent", + "flexible_legal_basis_for_processing": True, + "retention_period": "90 days", + "features": ["email_targeting"], + }, + ) + assert purpose.fides_key == "marketing_email" + assert purpose.data_use == "marketing.advertising" + assert purpose.data_subject == "customer" + assert purpose.data_categories == ["user.contact.email"] + assert purpose.flexible_legal_basis_for_processing is True + assert purpose.features == ["email_targeting"] + assert purpose.id is not None + assert purpose.created_at is not None + + def test_create_minimal_data_purpose(self, db: Session): + """Only fides_key, name, and 
data_use are required.""" + purpose = DataPurpose.create( + db=db, + data={ + "fides_key": "analytics_basic", + "name": "Basic Analytics", + "data_use": "analytics", + }, + ) + assert purpose.fides_key == "analytics_basic" + assert purpose.data_subject is None + assert purpose.data_categories == [] + assert purpose.legal_basis_for_processing is None + + def test_fides_key_uniqueness(self, db: Session): + DataPurpose.create( + db=db, + data={ + "fides_key": "unique_purpose", + "name": "Purpose A", + "data_use": "analytics", + }, + ) + with pytest.raises(Exception): + DataPurpose.create( + db=db, + data={ + "fides_key": "unique_purpose", + "name": "Purpose B", + "data_use": "marketing", + }, + ) + + def test_delete_data_purpose(self, db: Session): + purpose = DataPurpose.create( + db=db, + data={ + "fides_key": "to_delete", + "name": "Delete Me", + "data_use": "analytics", + }, + ) + purpose_id = purpose.id + purpose.delete(db) + assert db.query(DataPurpose).filter_by(id=purpose_id).first() is None +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `nox -s "pytest(ops-unit)" -- tests/ops/models/test_data_purpose.py -v` +Expected: ImportError (module does not exist yet) + +- [ ] **Step 3: Write the DataPurpose model** + +**Important:** `FidesBase` (the mixin providing `fides_key`, `name`, `description`, `organization_fides_key`, `tags`) is defined in `sql_models.py`, NOT in `base_class.py`. The `Base` class from `base_class.py` provides `id`, `created_at`, `updated_at`, and CRUD methods. Models inheriting both get a composite primary key (`id` + `fides_key`). This is the same pattern used by `System`, `Dataset`, `DataCategory`, etc. Join tables reference `data_purpose.id` (the UUID column), which works because `id` is part of the composite PK. 
+ +Add the `DataPurpose` class to `src/fides/api/models/sql_models.py` (alongside the other FidesBase models) to avoid circular import issues: + +```python +# Add to src/fides/api/models/sql_models.py after the Dataset class + +class DataPurpose(Base, FidesBase): + """ + A standalone, reusable declaration of why data is processed. + Replaces the system-bound PrivacyDeclaration with a centrally-governed entity. + Flat (no hierarchy) but inherits FidesBase for fides_key, name, description, + organization_fides_key, and tags. + """ + + __tablename__ = "data_purpose" + + data_use = Column(String, nullable=False, index=True) + data_subject = Column(String, nullable=True) + data_categories = Column(ARRAY(String), server_default="{}", nullable=False) + legal_basis_for_processing = Column(String, nullable=True) + flexible_legal_basis_for_processing = Column( + Boolean, server_default="t", nullable=False + ) + special_category_legal_basis = Column(String, nullable=True) + impact_assessment_location = Column(String, nullable=True) + retention_period = Column(String, nullable=True) + features = Column(ARRAY(String), server_default="{}", nullable=False) + + @classmethod + def create( + cls, + db: Session, + *, + data: dict[str, Any], + check_name: bool = False, + ) -> "DataPurpose": + """Override create to skip name uniqueness check. + DataPurpose uses fides_key for uniqueness, not name.""" + return super().create(db=db, data=data, check_name=check_name) +``` + +Columns inherited from `FidesBase` (do NOT redeclare): `fides_key`, `name`, `description`, `organization_fides_key`, `tags`. +Columns inherited from `Base` (do NOT redeclare): `id`, `created_at`, `updated_at`. 
+ +- [ ] **Step 4: Register model in base imports** + +Add to `src/fides/api/db/base.py`: +```python +from fides.api.models.sql_models import DataPurpose # noqa: F401 +``` + +- [ ] **Step 5: Run tests to verify they pass** + +Run: `nox -s "pytest(ops-unit)" -- tests/ops/models/test_data_purpose.py -v` +Expected: All 4 tests PASS + +- [ ] **Step 6: Commit** + +```bash +git add src/fides/api/models/sql_models.py src/fides/api/db/base.py tests/ops/models/test_data_purpose.py +git commit -m "feat: add DataPurpose model with unit tests" +``` + +--- + +### Task 2: SystemPurpose Join Table + +**Files:** +- Create: `src/fides/api/models/system_purpose.py` +- Modify: `src/fides/api/models/sql_models.py` (add relationship to System) +- Test: `tests/ops/models/test_system_purpose.py` + +- [ ] **Step 1: Write the test** + +```python +# tests/ops/models/test_system_purpose.py +import pytest +from sqlalchemy.orm import Session + +from fides.api.models.sql_models import DataPurpose +from fides.api.models.system_purpose import SystemPurpose +from fides.api.models.sql_models import System + + +@pytest.mark.postgres +class TestSystemPurposeModel: + @pytest.fixture + def purpose(self, db: Session) -> DataPurpose: + return DataPurpose.create( + db=db, + data={ + "fides_key": "test_purpose", + "name": "Test Purpose", + "data_use": "analytics", + }, + ) + + def test_create_system_purpose(self, db: Session, system: System, purpose: DataPurpose): + sp = SystemPurpose.create( + db=db, + data={ + "system_id": system.id, + "data_purpose_id": purpose.id, + }, + ) + assert sp.system_id == system.id + assert sp.data_purpose_id == purpose.id + assert sp.assigned_by is None + assert sp.created_at is not None + + def test_unique_constraint(self, db: Session, system: System, purpose: DataPurpose): + SystemPurpose.create( + db=db, + data={ + "system_id": system.id, + "data_purpose_id": purpose.id, + }, + ) + with pytest.raises(Exception): + SystemPurpose.create( + db=db, + data={ + "system_id": 
system.id, + "data_purpose_id": purpose.id, + }, + ) + + def test_cascade_on_system_delete(self, db: Session, system: System, purpose: DataPurpose): + sp = SystemPurpose.create( + db=db, + data={ + "system_id": system.id, + "data_purpose_id": purpose.id, + }, + ) + sp_id = sp.id + system.delete(db) + assert db.query(SystemPurpose).filter_by(id=sp_id).first() is None +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `nox -s "pytest(ops-unit)" -- tests/ops/models/test_system_purpose.py -v` +Expected: ImportError + +- [ ] **Step 3: Write the SystemPurpose model** + +```python +# src/fides/api/models/system_purpose.py +from sqlalchemy import Column, ForeignKey, String, UniqueConstraint +from sqlalchemy.orm import relationship + +from fides.api.db.base_class import Base + + +class SystemPurpose(Base): + """ + Audited join table linking a System to a DataPurpose. + Used by the DataConsumer facade for system-type consumers. + """ + + __tablename__ = "system_purpose" + __table_args__ = ( + UniqueConstraint("system_id", "data_purpose_id", name="uq_system_purpose"), + ) + + system_id = Column( + String, + ForeignKey("ctl_systems.id", ondelete="CASCADE"), + nullable=False, + index=True, + ) + data_purpose_id = Column( + String, + ForeignKey("data_purpose.id", ondelete="RESTRICT"), + nullable=False, + index=True, + ) + assigned_by = Column( + String, + ForeignKey("fidesuser.id"), + nullable=True, + ) + + system = relationship("System", lazy="selectin") + data_purpose = relationship("DataPurpose", lazy="selectin") +``` + +- [ ] **Step 4: Add `system_purposes` relationship to System model** + +In `src/fides/api/models/sql_models.py`, add to the `System` class after the `system_groups` relationship: + +```python + system_purposes = relationship( + "SystemPurpose", + cascade="all, delete-orphan", + lazy="selectin", + ) +``` + +- [ ] **Step 5: Register model in base imports** + +Add to `src/fides/api/db/base.py`: +```python +from fides.api.models.system_purpose import 
SystemPurpose # noqa: F401 +``` + +- [ ] **Step 6: Run tests** + +Run: `nox -s "pytest(ops-unit)" -- tests/ops/models/test_system_purpose.py -v` +Expected: All 3 tests PASS + +- [ ] **Step 7: Commit** + +```bash +git add src/fides/api/models/system_purpose.py src/fides/api/models/sql_models.py src/fides/api/db/base.py tests/ops/models/test_system_purpose.py +git commit -m "feat: add SystemPurpose join table with cascade delete" +``` + +--- + +### Task 3: DataConsumer Model and DataConsumerPurpose Join Table + +**Files:** +- Create: `src/fides/api/models/data_consumer.py` +- Test: `tests/ops/models/test_data_consumer.py` + +- [ ] **Step 1: Write the test** + +```python +# tests/ops/models/test_data_consumer.py +import pytest +from sqlalchemy.orm import Session + +from fides.api.models.data_consumer import DataConsumer, DataConsumerPurpose +from fides.api.models.sql_models import DataPurpose + + +@pytest.mark.postgres +class TestDataConsumerModel: + def test_create_group_consumer(self, db: Session): + consumer = DataConsumer.create( + db=db, + data={ + "name": "Marketing Team", + "description": "Marketing department Google Group", + "type": "group", + "external_id": "marketing@example.com", + "contact_email": "marketing-lead@example.com", + "tags": ["marketing", "internal"], + }, + ) + assert consumer.name == "Marketing Team" + assert consumer.type == "group" + assert consumer.external_id == "marketing@example.com" + assert consumer.tags == ["marketing", "internal"] + + def test_create_project_consumer(self, db: Session): + consumer = DataConsumer.create( + db=db, + data={ + "name": "Analytics Pipeline", + "type": "project", + "external_id": "bigquery-project-123", + }, + ) + assert consumer.type == "project" + + def test_system_type_rejected(self, db: Session): + """CHECK constraint prevents type='system' rows.""" + with pytest.raises(Exception): + DataConsumer.create( + db=db, + data={ + "name": "Should Fail", + "type": "system", + }, + ) + + def 
test_custom_type_allowed(self, db: Session): + consumer = DataConsumer.create( + db=db, + data={ + "name": "Custom Consumer", + "type": "data_warehouse", + }, + ) + assert consumer.type == "data_warehouse" + + +@pytest.mark.postgres +class TestDataConsumerPurposeModel: + @pytest.fixture + def purpose(self, db: Session) -> DataPurpose: + return DataPurpose.create( + db=db, + data={ + "fides_key": "consumer_test_purpose", + "name": "Test Purpose", + "data_use": "analytics", + }, + ) + + @pytest.fixture + def consumer(self, db: Session) -> DataConsumer: + return DataConsumer.create( + db=db, + data={ + "name": "Test Group", + "type": "group", + }, + ) + + def test_link_purpose_to_consumer(self, db: Session, consumer: DataConsumer, purpose: DataPurpose): + link = DataConsumerPurpose.create( + db=db, + data={ + "data_consumer_id": consumer.id, + "data_purpose_id": purpose.id, + }, + ) + assert link.data_consumer_id == consumer.id + assert link.data_purpose_id == purpose.id + + def test_unique_constraint(self, db: Session, consumer: DataConsumer, purpose: DataPurpose): + DataConsumerPurpose.create( + db=db, + data={ + "data_consumer_id": consumer.id, + "data_purpose_id": purpose.id, + }, + ) + with pytest.raises(Exception): + DataConsumerPurpose.create( + db=db, + data={ + "data_consumer_id": consumer.id, + "data_purpose_id": purpose.id, + }, + ) + + def test_cascade_on_consumer_delete(self, db: Session, consumer: DataConsumer, purpose: DataPurpose): + link = DataConsumerPurpose.create( + db=db, + data={ + "data_consumer_id": consumer.id, + "data_purpose_id": purpose.id, + }, + ) + link_id = link.id + consumer.delete(db) + assert db.query(DataConsumerPurpose).filter_by(id=link_id).first() is None +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `nox -s "pytest(ops-unit)" -- tests/ops/models/test_data_consumer.py -v` +Expected: ImportError + +- [ ] **Step 3: Write the DataConsumer and DataConsumerPurpose models** + +```python +# 
src/fides/api/models/data_consumer.py +from typing import Any + +from sqlalchemy import ARRAY, JSON, Boolean, CheckConstraint, Column, ForeignKey, String, UniqueConstraint +from sqlalchemy.orm import Session, relationship + +from fides.api.db.base_class import Base + + +class DataConsumer(Base): + """ + Non-system data consumers (groups, projects, custom types). + System-type consumers are surfaced via a facade over ctl_systems. + """ + + __tablename__ = "data_consumer" + __table_args__ = ( + CheckConstraint("type != 'system'", name="ck_data_consumer_not_system"), + ) + + name = Column(String, nullable=False) + description = Column(String, nullable=True) + type = Column(String, nullable=False, index=True) + external_id = Column(String, nullable=True) + egress = Column(JSON, nullable=True) + ingress = Column(JSON, nullable=True) + data_shared_with_third_parties = Column( + Boolean, server_default="f", nullable=False + ) + third_parties = Column(String, nullable=True) + shared_categories = Column(ARRAY(String), server_default="{}", nullable=False) + contact_email = Column(String, nullable=True) + contact_slack_channel = Column(String, nullable=True) + contact_details = Column(JSON, nullable=True) + tags = Column(ARRAY(String), server_default="{}", nullable=False) + + consumer_purposes = relationship( + "DataConsumerPurpose", + cascade="all, delete-orphan", + lazy="selectin", + ) + + @classmethod + def create( + cls, + db: Session, + *, + data: dict[str, Any], + check_name: bool = False, + ) -> "DataConsumer": + """Override create to skip name uniqueness check. + Multiple consumers can share a name.""" + return super().create(db=db, data=data, check_name=check_name) + + +class DataConsumerPurpose(Base): + """ + Audited join table linking a non-system DataConsumer to a DataPurpose. 
+ """ + + __tablename__ = "data_consumer_purpose" + __table_args__ = ( + UniqueConstraint( + "data_consumer_id", "data_purpose_id", name="uq_data_consumer_purpose" + ), + ) + + data_consumer_id = Column( + String, + ForeignKey("data_consumer.id", ondelete="CASCADE"), + nullable=False, + index=True, + ) + data_purpose_id = Column( + String, + ForeignKey("data_purpose.id", ondelete="RESTRICT"), + nullable=False, + index=True, + ) + assigned_by = Column( + String, + ForeignKey("fidesuser.id"), + nullable=True, + ) + + data_consumer = relationship("DataConsumer", lazy="selectin") + data_purpose = relationship("DataPurpose", lazy="selectin") +``` + +- [ ] **Step 4: Register models in base imports** + +Add to `src/fides/api/db/base.py`: +```python +from fides.api.models.data_consumer import DataConsumer, DataConsumerPurpose # noqa: F401 +``` + +- [ ] **Step 5: Run tests** + +Run: `nox -s "pytest(ops-unit)" -- tests/ops/models/test_data_consumer.py -v` +Expected: All 7 tests PASS + +- [ ] **Step 6: Commit** + +```bash +git add src/fides/api/models/data_consumer.py src/fides/api/db/base.py tests/ops/models/test_data_consumer.py +git commit -m "feat: add DataConsumer and DataConsumerPurpose models" +``` + +--- + +### Task 4: DataProducer Model and DataProducerMember Join Table + +**Files:** +- Create: `src/fides/api/models/data_producer.py` +- Test: `tests/ops/models/test_data_producer.py` + +- [ ] **Step 1: Write the test** + +```python +# tests/ops/models/test_data_producer.py +import pytest +from sqlalchemy.orm import Session + +from fides.api.models.data_producer import DataProducer, DataProducerMember + + +@pytest.mark.postgres +class TestDataProducerModel: + def test_create_data_producer(self, db: Session): + producer = DataProducer.create( + db=db, + data={ + "name": "Analytics Engineering Team", + "description": "Responsible for analytics pipelines", + "external_id": "analytics-eng-okta-group", + "contact_email": "analytics-eng@example.com", + 
"contact_slack_channel": "#analytics-eng", + }, + ) + assert producer.name == "Analytics Engineering Team" + assert producer.external_id == "analytics-eng-okta-group" + assert producer.contact_email == "analytics-eng@example.com" + + def test_create_minimal_producer(self, db: Session): + producer = DataProducer.create( + db=db, + data={"name": "Minimal Producer"}, + ) + assert producer.name == "Minimal Producer" + assert producer.monitor_id is None + + def test_delete_producer(self, db: Session): + producer = DataProducer.create( + db=db, + data={"name": "Delete Me"}, + ) + producer_id = producer.id + producer.delete(db) + assert db.query(DataProducer).filter_by(id=producer_id).first() is None + + +@pytest.mark.postgres +class TestDataProducerMemberModel: + @pytest.fixture + def producer(self, db: Session) -> DataProducer: + return DataProducer.create( + db=db, + data={"name": "Test Producer"}, + ) + + def test_add_member(self, db: Session, producer: DataProducer, user): + member = DataProducerMember.create( + db=db, + data={ + "data_producer_id": producer.id, + "user_id": user.id, + }, + ) + assert member.data_producer_id == producer.id + assert member.user_id == user.id + + def test_unique_constraint(self, db: Session, producer: DataProducer, user): + DataProducerMember.create( + db=db, + data={ + "data_producer_id": producer.id, + "user_id": user.id, + }, + ) + with pytest.raises(Exception): + DataProducerMember.create( + db=db, + data={ + "data_producer_id": producer.id, + "user_id": user.id, + }, + ) + + def test_cascade_on_producer_delete(self, db: Session, producer: DataProducer, user): + member = DataProducerMember.create( + db=db, + data={ + "data_producer_id": producer.id, + "user_id": user.id, + }, + ) + member_id = member.id + producer.delete(db) + assert db.query(DataProducerMember).filter_by(id=member_id).first() is None +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `nox -s "pytest(ops-unit)" -- tests/ops/models/test_data_producer.py 
-v` +Expected: ImportError + +- [ ] **Step 3: Write the DataProducer and DataProducerMember models** + +```python +# src/fides/api/models/data_producer.py +from typing import Any + +from sqlalchemy import JSON, Column, ForeignKey, String, UniqueConstraint +from sqlalchemy.orm import Session, relationship + +from fides.api.db.base_class import Base + + +class DataProducer(Base): + """ + Represents a team or group responsible for data registration + and purpose assignment to datasets. + """ + + __tablename__ = "data_producer" + + name = Column(String, nullable=False) + description = Column(String, nullable=True) + external_id = Column(String, nullable=True) + monitor_id = Column( + String, + ForeignKey("monitorconfig.id"), + nullable=True, + ) + contact_email = Column(String, nullable=True) + contact_slack_channel = Column(String, nullable=True) + contact_details = Column(JSON, nullable=True) + + members = relationship( + "DataProducerMember", + cascade="all, delete-orphan", + lazy="selectin", + ) + monitor = relationship("MonitorConfig", lazy="selectin") + + @classmethod + def create( + cls, + db: Session, + *, + data: dict[str, Any], + check_name: bool = False, + ) -> "DataProducer": + """Override create to skip name uniqueness check. + Multiple producers can share a name.""" + return super().create(db=db, data=data, check_name=check_name) + + +class DataProducerMember(Base): + """ + Join table linking a DataProducer to FidesUser members. 
+ """ + + __tablename__ = "data_producer_member" + __table_args__ = ( + UniqueConstraint( + "data_producer_id", "user_id", name="uq_data_producer_member" + ), + ) + + data_producer_id = Column( + String, + ForeignKey("data_producer.id", ondelete="CASCADE"), + nullable=False, + index=True, + ) + user_id = Column( + String, + ForeignKey("fidesuser.id", ondelete="CASCADE"), + nullable=False, + index=True, + ) + + data_producer = relationship("DataProducer", lazy="selectin") + user = relationship("FidesUser", lazy="selectin") +``` + +- [ ] **Step 4: Register models in base imports** + +Add to `src/fides/api/db/base.py`: +```python +from fides.api.models.data_producer import DataProducer, DataProducerMember # noqa: F401 +``` + +- [ ] **Step 5: Run tests** + +Run: `nox -s "pytest(ops-unit)" -- tests/ops/models/test_data_producer.py -v` +Expected: All 6 tests PASS + +- [ ] **Step 6: Commit** + +```bash +git add src/fides/api/models/data_producer.py src/fides/api/db/base.py tests/ops/models/test_data_producer.py +git commit -m "feat: add DataProducer and DataProducerMember models" +``` + +--- + +### Task 5: Extend Dataset Model + +**Files:** +- Modify: `src/fides/api/models/sql_models.py` (add columns to Dataset) +- Test: `tests/ops/models/test_dataset_purposes.py` + +- [ ] **Step 1: Write the test** + +```python +# tests/ops/models/test_dataset_purposes.py +import pytest +from sqlalchemy.orm import Session + +from fides.api.models.sql_models import Dataset +from fides.api.models.sql_models import DataPurpose +from fides.api.models.data_producer import DataProducer + + +@pytest.mark.postgres +class TestDatasetPurposes: + def test_dataset_with_purposes(self, db: Session): + dataset = Dataset.create( + db=db, + data={ + "fides_key": "test_dataset_purposes", + "name": "Test Dataset", + "data_categories": [], + "collections": [], + "data_purposes": ["marketing_email", "analytics_basic"], + }, + ) + assert dataset.data_purposes == ["marketing_email", "analytics_basic"] + + def 
test_dataset_without_purposes(self, db: Session): + dataset = Dataset.create( + db=db, + data={ + "fides_key": "test_dataset_no_purposes", + "name": "Test Dataset No Purposes", + "data_categories": [], + "collections": [], + }, + ) + assert dataset.data_purposes == [] or dataset.data_purposes is None + + def test_dataset_with_producer(self, db: Session): + producer = DataProducer.create( + db=db, + data={"name": "Test Producer"}, + ) + dataset = Dataset.create( + db=db, + data={ + "fides_key": "test_dataset_producer", + "name": "Test Dataset With Producer", + "data_categories": [], + "collections": [], + "data_producer_id": producer.id, + }, + ) + assert dataset.data_producer_id == producer.id + + def test_producer_set_null_on_delete(self, db: Session): + producer = DataProducer.create( + db=db, + data={"name": "Delete Producer"}, + ) + dataset = Dataset.create( + db=db, + data={ + "fides_key": "test_dataset_producer_delete", + "name": "Test Dataset", + "data_categories": [], + "collections": [], + "data_producer_id": producer.id, + }, + ) + dataset_id = dataset.id + producer.delete(db) + db.expire_all() + refreshed = db.query(Dataset).filter_by(id=dataset_id).first() + assert refreshed.data_producer_id is None +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `nox -s "pytest(ops-unit)" -- tests/ops/models/test_dataset_purposes.py -v` +Expected: FAIL (columns don't exist yet) + +- [ ] **Step 3: Add columns to Dataset model** + +In `src/fides/api/models/sql_models.py`, add to the `Dataset` class after `fides_meta`: + +```python + data_purposes = Column(ARRAY(String), server_default="{}", nullable=True) + data_producer_id = Column( + String, + ForeignKey("data_producer.id", ondelete="SET NULL"), + nullable=True, + ) + data_producer = relationship("DataProducer", lazy="selectin") +``` + +- [ ] **Step 4: Run tests** + +Run: `nox -s "pytest(ops-unit)" -- tests/ops/models/test_dataset_purposes.py -v` +Expected: All 4 tests PASS + +- [ ] **Step 5: Commit** + 
+
```bash
git add src/fides/api/models/sql_models.py tests/ops/models/test_dataset_purposes.py
git commit -m "feat: add data_purposes and data_producer_id to Dataset model"
```

---

### Task 6: Permission Scopes

**Note:** Since the purpose-based data model is a fidesplus-only feature, scope constants follow the established pattern of being defined in `plus_scope_registry.py` (not OSS `scope_registry.py`). This is consistent with how `SYSTEM_GROUP`, `DISCOVERY_MONITOR`, `TAXONOMY`, and other fidesplus feature scopes are defined. The scope definitions, `UPDATED_SCOPE_DOCS` entries, and role mappings are all handled in Task 13. **This task is a no-op and is merged into Task 13.**

Skip to Task 7.

---

### Task 7: Alembic Migration

**Files:**
- Create: `src/fides/api/alembic/migrations/versions/xx_..._purpose_based_data_model.py`

- [ ] **Step 1: Auto-generate migration**

```bash
# Run from the root of your local fides checkout
nox -s generate_migration -- "purpose_based_data_model"
```

If `nox -s generate_migration` is not available, manually create the migration:

```bash
cd src/fides/api
alembic revision --autogenerate -m "purpose_based_data_model"
```

- [ ] **Step 2: Review and edit the generated migration**

Verify the generated migration includes:
1. `data_purpose` table creation
2. `data_consumer` table creation with CHECK constraint
3. `data_consumer_purpose` join table
4. `system_purpose` join table
5. `data_producer` table
6. `data_producer_member` join table
7. ALTER `ctl_datasets` to add `data_purposes` and `data_producer_id` columns

Ensure all indexes are present per the spec. Add any missing CHECK constraints or cascade behaviors manually.
+ +- [ ] **Step 3: Test the migration** + +```bash +nox -s check_migrations +``` + +Expected: Migration check passes + +- [ ] **Step 4: Commit** + +```bash +git add src/fides/api/alembic/migrations/versions/ +git commit -m "feat: add alembic migration for purpose-based data model tables" +``` + +--- + +### Task 8: Pydantic Schemas + +**Files:** +- Create: `src/fides/api/schemas/data_purpose.py` +- Create: `src/fides/api/schemas/data_consumer.py` +- Create: `src/fides/api/schemas/data_producer.py` + +- [ ] **Step 1: Write DataPurpose schemas** + +```python +# src/fides/api/schemas/data_purpose.py +from datetime import datetime +from typing import List, Optional + +from pydantic import BaseModel, ConfigDict, Field + + +class DataPurposeCreate(BaseModel): + fides_key: str + name: str + description: Optional[str] = None + organization_fides_key: Optional[str] = "default_organization" + tags: Optional[List[str]] = None + data_use: str + data_subject: Optional[str] = None + data_categories: List[str] = Field(default_factory=list) + legal_basis_for_processing: Optional[str] = None + flexible_legal_basis_for_processing: bool = True + special_category_legal_basis: Optional[str] = None + impact_assessment_location: Optional[str] = None + retention_period: Optional[str] = None + features: List[str] = Field(default_factory=list) + + +class DataPurposeUpdate(BaseModel): + name: Optional[str] = None + description: Optional[str] = None + data_use: Optional[str] = None + data_subject: Optional[str] = None + data_categories: Optional[List[str]] = None + legal_basis_for_processing: Optional[str] = None + flexible_legal_basis_for_processing: Optional[bool] = None + special_category_legal_basis: Optional[str] = None + impact_assessment_location: Optional[str] = None + retention_period: Optional[str] = None + features: Optional[List[str]] = None + + +class DataPurposeResponse(BaseModel): + id: str + fides_key: str + name: str + description: Optional[str] = None + 
organization_fides_key: Optional[str] = None + tags: Optional[List[str]] = None + data_use: str + data_subject: Optional[str] = None + data_categories: List[str] + legal_basis_for_processing: Optional[str] = None + flexible_legal_basis_for_processing: bool + special_category_legal_basis: Optional[str] = None + impact_assessment_location: Optional[str] = None + retention_period: Optional[str] = None + features: List[str] + created_at: datetime + updated_at: datetime + + model_config = ConfigDict(from_attributes=True) +``` + +- [ ] **Step 2: Write DataConsumer schemas** + +```python +# src/fides/api/schemas/data_consumer.py +from datetime import datetime +from typing import Dict, List, Optional + +from pydantic import BaseModel, ConfigDict, Field + +from fides.api.schemas.data_purpose import DataPurposeResponse + + +class DataConsumerCreate(BaseModel): + name: str + description: Optional[str] = None + type: str + external_id: Optional[str] = None + egress: Optional[Dict] = None + ingress: Optional[Dict] = None + data_shared_with_third_parties: bool = False + third_parties: Optional[str] = None + shared_categories: List[str] = Field(default_factory=list) + contact_email: Optional[str] = None + contact_slack_channel: Optional[str] = None + contact_details: Optional[Dict] = None + tags: List[str] = Field(default_factory=list) + + +class DataConsumerUpdate(BaseModel): + name: Optional[str] = None + description: Optional[str] = None + external_id: Optional[str] = None + egress: Optional[Dict] = None + ingress: Optional[Dict] = None + data_shared_with_third_parties: Optional[bool] = None + third_parties: Optional[str] = None + shared_categories: Optional[List[str]] = None + contact_email: Optional[str] = None + contact_slack_channel: Optional[str] = None + contact_details: Optional[Dict] = None + tags: Optional[List[str]] = None + + +class DataConsumerPurposeAssignment(BaseModel): + purpose_fides_keys: List[str] + + +class DataConsumerResponse(BaseModel): + id: str + name: 
str + description: Optional[str] = None + type: str + external_id: Optional[str] = None + purposes: List[DataPurposeResponse] = Field(default_factory=list) + system_fides_key: Optional[str] = None + vendor_id: Optional[str] = None + egress: Optional[Dict] = None + ingress: Optional[Dict] = None + data_shared_with_third_parties: Optional[bool] = None + third_parties: Optional[str] = None + shared_categories: Optional[List[str]] = None + tags: List[str] = Field(default_factory=list) + contact_email: Optional[str] = None + contact_slack_channel: Optional[str] = None + contact_details: Optional[Dict] = None + created_at: datetime + updated_at: datetime + + model_config = ConfigDict(from_attributes=True) +``` + +- [ ] **Step 3: Write DataProducer schemas** + +```python +# src/fides/api/schemas/data_producer.py +from datetime import datetime +from typing import Dict, List, Optional + +from pydantic import BaseModel, ConfigDict, Field + + +class DataProducerCreate(BaseModel): + name: str + description: Optional[str] = None + external_id: Optional[str] = None + monitor_id: Optional[str] = None + contact_email: Optional[str] = None + contact_slack_channel: Optional[str] = None + contact_details: Optional[Dict] = None + + +class DataProducerUpdate(BaseModel): + name: Optional[str] = None + description: Optional[str] = None + external_id: Optional[str] = None + monitor_id: Optional[str] = None + contact_email: Optional[str] = None + contact_slack_channel: Optional[str] = None + contact_details: Optional[Dict] = None + + +class DataProducerMemberAssignment(BaseModel): + user_ids: List[str] + + +class DataProducerResponse(BaseModel): + id: str + name: str + description: Optional[str] = None + external_id: Optional[str] = None + monitor_id: Optional[str] = None + contact_email: Optional[str] = None + contact_slack_channel: Optional[str] = None + contact_details: Optional[Dict] = None + member_ids: List[str] = Field(default_factory=list) + created_at: datetime + updated_at: 
datetime + + model_config = ConfigDict(from_attributes=True) +``` + +- [ ] **Step 4: Extend Dataset response schema** + +The Dataset schema comes from the `fideslang` library. To surface `data_purposes` and `data_producer_id` in API responses, create a response schema extension. Find the existing Dataset response schema in fides (likely in `src/fides/api/schemas/dataset.py` or similar) and extend it: + +```python +# Add to the existing dataset schema file or create src/fides/api/schemas/dataset_extensions.py +from typing import List, Optional + +from pydantic import Field + + +class DatasetPurposesMixin: + """Mixin to add purpose-based fields to dataset responses.""" + data_purposes: List[str] = Field(default_factory=list) + data_producer_id: Optional[str] = None +``` + +If the existing dataset response schema is a fideslang model used directly, create a wrapper: + +```python +class DatasetResponseWithPurposes(FideslangDatasetResponse): + data_purposes: List[str] = Field(default_factory=list) + data_producer_id: Optional[str] = None +``` + +The exact approach depends on how the existing dataset GET endpoint constructs its response. The implementer should trace the existing dataset GET handler to determine the right extension point. 
+ +- [ ] **Step 5: Commit** + +```bash +git add src/fides/api/schemas/data_purpose.py src/fides/api/schemas/data_consumer.py src/fides/api/schemas/data_producer.py +git commit -m "feat: add Pydantic schemas for data purpose, consumer, producer" +``` + +--- + +## Chunk 2: Services (fidesplus) + +### Task 9: Feature Flag and Settings + +**Files:** +- Create: `src/fidesplus/config/purpose_settings.py` +- Modify: `src/fidesplus/config/__init__.py` (add to FidesplusConfig) + +- [ ] **Step 1: Create the settings class** + +```python +# src/fidesplus/config/purpose_settings.py +from pydantic import Field +from pydantic_settings import SettingsConfigDict + +from fidesplus.config.fidesplus_settings import FidesplusSettings + + +class PurposeSettings(FidesplusSettings): + """Settings for the purpose-based data model feature.""" + + model_config = SettingsConfigDict(env_prefix="FIDESPLUS__PURPOSE__") + + purpose_based_model_enabled: bool = Field( + default=False, + description="Enable the purpose-based data model (Data Purpose, Data Consumer, Data Producer APIs)", + ) +``` + +- [ ] **Step 2: Register on FidesplusConfig** + +In `src/fidesplus/config/__init__.py`, add: +- Import `PurposeSettings` +- Add `purpose: PurposeSettings = PurposeSettings()` field to `FidesplusConfig` +- Add loading logic in `get_config()` for both try and except branches + +- [ ] **Step 3: Create the feature flag dependency** + +Add to `src/fidesplus/api/deps.py`: + +```python +from fastapi import Depends, HTTPException + +def require_purpose_model_enabled( + config: FidesplusConfig = Depends(get_fidesplus_config), +) -> None: + """Dependency that returns 404 when purpose-based model is disabled.""" + if not config.purpose.purpose_based_model_enabled: + raise HTTPException(status_code=404) +``` + +- [ ] **Step 4: Commit** + +```bash +git add src/fidesplus/config/purpose_settings.py src/fidesplus/config/__init__.py src/fidesplus/api/deps.py +git commit -m "feat: add purpose_based_model_enabled feature 
flag and dependency" +``` + +--- + +### Task 10: DataPurposeService + +**Files:** +- Create: `src/fidesplus/service/data_purpose/data_purpose_service.py` +- Test: (fidesplus) `tests/ops/service/test_data_purpose_service.py` + +- [ ] **Step 1: Write the service test** + +```python +# tests/ops/service/test_data_purpose_service.py +import pytest +from sqlalchemy.orm import Session + +from fidesplus.service.data_purpose.data_purpose_service import DataPurposeService +from fides.api.models.sql_models import DataPurpose + + +@pytest.mark.integration +class TestDataPurposeService: + def test_create_purpose(self, db: Session): + service = DataPurposeService(db) + purpose = service.create( + fides_key="svc_test_purpose", + name="Service Test Purpose", + data_use="marketing.advertising", + ) + assert purpose.fides_key == "svc_test_purpose" + assert purpose.data_use == "marketing.advertising" + + def test_create_validates_data_use(self, db: Session): + service = DataPurposeService(db) + with pytest.raises(ValueError, match="data_use"): + service.create( + fides_key="bad_use", + name="Bad Use", + data_use="nonexistent.use.key", + ) + + def test_get_by_fides_key(self, db: Session): + service = DataPurposeService(db) + service.create( + fides_key="get_test", + name="Get Test", + data_use="analytics", + ) + result = service.get_by_fides_key("get_test") + assert result is not None + assert result.name == "Get Test" + + def test_get_by_fides_key_not_found(self, db: Session): + service = DataPurposeService(db) + result = service.get_by_fides_key("nonexistent") + assert result is None + + def test_update_purpose(self, db: Session): + service = DataPurposeService(db) + service.create( + fides_key="update_test", + name="Original Name", + data_use="analytics", + ) + updated = service.update("update_test", name="Updated Name") + assert updated.name == "Updated Name" + + def test_delete_purpose(self, db: Session): + service = DataPurposeService(db) + service.create( + 
fides_key="delete_test", + name="Delete Me", + data_use="analytics", + ) + service.delete("delete_test") + assert service.get_by_fides_key("delete_test") is None + + def test_delete_blocked_when_in_use(self, db: Session, system): + """Deleting a purpose that's linked to a system should fail.""" + service = DataPurposeService(db) + purpose = service.create( + fides_key="in_use_purpose", + name="In Use", + data_use="analytics", + ) + # Link purpose to system via SystemPurpose + from fides.api.models.system_purpose import SystemPurpose + SystemPurpose.create( + db=db, + data={ + "system_id": system.id, + "data_purpose_id": purpose.id, + }, + ) + with pytest.raises(Exception): + service.delete("in_use_purpose") + + def test_force_delete_removes_references(self, db: Session, system): + service = DataPurposeService(db) + purpose = service.create( + fides_key="force_delete_purpose", + name="Force Delete", + data_use="analytics", + ) + from fides.api.models.system_purpose import SystemPurpose + SystemPurpose.create( + db=db, + data={ + "system_id": system.id, + "data_purpose_id": purpose.id, + }, + ) + service.delete("force_delete_purpose", force=True) + assert service.get_by_fides_key("force_delete_purpose") is None + + def test_list_query(self, db: Session): + service = DataPurposeService(db) + service.create(fides_key="list_a", name="A", data_use="analytics") + service.create(fides_key="list_b", name="B", data_use="marketing") + results = service.list_query().all() + fides_keys = [r.fides_key for r in results] + assert "list_a" in fides_keys + assert "list_b" in fides_keys + + def test_list_query_filter_by_data_use(self, db: Session): + service = DataPurposeService(db) + service.create(fides_key="filter_a", name="A", data_use="analytics") + service.create(fides_key="filter_b", name="B", data_use="marketing") + results = service.list_query(data_use="analytics").all() + fides_keys = [r.fides_key for r in results] + assert "filter_a" in fides_keys + assert "filter_b" not 
in fides_keys +``` + +- [ ] **Step 2: Run test to verify it fails** + +Expected: ImportError + +- [ ] **Step 3: Write the DataPurposeService** + +```python +# src/fidesplus/service/data_purpose/data_purpose_service.py +from typing import List, Optional + +from sqlalchemy.orm import Query, Session + +from fides.api.models.sql_models import DataPurpose +from fides.api.models.data_consumer import DataConsumerPurpose +from fides.api.models.system_purpose import SystemPurpose +from fides.api.models.sql_models import DataUse, DataSubject + + +class DataPurposeService: + def __init__(self, db: Session): + self.db = db + + def _validate_data_use(self, data_use: str) -> None: + exists = self.db.query(DataUse).filter_by(fides_key=data_use).first() + if not exists: + raise ValueError(f"Invalid data_use: '{data_use}' not found in taxonomy") + + def _validate_data_subject(self, data_subject: Optional[str]) -> None: + if data_subject: + exists = self.db.query(DataSubject).filter_by(fides_key=data_subject).first() + if not exists: + raise ValueError(f"Invalid data_subject: '{data_subject}' not found in taxonomy") + + def create(self, *, fides_key: str, name: str, data_use: str, **kwargs) -> DataPurpose: + self._validate_data_use(data_use) + self._validate_data_subject(kwargs.get("data_subject")) + return DataPurpose.create( + db=self.db, + data={"fides_key": fides_key, "name": name, "data_use": data_use, **kwargs}, + ) + + def get_by_fides_key(self, fides_key: str) -> Optional[DataPurpose]: + return self.db.query(DataPurpose).filter_by(fides_key=fides_key).first() + + def update(self, fides_key: str, **kwargs) -> DataPurpose: + purpose = self.get_by_fides_key(fides_key) + if not purpose: + raise ValueError(f"DataPurpose '{fides_key}' not found") + if "data_use" in kwargs and kwargs["data_use"] is not None: + self._validate_data_use(kwargs["data_use"]) + if "data_subject" in kwargs: + self._validate_data_subject(kwargs["data_subject"]) + # Filter out None values for partial update 
+ update_data = {k: v for k, v in kwargs.items() if v is not None} + return purpose.update(db=self.db, data=update_data) + + def delete(self, fides_key: str, force: bool = False) -> None: + purpose = self.get_by_fides_key(fides_key) + if not purpose: + raise ValueError(f"DataPurpose '{fides_key}' not found") + if force: + # Remove all join table references first + self.db.query(SystemPurpose).filter_by(data_purpose_id=purpose.id).delete() + self.db.query(DataConsumerPurpose).filter_by(data_purpose_id=purpose.id).delete() + purpose.delete(self.db) + + def list_query( + self, + data_use: Optional[str] = None, + data_subject: Optional[str] = None, + ) -> "Query[DataPurpose]": + """Return a query object for pagination by the route layer.""" + query = self.db.query(DataPurpose) + if data_use: + query = query.filter(DataPurpose.data_use == data_use) + if data_subject: + query = query.filter(DataPurpose.data_subject == data_subject) + return query +``` + +- [ ] **Step 4: Run tests** + +Expected: All 9 tests PASS + +- [ ] **Step 5: Commit** + +```bash +git add src/fidesplus/service/data_purpose/__init__.py src/fidesplus/service/data_purpose/data_purpose_service.py tests/ops/service/test_data_purpose_service.py +git commit -m "feat: add DataPurposeService with CRUD and validation" +``` + +--- + +### Task 11: DataConsumerService (Facade) + +**Files:** +- Create: `src/fidesplus/service/data_consumer/data_consumer_service.py` +- Test: (fidesplus) `tests/ops/service/test_data_consumer_service.py` + +- [ ] **Step 1: Write the service test** + +Key tests to include: +- `test_create_group_consumer` - creates a non-system consumer +- `test_create_system_type_rejected` - returns error for type=system +- `test_get_non_system_consumer` - fetches by ID from data_consumer table +- `test_get_system_consumer` - fetches from ctl_systems with type=system param +- `test_list_unified` - returns both system and non-system consumers +- `test_list_filter_by_type` - filters by type +- 
`test_assign_purpose_to_system` - writes to system_purpose +- `test_assign_purpose_to_group` - writes to data_consumer_purpose +- `test_remove_purpose_from_system` - deletes from system_purpose +- `test_replace_purposes` - replaces all purposes for a consumer +- `test_system_response_coercion` - tags coalesced to [], type hardcoded to "system" + +- [ ] **Step 2: Write the DataConsumerService** + +The service should implement: +- `create(data: DataConsumerCreate) -> DataConsumerResponse` - validates type != system, creates in data_consumer table +- `get(id: str, type: Optional[str] = None) -> DataConsumerResponse` - if type=system, query ctl_systems; else query data_consumer +- `list(type: Optional[str], purpose_fides_key: Optional[str], tags: Optional[List[str]]) -> List[DataConsumerResponse]` - query both sources, merge +- `update(id: str, data: DataConsumerUpdate) -> DataConsumerResponse` - non-system only +- `delete(id: str) -> None` - non-system only +- `assign_purposes(id: str, type: Optional[str], purpose_fides_keys: List[str], assigned_by: Optional[str]) -> DataConsumerResponse` - replace semantics +- `add_purpose(id: str, type: Optional[str], purpose_fides_key: str, assigned_by: Optional[str]) -> DataConsumerResponse` +- `remove_purpose(id: str, type: Optional[str], purpose_fides_key: str) -> DataConsumerResponse` +- `_system_to_response(system: System) -> DataConsumerResponse` - facade mapper with coercion: + +```python +def _system_to_response(self, system: System) -> DataConsumerResponse: + """Coerce a System row into the unified DataConsumerResponse schema.""" + purposes = [] + for sp in system.system_purposes: + purposes.append(DataPurposeResponse.model_validate(sp.data_purpose)) + + # Note: data_shared_with_third_parties, third_parties, and shared_categories + # could be populated from the system's privacy declarations in a future phase. + # For Phase 1, these are set to None for system-type consumers. 
+ return DataConsumerResponse( + id=system.id, + name=system.name or system.fides_key, + description=system.description, + type="system", + external_id=None, + purposes=purposes, + system_fides_key=system.fides_key, + vendor_id=getattr(system, "vendor_id", None), + egress=system.egress if isinstance(system.egress, dict) else None, + ingress=system.ingress if isinstance(system.ingress, dict) else None, + data_shared_with_third_parties=None, + third_parties=None, + shared_categories=None, + tags=system.tags or [], # Coalesce None to [] + contact_email=None, + contact_slack_channel=None, + contact_details=None, + created_at=system.created_at, + updated_at=system.updated_at, + ) +``` + +- [ ] **Step 3: Run tests** + +Expected: All tests PASS + +- [ ] **Step 4: Commit** + +```bash +git add src/fidesplus/service/data_consumer/__init__.py src/fidesplus/service/data_consumer/data_consumer_service.py tests/ops/service/test_data_consumer_service.py +git commit -m "feat: add DataConsumerService with facade pattern" +``` + +--- + +### Task 12: DataProducerService + +**Files:** +- Create: `src/fidesplus/service/data_producer/data_producer_service.py` +- Test: (fidesplus) `tests/ops/service/test_data_producer_service.py` + +- [ ] **Step 1: Write the service test** + +Key tests: +- `test_create_producer` +- `test_get_producer` +- `test_update_producer` +- `test_delete_producer_nullifies_datasets` +- `test_add_member` +- `test_remove_member` +- `test_replace_members` +- `test_assign_dataset` +- `test_validate_monitor_id` + +- [ ] **Step 2: Write the DataProducerService** + +The service should implement: +- `create(data: DataProducerCreate) -> DataProducer` +- `get(id: str) -> Optional[DataProducer]` +- `list_query() -> Query[DataProducer]` (returns query for pagination, consistent with DataPurposeService) +- `update(id: str, data: DataProducerUpdate) -> DataProducer` +- `delete(id: str) -> None` +- `set_members(id: str, user_ids: List[str]) -> DataProducer` - replace semantics +- 
`add_member(id: str, user_id: str) -> DataProducer` +- `remove_member(id: str, user_id: str) -> DataProducer` + +- [ ] **Step 3: Run tests** + +Expected: All tests PASS + +- [ ] **Step 4: Commit** + +```bash +git add src/fidesplus/service/data_producer/__init__.py src/fidesplus/service/data_producer/data_producer_service.py tests/ops/service/test_data_producer_service.py +git commit -m "feat: add DataProducerService with member management" +``` + +--- + +## Chunk 3: API Routes (fidesplus) + +### Task 13: Register Scopes and URN Paths in Fidesplus + +**Files:** +- Modify: `src/fidesplus/api/plus_scope_registry.py` +- Modify: `src/fidesplus/api/urn_registry.py` + +- [ ] **Step 1: Define scope constants and register with role mappings** + +In `src/fidesplus/api/plus_scope_registry.py`, define the new scope constants (following the existing pattern for fidesplus-only features like `SYSTEM_GROUP`, `DISCOVERY_MONITOR`, etc.): + +```python +# Data Purpose scopes +DATA_PURPOSE = "data_purpose" +DATA_PURPOSE_CREATE = f"{DATA_PURPOSE}:{CREATE}" +DATA_PURPOSE_READ = f"{DATA_PURPOSE}:{READ}" +DATA_PURPOSE_UPDATE = f"{DATA_PURPOSE}:{UPDATE}" +DATA_PURPOSE_DELETE = f"{DATA_PURPOSE}:{DELETE}" + +# Data Consumer scopes +DATA_CONSUMER = "data_consumer" +DATA_CONSUMER_CREATE = f"{DATA_CONSUMER}:{CREATE}" +DATA_CONSUMER_READ = f"{DATA_CONSUMER}:{READ}" +DATA_CONSUMER_UPDATE = f"{DATA_CONSUMER}:{UPDATE}" +DATA_CONSUMER_DELETE = f"{DATA_CONSUMER}:{DELETE}" + +# Data Producer scopes +DATA_PRODUCER = "data_producer" +DATA_PRODUCER_CREATE = f"{DATA_PRODUCER}:{CREATE}" +DATA_PRODUCER_READ = f"{DATA_PRODUCER}:{READ}" +DATA_PRODUCER_UPDATE = f"{DATA_PRODUCER}:{UPDATE}" +DATA_PRODUCER_DELETE = f"{DATA_PRODUCER}:{DELETE}" +``` + +Add to `UPDATED_SCOPE_DOCS`: +```python +DATA_PURPOSE_CREATE: "Create data purposes", +DATA_PURPOSE_READ: "Read data purposes", +DATA_PURPOSE_UPDATE: "Update data purposes", +DATA_PURPOSE_DELETE: "Delete data purposes", +DATA_CONSUMER_CREATE: "Create data consumers", 
+DATA_CONSUMER_READ: "Read data consumers", +DATA_CONSUMER_UPDATE: "Update data consumers", +DATA_CONSUMER_DELETE: "Delete data consumers", +DATA_PRODUCER_CREATE: "Create data producers", +DATA_PRODUCER_READ: "Read data producers", +DATA_PRODUCER_UPDATE: "Update data producers", +DATA_PRODUCER_DELETE: "Delete data producers", +``` + +Add all 12 scopes to `PLUS_OWNER_SCOPES`. Add the READ scopes to `PLUS_VIEWER_SCOPES`. Add CREATE/READ/UPDATE to `PLUS_CONTRIBUTOR_SCOPES`. Follow the existing pattern in the file for how scopes are grouped by role. + +- [ ] **Step 2: Add URN registry entries** + +In `src/fidesplus/api/urn_registry.py`, add URL path constants: + +```python +DATA_PURPOSE = "/data-purpose" +DATA_PURPOSE_DETAIL = "/data-purpose/{fides_key}" +DATA_CONSUMER = "/data-consumer" +DATA_CONSUMER_DETAIL = "/data-consumer/{id}" +DATA_CONSUMER_PURPOSE = "/data-consumer/{id}/purpose" +DATA_CONSUMER_PURPOSE_DETAIL = "/data-consumer/{id}/purpose/{fides_key}" +DATA_PRODUCER = "/data-producer" +DATA_PRODUCER_DETAIL = "/data-producer/{id}" +DATA_PRODUCER_MEMBER = "/data-producer/{id}/member" +DATA_PRODUCER_MEMBER_DETAIL = "/data-producer/{id}/member/{user_id}" +``` + +- [ ] **Step 3: Commit** + +```bash +git add src/fidesplus/api/plus_scope_registry.py src/fidesplus/api/urn_registry.py +git commit -m "feat: register purpose-based data model scopes and URN paths in fidesplus" +``` + +--- + +### Task 14: DataPurpose Routes + +**Files:** +- Create: `src/fidesplus/api/routes/data_purpose.py` +- Test: (fidesplus) `tests/ops/api/test_data_purpose_api.py` + +- [ ] **Step 1: Write the API test** + +Key tests: +- `test_create_purpose` - POST /data-purpose, 201 +- `test_create_purpose_invalid_data_use` - POST, 422 +- `test_get_purpose` - GET /data-purpose/{fides_key}, 200 +- `test_get_purpose_not_found` - GET, 404 +- `test_list_purposes` - GET /data-purpose, 200 +- `test_list_filter_by_data_use` - GET /data-purpose?data_use=analytics, 200 +- `test_update_purpose` - PUT 
/data-purpose/{fides_key}, 200 +- `test_delete_purpose` - DELETE /data-purpose/{fides_key}, 204 +- `test_delete_purpose_in_use` - DELETE, 409 +- `test_delete_purpose_force` - DELETE ?force=true, 204 +- `test_unauthorized` - all endpoints without auth, 401 +- `test_feature_flag_off` - endpoints return 404 when disabled + +- [ ] **Step 2: Write the routes** + +**Important patterns:** Use `fides.api.util.api_router.APIRouter` (not plain `fastapi.APIRouter`). Use `Optional[str]` (not `str | None`). Add `require_purpose_model_enabled` as a router-level dependency so all routes return 404 when the feature flag is off. Use `fastapi_pagination.ext.sqlalchemy.paginate()` for pagination. + +```python +# src/fidesplus/api/routes/data_purpose.py +from typing import Optional + +from fastapi import Depends, HTTPException, Query, Security +from fastapi_pagination import Page, Params +from fastapi_pagination.ext.sqlalchemy import paginate +from sqlalchemy.orm import Session +from starlette.status import HTTP_201_CREATED, HTTP_204_NO_CONTENT, HTTP_404_NOT_FOUND + +from fides.api.deps import get_db +from fides.api.schemas.data_purpose import DataPurposeCreate, DataPurposeResponse, DataPurposeUpdate +from fides.api.util.api_router import APIRouter +from fidesplus.api.plus_scope_registry import DATA_PURPOSE_CREATE, DATA_PURPOSE_DELETE, DATA_PURPOSE_READ, DATA_PURPOSE_UPDATE +from fidesplus.api.deps import require_purpose_model_enabled +from fidesplus.api.plus_scope_registry import verify_oauth_client_plus +from fidesplus.service.data_purpose.data_purpose_service import DataPurposeService + +router = APIRouter( + prefix="/data-purpose", + tags=["Data Purpose"], + dependencies=[Depends(require_purpose_model_enabled)], +) + + +@router.post( + "", + response_model=DataPurposeResponse, + status_code=HTTP_201_CREATED, + dependencies=[Security(verify_oauth_client_plus, scopes=[DATA_PURPOSE_CREATE])], +) +def create_data_purpose( + data: DataPurposeCreate, + db: Session = Depends(get_db), +) 
-> DataPurposeResponse: + service = DataPurposeService(db) + return service.create(**data.model_dump()) + + +@router.get( + "", + response_model=Page[DataPurposeResponse], + dependencies=[Security(verify_oauth_client_plus, scopes=[DATA_PURPOSE_READ])], +) +def list_data_purposes( + data_use: Optional[str] = Query(None), + data_subject: Optional[str] = Query(None), + db: Session = Depends(get_db), + params: Params = Depends(), +) -> Page[DataPurposeResponse]: + service = DataPurposeService(db) + query = service.list_query(data_use=data_use, data_subject=data_subject) + return paginate(query, params) + + +@router.get( + "/{fides_key}", + response_model=DataPurposeResponse, + dependencies=[Security(verify_oauth_client_plus, scopes=[DATA_PURPOSE_READ])], +) +def get_data_purpose( + fides_key: str, + db: Session = Depends(get_db), +) -> DataPurposeResponse: + service = DataPurposeService(db) + purpose = service.get_by_fides_key(fides_key) + if not purpose: + raise HTTPException(status_code=HTTP_404_NOT_FOUND) + return purpose + + +@router.put( + "/{fides_key}", + response_model=DataPurposeResponse, + dependencies=[Security(verify_oauth_client_plus, scopes=[DATA_PURPOSE_UPDATE])], +) +def update_data_purpose( + fides_key: str, + data: DataPurposeUpdate, + db: Session = Depends(get_db), +) -> DataPurposeResponse: + service = DataPurposeService(db) + return service.update(fides_key, **data.model_dump(exclude_unset=True)) + + +@router.delete( + "/{fides_key}", + status_code=HTTP_204_NO_CONTENT, + dependencies=[Security(verify_oauth_client_plus, scopes=[DATA_PURPOSE_DELETE])], +) +def delete_data_purpose( + fides_key: str, + force: bool = Query(False), + db: Session = Depends(get_db), +) -> None: + service = DataPurposeService(db) + service.delete(fides_key, force=force) +``` + +- [ ] **Step 3: Mount the router** + +In `src/fidesplus/main.py:prepare_plus_app()`, add to the `plus_router` registrations (routes will be under `/api/v1/plus/`): + +```python +from 
fidesplus.api.routes.data_purpose import router as data_purpose_router +plus_router.include_router(data_purpose_router) +``` + +- [ ] **Step 4: Run API tests** + +Expected: All tests PASS + +- [ ] **Step 5: Commit** + +```bash +git add src/fidesplus/api/routes/data_purpose.py tests/ops/api/test_data_purpose_api.py +git commit -m "feat: add DataPurpose CRUD API routes" +``` + +--- + +### Task 15: DataConsumer Routes + +**Files:** +- Create: `src/fidesplus/api/routes/data_consumer.py` +- Test: (fidesplus) `tests/ops/api/test_data_consumer_api.py` + +- [ ] **Step 1: Write the API test** + +Key tests: +- `test_create_group_consumer` - POST /data-consumer with type=group, 201 +- `test_create_system_type_rejected` - POST with type=system, 400 +- `test_get_non_system_consumer` - GET /data-consumer/{id}, 200 +- `test_get_system_consumer` - GET /data-consumer/{id}?type=system, 200 +- `test_get_without_type_param_not_found_for_system` - GET /data-consumer/{system_id} (no type param), 404 +- `test_list_consumers_unified` - GET /data-consumer, returns both types +- `test_list_filter_by_type` - GET /data-consumer?type=group +- `test_update_non_system` - PUT /data-consumer/{id}, 200 +- `test_update_system_type_rejected` - PUT with system id + ?type=system, 400 +- `test_delete_non_system` - DELETE /data-consumer/{id}, 204 +- `test_assign_purposes_to_system` - PUT /data-consumer/{id}/purpose?type=system, 200 +- `test_assign_purposes_to_group` - PUT /data-consumer/{id}/purpose, 200 +- `test_add_single_purpose` - POST /data-consumer/{id}/purpose/{fides_key}, 200 +- `test_remove_purpose` - DELETE /data-consumer/{id}/purpose/{fides_key}, 204 + +- [ ] **Step 2: Write the routes** + +**Important patterns:** Use `fides.api.util.api_router.APIRouter` (not plain `fastapi.APIRouter`). Use `Optional[str]` (not `str | None`). Add `require_purpose_model_enabled` as a router-level dependency. Use `fastapi_pagination.ext.sqlalchemy.paginate()` for the list endpoint. 
The `?type=system` query param controls routing to the facade. + +```python +# src/fidesplus/api/routes/data_consumer.py +from typing import Optional + +from fastapi import Depends, HTTPException, Query, Security +from fastapi_pagination import Page, Params +from sqlalchemy.orm import Session +from starlette.status import HTTP_201_CREATED, HTTP_204_NO_CONTENT, HTTP_400_BAD_REQUEST, HTTP_404_NOT_FOUND + +from fides.api.deps import get_db +from fides.api.schemas.data_consumer import ( + DataConsumerCreate, + DataConsumerPurposeAssignment, + DataConsumerResponse, + DataConsumerUpdate, +) +from fides.api.util.api_router import APIRouter +from fidesplus.api.plus_scope_registry import ( + DATA_CONSUMER_CREATE, + DATA_CONSUMER_DELETE, + DATA_CONSUMER_READ, + DATA_CONSUMER_UPDATE, +) +from fidesplus.api.deps import require_purpose_model_enabled +from fidesplus.api.plus_scope_registry import verify_oauth_client_plus +from fidesplus.service.data_consumer.data_consumer_service import DataConsumerService + +router = APIRouter( + prefix="/data-consumer", + tags=["Data Consumer"], + dependencies=[Depends(require_purpose_model_enabled)], +) +``` + +Implement all CRUD routes + purpose assignment sub-routes (`PUT /{id}/purpose`, `POST /{id}/purpose/{fides_key}`, `DELETE /{id}/purpose/{fides_key}`). System-type consumers are read-only (no create/update/delete); the `?type=system` query param triggers the facade path. + +**Important:** Purpose assignment endpoints for system-type consumers (`?type=system`) must additionally verify the `system:update` scope (`SYSTEM_UPDATE` from `fides.common.scope_registry`). 
Add a runtime scope check in the route handler: + +```python +from fides.common.scope_registry import SYSTEM_UPDATE + +# In purpose assignment handlers, when type == "system": +if consumer_type == "system": + # verify_oauth_client_plus already ran via Security dependency; + # additionally check SYSTEM_UPDATE scope + verify_oauth_client_plus(security_scopes=SecurityScopes(scopes=[SYSTEM_UPDATE]), authorization=authorization) +``` + +- [ ] **Step 3: Mount the router** + +In the fidesplus router registration file, add: +```python +from fidesplus.api.routes.data_consumer import router as data_consumer_router +plus_router.include_router(data_consumer_router) +``` + +- [ ] **Step 4: Run API tests** + +Expected: All tests PASS + +- [ ] **Step 5: Commit** + +```bash +git add src/fidesplus/api/routes/data_consumer.py tests/ops/api/test_data_consumer_api.py +git commit -m "feat: add DataConsumer CRUD and purpose assignment API routes" +``` + +--- + +### Task 16: DataProducer Routes + +**Files:** +- Create: `src/fidesplus/api/routes/data_producer.py` +- Test: (fidesplus) `tests/ops/api/test_data_producer_api.py` + +- [ ] **Step 1: Write the API test** + +Key tests: +- `test_create_producer` - POST /data-producer, 201 +- `test_get_producer` - GET /data-producer/{id}, 200 +- `test_list_producers` - GET /data-producer, 200 +- `test_update_producer` - PUT /data-producer/{id}, 200 +- `test_delete_producer` - DELETE /data-producer/{id}, 204 +- `test_set_members` - PUT /data-producer/{id}/member, 200 +- `test_add_member` - POST /data-producer/{id}/member/{user_id}, 200 +- `test_remove_member` - DELETE /data-producer/{id}/member/{user_id}, 204 + +- [ ] **Step 2: Write the routes** + +**Important patterns:** Use `fides.api.util.api_router.APIRouter` (not plain `fastapi.APIRouter`). Use `Optional[str]` (not `str | None`). Add `require_purpose_model_enabled` as a router-level dependency. Use `fastapi_pagination.ext.sqlalchemy.paginate()` for the list endpoint. 
+ +```python +# src/fidesplus/api/routes/data_producer.py +from typing import Optional + +from fastapi import Depends, HTTPException, Security +from fastapi_pagination import Page, Params +from sqlalchemy.orm import Session +from starlette.status import HTTP_201_CREATED, HTTP_204_NO_CONTENT, HTTP_404_NOT_FOUND + +from fides.api.deps import get_db +from fides.api.schemas.data_producer import ( + DataProducerCreate, + DataProducerMemberAssignment, + DataProducerResponse, + DataProducerUpdate, +) +from fides.api.util.api_router import APIRouter +from fidesplus.api.plus_scope_registry import ( + DATA_PRODUCER_CREATE, + DATA_PRODUCER_DELETE, + DATA_PRODUCER_READ, + DATA_PRODUCER_UPDATE, +) +from fidesplus.api.deps import require_purpose_model_enabled +from fidesplus.api.plus_scope_registry import verify_oauth_client_plus +from fidesplus.service.data_producer.data_producer_service import DataProducerService + +router = APIRouter( + prefix="/data-producer", + tags=["Data Producer"], + dependencies=[Depends(require_purpose_model_enabled)], +) +``` + +Implement CRUD routes + member management sub-routes (`PUT /{id}/member`, `POST /{id}/member/{user_id}`, `DELETE /{id}/member/{user_id}`). 
+ +- [ ] **Step 3: Mount the router** + +In the fidesplus router registration file, add: +```python +from fidesplus.api.routes.data_producer import router as data_producer_router +plus_router.include_router(data_producer_router) +``` + +- [ ] **Step 4: Run API tests** + +Expected: All tests PASS + +- [ ] **Step 5: Commit** + +```bash +git add src/fidesplus/api/routes/data_producer.py tests/ops/api/test_data_producer_api.py +git commit -m "feat: add DataProducer CRUD and member management API routes" +``` + +--- + +## Chunk 4: Dataset Integration and Final Verification + +### Task 17: Dataset Purpose Validation in Write Path + +**Files:** +- Modify: existing dataset service/endpoint in fidesplus (find the dataset write handler) +- Test: extend existing dataset API tests + +- [ ] **Step 1: Find the dataset write handler** + +Locate where dataset create/update is handled in fidesplus. The handler needs to be extended to: +1. Validate `data_purposes` fides_key strings at dataset level against `data_purpose` table +2. Validate `data_purposes` strings within collection/field/sub-field JSON +3. Validate `data_producer_id` references a valid `DataProducer` +4. Pass through when feature flag is off (ignore new fields) + +- [ ] **Step 2: Write the validation test** + +Key tests: +- `test_create_dataset_with_valid_purposes` - purposes validated, stored +- `test_create_dataset_with_invalid_purpose` - 422 for nonexistent fides_key +- `test_create_dataset_with_collection_purposes` - collection-level purposes in JSON +- `test_create_dataset_with_field_purposes` - field-level purposes in JSON +- `test_create_dataset_with_producer` - valid data_producer_id +- `test_create_dataset_with_invalid_producer` - 422 for nonexistent producer +- `test_existing_dataset_payloads_unaffected` - backward compatibility +- `test_feature_flag_off_ignores_purposes` - new fields stripped when disabled + +- [ ] **Step 3: Implement the validation** + +Add purpose validation to the dataset write path. 
Extract a helper that recursively validates `data_purposes` arrays at all levels of the collections JSON. + +```python +def validate_dataset_purposes(db: Session, dataset_data: dict) -> None: + """Validate all data_purposes references in a dataset payload.""" + all_purpose_keys = set() + + # Dataset-level purposes + if dataset_data.get("data_purposes"): + all_purpose_keys.update(dataset_data["data_purposes"]) + + # Collection/field/sub-field purposes (recursive) + def collect_purposes(obj): + if isinstance(obj, dict): + if obj.get("data_purposes"): + all_purpose_keys.update(obj["data_purposes"]) + for value in obj.values(): + collect_purposes(value) + elif isinstance(obj, list): + for item in obj: + collect_purposes(item) + + collect_purposes(dataset_data.get("collections", [])) + + # Validate all collected keys exist + if all_purpose_keys: + existing = { + p.fides_key + for p in db.query(DataPurpose.fides_key) + .filter(DataPurpose.fides_key.in_(all_purpose_keys)) + .all() + } + missing = all_purpose_keys - existing + if missing: + raise ValueError(f"Invalid data_purposes references: {missing}") +``` + +- [ ] **Step 4: Run tests** + +Expected: All tests PASS + +- [ ] **Step 5: Commit** + +```bash +# Add the specific files modified for dataset purpose validation +git add src/fidesplus/service/dataset/ tests/ops/api/test_dataset_purpose_validation.py +git commit -m "feat: add dataset purpose validation in write path" +``` + +--- + +### Task 18: End-to-End Verification + +- [ ] **Step 1: Run the full model test suite** + +```bash +nox -s "pytest(ops-unit)" -- tests/ops/models/test_data_purpose.py tests/ops/models/test_data_consumer.py tests/ops/models/test_system_purpose.py tests/ops/models/test_data_producer.py tests/ops/models/test_dataset_purposes.py -v +``` + +Expected: All model tests PASS + +- [ ] **Step 2: Run static checks** + +```bash +nox -s static_checks +``` + +Expected: ruff format, ruff lint, mypy all pass + +- [ ] **Step 3: Run the full fidesplus 
service + API test suite** + +Run all new service and API tests together. + +Expected: All tests PASS + +- [ ] **Step 4: Check migrations** + +```bash +nox -s check_migrations +``` + +Expected: No missing migrations + +- [ ] **Step 5: Run existing test suites to check backward compatibility** + +```bash +nox -s "pytest(ops-unit)" -- tests/ops/models/ -v --timeout=300 +nox -s "pytest(ctl-unit)" -- tests/ctl/ -v --timeout=300 +``` + +Expected: No regressions in existing tests + +- [ ] **Step 6: Final commit (if any fixups needed)** + +```bash +git commit -m "fix: address any issues found during e2e verification" +``` + +--- + +## Summary + +| Chunk | Tasks | What it delivers | +|-------|-------|-----------------| +| **1: Models & Migration** | Tasks 1-8 | All SQLAlchemy models, Alembic migration, Pydantic schemas, permission scopes | +| **2: Services** | Tasks 9-12 | Feature flag, DataPurposeService, DataConsumerService (facade), DataProducerService | +| **3: API Routes** | Tasks 13-16 | All REST endpoints mounted and tested | +| **4: Integration** | Tasks 17-18 | Dataset purpose validation, end-to-end verification, backward compat confirmation | From d08360ce20ca8f5d6d4710c77c4967d2f66cf7c0 Mon Sep 17 00:00:00 2001 From: Adrian Galvan Date: Wed, 11 Mar 2026 22:19:27 -0400 Subject: [PATCH 05/17] refactor: move DataPurpose model to its own file DataPurpose goes in src/fides/api/models/data_purpose.py instead of sql_models.py. Imports FidesBase from sql_models without circular dependency since sql_models doesn't import back. 
Co-Authored-By: Claude Opus 4.6 --- .../2026-03-11-purpose-based-data-model.md | 33 ++++++++++++------- 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/docs/superpowers/plans/2026-03-11-purpose-based-data-model.md b/docs/superpowers/plans/2026-03-11-purpose-based-data-model.md index e3a8b3ae4ad..4f29f811bca 100644 --- a/docs/superpowers/plans/2026-03-11-purpose-based-data-model.md +++ b/docs/superpowers/plans/2026-03-11-purpose-based-data-model.md @@ -18,7 +18,8 @@ | Action | File | Responsibility | |--------|------|---------------| -| Modify | `src/fides/api/models/sql_models.py` | Add DataPurpose model (alongside other FidesBase models), add Dataset columns, add System relationship | +| Create | `src/fides/api/models/data_purpose.py` | DataPurpose model | +| Modify | `src/fides/api/models/sql_models.py` | Add Dataset columns, add System relationship | | Create | `src/fides/api/models/data_consumer.py` | DataConsumer model, DataConsumerPurpose join table | | Create | `src/fides/api/models/system_purpose.py` | SystemPurpose join table | | Create | `src/fides/api/models/data_producer.py` | DataProducer model, DataProducerMember join table | @@ -71,7 +72,7 @@ ### Task 1: DataPurpose Model **Files:** -- Modify: `src/fides/api/models/sql_models.py` (add DataPurpose class) +- Create: `src/fides/api/models/data_purpose.py` - Test: `tests/ops/models/test_data_purpose.py` - [ ] **Step 1: Write the model test** @@ -81,7 +82,7 @@ import pytest from sqlalchemy.orm import Session -from fides.api.models.sql_models import DataPurpose +from fides.api.models.data_purpose import DataPurpose @pytest.mark.postgres @@ -168,10 +169,18 @@ Expected: ImportError (module does not exist yet) **Important:** `FidesBase` (the mixin providing `fides_key`, `name`, `description`, `organization_fides_key`, `tags`) is defined in `sql_models.py`, NOT in `base_class.py`. The `Base` class from `base_class.py` provides `id`, `created_at`, `updated_at`, and CRUD methods. 
Models inheriting both get a composite primary key (`id` + `fides_key`). This is the same pattern used by `System`, `Dataset`, `DataCategory`, etc. Join tables reference `data_purpose.id` (the UUID column), which works because `id` is part of the composite PK. -Add the `DataPurpose` class to `src/fides/api/models/sql_models.py` (alongside the other FidesBase models) to avoid circular import issues: +Create `src/fides/api/models/data_purpose.py`. Import `FidesBase` from `sql_models` and `Base` from `base_class`. There is no circular import since `sql_models.py` does not import from `data_purpose.py`. ```python -# Add to src/fides/api/models/sql_models.py after the Dataset class +# src/fides/api/models/data_purpose.py +from typing import Any + +from sqlalchemy import ARRAY, Boolean, Column, String +from sqlalchemy.orm import Session + +from fides.api.db.base_class import Base +from fides.api.models.sql_models import FidesBase + class DataPurpose(Base, FidesBase): """ @@ -215,7 +224,7 @@ Columns inherited from `Base` (do NOT redeclare): `id`, `created_at`, `updated_a Add to `src/fides/api/db/base.py`: ```python -from fides.api.models.sql_models import DataPurpose # noqa: F401 +from fides.api.models.data_purpose import DataPurpose # noqa: F401 ``` - [ ] **Step 5: Run tests to verify they pass** @@ -226,7 +235,7 @@ Expected: All 4 tests PASS - [ ] **Step 6: Commit** ```bash -git add src/fides/api/models/sql_models.py src/fides/api/db/base.py tests/ops/models/test_data_purpose.py +git add src/fides/api/models/data_purpose.py src/fides/api/db/base.py tests/ops/models/test_data_purpose.py git commit -m "feat: add DataPurpose model with unit tests" ``` @@ -246,7 +255,7 @@ git commit -m "feat: add DataPurpose model with unit tests" import pytest from sqlalchemy.orm import Session -from fides.api.models.sql_models import DataPurpose +from fides.api.models.data_purpose import DataPurpose from fides.api.models.system_purpose import SystemPurpose from fides.api.models.sql_models 
import System @@ -402,7 +411,7 @@ import pytest from sqlalchemy.orm import Session from fides.api.models.data_consumer import DataConsumer, DataConsumerPurpose -from fides.api.models.sql_models import DataPurpose +from fides.api.models.data_purpose import DataPurpose @pytest.mark.postgres @@ -861,7 +870,7 @@ import pytest from sqlalchemy.orm import Session from fides.api.models.sql_models import Dataset -from fides.api.models.sql_models import DataPurpose +from fides.api.models.data_purpose import DataPurpose from fides.api.models.data_producer import DataProducer @@ -1322,7 +1331,7 @@ import pytest from sqlalchemy.orm import Session from fidesplus.service.data_purpose.data_purpose_service import DataPurposeService -from fides.api.models.sql_models import DataPurpose +from fides.api.models.data_purpose import DataPurpose @pytest.mark.integration @@ -1451,7 +1460,7 @@ from typing import List, Optional from sqlalchemy.orm import Query, Session -from fides.api.models.sql_models import DataPurpose +from fides.api.models.data_purpose import DataPurpose from fides.api.models.data_consumer import DataConsumerPurpose from fides.api.models.system_purpose import SystemPurpose from fides.api.models.sql_models import DataUse, DataSubject From fdfe6a2a6aa9ecb2de60797b2dd5c67dcec6df3d Mon Sep 17 00:00:00 2001 From: Adrian Galvan Date: Wed, 11 Mar 2026 22:20:28 -0400 Subject: [PATCH 06/17] fix: remove unnecessary @pytest.mark.postgres from model tests Model tests use the standard db session fixture and don't need the postgres marker. 
Co-Authored-By: Claude Opus 4.6 --- .../plans/2026-03-11-purpose-based-data-model.md | 7 ------- 1 file changed, 7 deletions(-) diff --git a/docs/superpowers/plans/2026-03-11-purpose-based-data-model.md b/docs/superpowers/plans/2026-03-11-purpose-based-data-model.md index 4f29f811bca..4b4771b2a85 100644 --- a/docs/superpowers/plans/2026-03-11-purpose-based-data-model.md +++ b/docs/superpowers/plans/2026-03-11-purpose-based-data-model.md @@ -85,7 +85,6 @@ from sqlalchemy.orm import Session from fides.api.models.data_purpose import DataPurpose -@pytest.mark.postgres class TestDataPurposeModel: def test_create_data_purpose(self, db: Session): purpose = DataPurpose.create( @@ -260,7 +259,6 @@ from fides.api.models.system_purpose import SystemPurpose from fides.api.models.sql_models import System -@pytest.mark.postgres class TestSystemPurposeModel: @pytest.fixture def purpose(self, db: Session) -> DataPurpose: @@ -414,7 +412,6 @@ from fides.api.models.data_consumer import DataConsumer, DataConsumerPurpose from fides.api.models.data_purpose import DataPurpose -@pytest.mark.postgres class TestDataConsumerModel: def test_create_group_consumer(self, db: Session): consumer = DataConsumer.create( @@ -466,7 +463,6 @@ class TestDataConsumerModel: assert consumer.type == "data_warehouse" -@pytest.mark.postgres class TestDataConsumerPurposeModel: @pytest.fixture def purpose(self, db: Session) -> DataPurpose: @@ -664,7 +660,6 @@ from sqlalchemy.orm import Session from fides.api.models.data_producer import DataProducer, DataProducerMember -@pytest.mark.postgres class TestDataProducerModel: def test_create_data_producer(self, db: Session): producer = DataProducer.create( @@ -699,7 +694,6 @@ class TestDataProducerModel: assert db.query(DataProducer).filter_by(id=producer_id).first() is None -@pytest.mark.postgres class TestDataProducerMemberModel: @pytest.fixture def producer(self, db: Session) -> DataProducer: @@ -874,7 +868,6 @@ from fides.api.models.data_purpose import DataPurpose 
from fides.api.models.data_producer import DataProducer -@pytest.mark.postgres class TestDatasetPurposes: def test_dataset_with_purposes(self, db: Session): dataset = Dataset.create( From bcd4d76e7a653fd6241441cc081f7b8836b8cc16 Mon Sep 17 00:00:00 2001 From: Adrian Galvan Date: Wed, 11 Mar 2026 22:41:44 -0400 Subject: [PATCH 07/17] feat: add DataPurpose model with unit tests Introduces the DataPurpose SQLAlchemy model as a centrally-governed, reusable declaration of why data is processed. Follows the same pattern as DataCategory/DataUse (Base + FidesBase, fides_key uniqueness enforced in create override). Registers the model in base.py for Alembic discovery. Co-Authored-By: Claude Sonnet 4.6 --- src/fides/api/db/base.py | 1 + src/fides/api/models/data_purpose.py | 53 ++++++++++++++++++ tests/ops/models/test_data_purpose.py | 78 +++++++++++++++++++++++++++ 3 files changed, 132 insertions(+) create mode 100644 src/fides/api/models/data_purpose.py create mode 100644 tests/ops/models/test_data_purpose.py diff --git a/src/fides/api/db/base.py b/src/fides/api/db/base.py index 9ea3b330f0f..e0bbe7e1eea 100644 --- a/src/fides/api/db/base.py +++ b/src/fides/api/db/base.py @@ -16,6 +16,7 @@ from fides.api.models.custom_asset import CustomAsset from fides.api.models.custom_connector_template import CustomConnectorTemplate from fides.api.models.custom_report import CustomReport +from fides.api.models.data_purpose import DataPurpose # noqa: F401 from fides.api.models.datasetconfig import DatasetConfig from fides.api.models.db_cache import DBCache from fides.api.models.detection_discovery.core import MonitorConfig, StagedResource diff --git a/src/fides/api/models/data_purpose.py b/src/fides/api/models/data_purpose.py new file mode 100644 index 00000000000..35a6fcc74d7 --- /dev/null +++ b/src/fides/api/models/data_purpose.py @@ -0,0 +1,53 @@ +from typing import Any, Type + +from sqlalchemy import Boolean, Column, String +from sqlalchemy.dialects.postgresql import ARRAY +from 
sqlalchemy.orm import Session + +from fides.api.common_exceptions import KeyOrNameAlreadyExists +from fides.api.db.base_class import Base +from fides.api.models.sql_models import FidesBase + + +class DataPurpose(Base, FidesBase): + """ + A standalone, reusable declaration of why data is processed. + + Replaces the system-bound PrivacyDeclaration with a centrally-governed + entity. Flat (no hierarchy) but inherits FidesBase for fides_key, name, + description, organization_fides_key, and tags. + """ + + __tablename__ = "data_purpose" + + data_use = Column(String, nullable=False, index=True) + data_subject = Column(String, nullable=True) + data_categories = Column(ARRAY(String), server_default="{}", nullable=False) + legal_basis_for_processing = Column(String, nullable=True) + flexible_legal_basis_for_processing = Column( + Boolean, server_default="t", nullable=False + ) + special_category_legal_basis = Column(String, nullable=True) + impact_assessment_location = Column(String, nullable=True) + retention_period = Column(String, nullable=True) + features = Column(ARRAY(String), server_default="{}", nullable=False) + + @classmethod + def create( + cls: Type["DataPurpose"], + db: Session, + *, + data: dict[str, Any], + check_name: bool = False, + ) -> "DataPurpose": + """Override create to enforce fides_key uniqueness and skip name uniqueness check. + + DataPurpose uses fides_key for uniqueness, not name. + """ + if "fides_key" in data and data["fides_key"]: + existing = db.query(cls).filter(cls.fides_key == data["fides_key"]).first() + if existing: + raise KeyOrNameAlreadyExists( + f'DataPurpose with fides_key "{data["fides_key"]}" already exists.' 
+ ) + return super().create(db=db, data=data, check_name=False) diff --git a/tests/ops/models/test_data_purpose.py b/tests/ops/models/test_data_purpose.py new file mode 100644 index 00000000000..74315e39360 --- /dev/null +++ b/tests/ops/models/test_data_purpose.py @@ -0,0 +1,78 @@ +import pytest +from sqlalchemy.orm import Session + +from fides.api.models.data_purpose import DataPurpose + + +class TestDataPurposeModel: + def test_create_data_purpose(self, db: Session): + purpose = DataPurpose.create( + db=db, + data={ + "fides_key": "marketing_email", + "name": "Email Marketing", + "description": "Processing for email marketing campaigns", + "data_use": "marketing.advertising", + "data_subject": "customer", + "data_categories": ["user.contact.email"], + "legal_basis_for_processing": "Consent", + "flexible_legal_basis_for_processing": True, + "retention_period": "90 days", + "features": ["email_targeting"], + }, + ) + assert purpose.fides_key == "marketing_email" + assert purpose.data_use == "marketing.advertising" + assert purpose.data_subject == "customer" + assert purpose.data_categories == ["user.contact.email"] + assert purpose.flexible_legal_basis_for_processing is True + assert purpose.features == ["email_targeting"] + assert purpose.id is not None + assert purpose.created_at is not None + + def test_create_minimal_data_purpose(self, db: Session): + """Only fides_key, name, and data_use are required.""" + purpose = DataPurpose.create( + db=db, + data={ + "fides_key": "analytics_basic", + "name": "Basic Analytics", + "data_use": "analytics", + }, + ) + assert purpose.fides_key == "analytics_basic" + assert purpose.data_subject is None + assert purpose.data_categories == [] + assert purpose.legal_basis_for_processing is None + + def test_fides_key_uniqueness(self, db: Session): + DataPurpose.create( + db=db, + data={ + "fides_key": "unique_purpose", + "name": "Purpose A", + "data_use": "analytics", + }, + ) + with pytest.raises(Exception): + DataPurpose.create( 
+ db=db, + data={ + "fides_key": "unique_purpose", + "name": "Purpose B", + "data_use": "marketing", + }, + ) + + def test_delete_data_purpose(self, db: Session): + purpose = DataPurpose.create( + db=db, + data={ + "fides_key": "to_delete", + "name": "Delete Me", + "data_use": "analytics", + }, + ) + purpose_id = purpose.id + purpose.delete(db) + assert db.query(DataPurpose).filter_by(id=purpose_id).first() is None From 97ad38d582b8081c5b23dcc90151498eb1cb237f Mon Sep 17 00:00:00 2001 From: Adrian Galvan Date: Wed, 11 Mar 2026 22:43:13 -0400 Subject: [PATCH 08/17] feat: add SystemPurpose join table with cascade delete Co-Authored-By: Claude Sonnet 4.6 --- src/fides/api/db/base.py | 1 + src/fides/api/models/sql_models.py | 6 +++ src/fides/api/models/system_purpose.py | 37 ++++++++++++++ tests/ops/models/test_system_purpose.py | 65 +++++++++++++++++++++++++ 4 files changed, 109 insertions(+) create mode 100644 src/fides/api/models/system_purpose.py create mode 100644 tests/ops/models/test_system_purpose.py diff --git a/src/fides/api/db/base.py b/src/fides/api/db/base.py index e0bbe7e1eea..55c3a535719 100644 --- a/src/fides/api/db/base.py +++ b/src/fides/api/db/base.py @@ -98,6 +98,7 @@ from fides.api.models.system_group import SystemGroup, SystemGroupMember from fides.api.models.system_history import SystemHistory from fides.api.models.system_manager import SystemManager +from fides.api.models.system_purpose import SystemPurpose # noqa: F401 from fides.api.models.taxonomy import ( Taxonomy, TaxonomyAllowedUsage, diff --git a/src/fides/api/models/sql_models.py b/src/fides/api/models/sql_models.py index 316a845dddd..e886db12a63 100644 --- a/src/fides/api/models/sql_models.py +++ b/src/fides/api/models/sql_models.py @@ -593,6 +593,12 @@ class System(Base, FidesBase): viewonly=True, ) + system_purposes = relationship( + "SystemPurpose", + cascade="all, delete-orphan", + lazy="selectin", + ) + @classmethod def get_data_uses( cls: Type[System], systems: List[System], 
include_parents: bool = True diff --git a/src/fides/api/models/system_purpose.py b/src/fides/api/models/system_purpose.py new file mode 100644 index 00000000000..085a7d75a4d --- /dev/null +++ b/src/fides/api/models/system_purpose.py @@ -0,0 +1,37 @@ +from sqlalchemy import Column, ForeignKey, String, UniqueConstraint +from sqlalchemy.orm import relationship + +from fides.api.db.base_class import Base + + +class SystemPurpose(Base): + """ + Audited join table linking a System to a DataPurpose. + Used by the DataConsumer facade for system-type consumers. + """ + + __tablename__ = "system_purpose" + __table_args__ = ( + UniqueConstraint("system_id", "data_purpose_id", name="uq_system_purpose"), + ) + + system_id = Column( + String, + ForeignKey("ctl_systems.id", ondelete="CASCADE"), + nullable=False, + index=True, + ) + data_purpose_id = Column( + String, + ForeignKey("data_purpose.id", ondelete="RESTRICT"), + nullable=False, + index=True, + ) + assigned_by = Column( + String, + ForeignKey("fidesuser.id"), + nullable=True, + ) + + system = relationship("System", lazy="selectin") + data_purpose = relationship("DataPurpose", lazy="selectin") diff --git a/tests/ops/models/test_system_purpose.py b/tests/ops/models/test_system_purpose.py new file mode 100644 index 00000000000..918fdf472a7 --- /dev/null +++ b/tests/ops/models/test_system_purpose.py @@ -0,0 +1,65 @@ +import pytest +from sqlalchemy.orm import Session + +from fides.api.models.data_purpose import DataPurpose +from fides.api.models.sql_models import System +from fides.api.models.system_purpose import SystemPurpose + + +class TestSystemPurposeModel: + @pytest.fixture + def purpose(self, db: Session) -> DataPurpose: + return DataPurpose.create( + db=db, + data={ + "fides_key": "test_purpose", + "name": "Test Purpose", + "data_use": "analytics", + }, + ) + + def test_create_system_purpose( + self, db: Session, system: System, purpose: DataPurpose + ): + sp = SystemPurpose.create( + db=db, + data={ + "system_id": 
system.id, + "data_purpose_id": purpose.id, + }, + ) + assert sp.system_id == system.id + assert sp.data_purpose_id == purpose.id + assert sp.assigned_by is None + assert sp.created_at is not None + + def test_unique_constraint(self, db: Session, system: System, purpose: DataPurpose): + SystemPurpose.create( + db=db, + data={ + "system_id": system.id, + "data_purpose_id": purpose.id, + }, + ) + with pytest.raises(Exception): + SystemPurpose.create( + db=db, + data={ + "system_id": system.id, + "data_purpose_id": purpose.id, + }, + ) + + def test_cascade_on_system_delete( + self, db: Session, system: System, purpose: DataPurpose + ): + sp = SystemPurpose.create( + db=db, + data={ + "system_id": system.id, + "data_purpose_id": purpose.id, + }, + ) + sp_id = sp.id + system.delete(db) + assert db.query(SystemPurpose).filter_by(id=sp_id).first() is None From 5733cc5a0d4fbaa4d1f79545404e40c820c444e4 Mon Sep 17 00:00:00 2001 From: Adrian Galvan Date: Wed, 11 Mar 2026 22:44:48 -0400 Subject: [PATCH 09/17] feat: add DataConsumer and DataConsumerPurpose models Co-Authored-By: Claude Sonnet 4.6 --- src/fides/api/db/base.py | 4 + src/fides/api/models/data_consumer.py | 93 ++++++++++++++++++ tests/ops/models/test_data_consumer.py | 125 +++++++++++++++++++++++++ 3 files changed, 222 insertions(+) create mode 100644 src/fides/api/models/data_consumer.py create mode 100644 tests/ops/models/test_data_consumer.py diff --git a/src/fides/api/db/base.py b/src/fides/api/db/base.py index 55c3a535719..5ec2b5923b3 100644 --- a/src/fides/api/db/base.py +++ b/src/fides/api/db/base.py @@ -16,6 +16,10 @@ from fides.api.models.custom_asset import CustomAsset from fides.api.models.custom_connector_template import CustomConnectorTemplate from fides.api.models.custom_report import CustomReport +from fides.api.models.data_consumer import ( # noqa: F401 + DataConsumer, + DataConsumerPurpose, +) from fides.api.models.data_purpose import DataPurpose # noqa: F401 from fides.api.models.datasetconfig 
import DatasetConfig from fides.api.models.db_cache import DBCache diff --git a/src/fides/api/models/data_consumer.py b/src/fides/api/models/data_consumer.py new file mode 100644 index 00000000000..4ca5bbf43f7 --- /dev/null +++ b/src/fides/api/models/data_consumer.py @@ -0,0 +1,93 @@ +from typing import Any + +from sqlalchemy import ( + ARRAY, + JSON, + Boolean, + CheckConstraint, + Column, + ForeignKey, + String, + UniqueConstraint, +) +from sqlalchemy.orm import Session, relationship + +from fides.api.db.base_class import Base + + +class DataConsumer(Base): + """ + Non-system data consumers (groups, projects, custom types). + System-type consumers are surfaced via a facade over ctl_systems. + """ + + __tablename__ = "data_consumer" + __table_args__ = ( + CheckConstraint("type != 'system'", name="ck_data_consumer_not_system"), + ) + + name = Column(String, nullable=False) + description = Column(String, nullable=True) + type = Column(String, nullable=False, index=True) + external_id = Column(String, nullable=True) + egress = Column(JSON, nullable=True) + ingress = Column(JSON, nullable=True) + data_shared_with_third_parties = Column(Boolean, server_default="f", nullable=False) + third_parties = Column(String, nullable=True) + shared_categories = Column(ARRAY(String), server_default="{}", nullable=False) + contact_email = Column(String, nullable=True) + contact_slack_channel = Column(String, nullable=True) + contact_details = Column(JSON, nullable=True) + tags = Column(ARRAY(String), server_default="{}", nullable=False) + + consumer_purposes = relationship( + "DataConsumerPurpose", + cascade="all, delete-orphan", + lazy="selectin", + ) + + @classmethod + def create( + cls, + db: Session, + *, + data: dict[str, Any], + check_name: bool = False, + ) -> "DataConsumer": + """Override create to skip name uniqueness check. 
+ Multiple consumers can share a name.""" + return super().create(db=db, data=data, check_name=check_name) + + +class DataConsumerPurpose(Base): + """ + Audited join table linking a non-system DataConsumer to a DataPurpose. + """ + + __tablename__ = "data_consumer_purpose" + __table_args__ = ( + UniqueConstraint( + "data_consumer_id", "data_purpose_id", name="uq_data_consumer_purpose" + ), + ) + + data_consumer_id = Column( + String, + ForeignKey("data_consumer.id", ondelete="CASCADE"), + nullable=False, + index=True, + ) + data_purpose_id = Column( + String, + ForeignKey("data_purpose.id", ondelete="RESTRICT"), + nullable=False, + index=True, + ) + assigned_by = Column( + String, + ForeignKey("fidesuser.id"), + nullable=True, + ) + + data_consumer = relationship("DataConsumer", lazy="selectin") + data_purpose = relationship("DataPurpose", lazy="selectin") diff --git a/tests/ops/models/test_data_consumer.py b/tests/ops/models/test_data_consumer.py new file mode 100644 index 00000000000..121f89d2cdc --- /dev/null +++ b/tests/ops/models/test_data_consumer.py @@ -0,0 +1,125 @@ +import pytest +from sqlalchemy.orm import Session + +from fides.api.models.data_consumer import DataConsumer, DataConsumerPurpose +from fides.api.models.data_purpose import DataPurpose + + +class TestDataConsumerModel: + def test_create_group_consumer(self, db: Session): + consumer = DataConsumer.create( + db=db, + data={ + "name": "Marketing Team", + "description": "Marketing department Google Group", + "type": "group", + "external_id": "marketing@example.com", + "contact_email": "marketing-lead@example.com", + "tags": ["marketing", "internal"], + }, + ) + assert consumer.name == "Marketing Team" + assert consumer.type == "group" + assert consumer.external_id == "marketing@example.com" + assert consumer.tags == ["marketing", "internal"] + + def test_create_project_consumer(self, db: Session): + consumer = DataConsumer.create( + db=db, + data={ + "name": "Analytics Pipeline", + "type": 
"project", + "external_id": "bigquery-project-123", + }, + ) + assert consumer.type == "project" + + def test_system_type_rejected(self, db: Session): + """CHECK constraint prevents type='system' rows.""" + with pytest.raises(Exception): + DataConsumer.create( + db=db, + data={ + "name": "Should Fail", + "type": "system", + }, + ) + + def test_custom_type_allowed(self, db: Session): + consumer = DataConsumer.create( + db=db, + data={ + "name": "Custom Consumer", + "type": "data_warehouse", + }, + ) + assert consumer.type == "data_warehouse" + + +class TestDataConsumerPurposeModel: + @pytest.fixture + def purpose(self, db: Session) -> DataPurpose: + return DataPurpose.create( + db=db, + data={ + "fides_key": "consumer_test_purpose", + "name": "Test Purpose", + "data_use": "analytics", + }, + ) + + @pytest.fixture + def consumer(self, db: Session) -> DataConsumer: + return DataConsumer.create( + db=db, + data={ + "name": "Test Group", + "type": "group", + }, + ) + + def test_link_purpose_to_consumer( + self, db: Session, consumer: DataConsumer, purpose: DataPurpose + ): + link = DataConsumerPurpose.create( + db=db, + data={ + "data_consumer_id": consumer.id, + "data_purpose_id": purpose.id, + }, + ) + assert link.data_consumer_id == consumer.id + assert link.data_purpose_id == purpose.id + + def test_unique_constraint( + self, db: Session, consumer: DataConsumer, purpose: DataPurpose + ): + DataConsumerPurpose.create( + db=db, + data={ + "data_consumer_id": consumer.id, + "data_purpose_id": purpose.id, + }, + ) + with pytest.raises(Exception): + DataConsumerPurpose.create( + db=db, + data={ + "data_consumer_id": consumer.id, + "data_purpose_id": purpose.id, + }, + ) + + def test_cascade_on_consumer_delete( + self, db: Session, consumer: DataConsumer, purpose: DataPurpose + ): + link = DataConsumerPurpose.create( + db=db, + data={ + "data_consumer_id": consumer.id, + "data_purpose_id": purpose.id, + }, + ) + link_id = link.id + consumer.delete(db) + assert 
db.query(DataConsumerPurpose).filter_by(id=link_id).first() is None From 7be822957e39dc70753c04a433be7aafbd40eeb1 Mon Sep 17 00:00:00 2001 From: Adrian Galvan Date: Wed, 11 Mar 2026 22:46:26 -0400 Subject: [PATCH 10/17] feat: add DataProducer and DataProducerMember models Co-Authored-By: Claude Sonnet 4.6 --- src/fides/api/db/base.py | 4 ++ src/fides/api/models/data_producer.py | 73 +++++++++++++++++++++ tests/ops/models/test_data_producer.py | 89 ++++++++++++++++++++++++++ 3 files changed, 166 insertions(+) create mode 100644 src/fides/api/models/data_producer.py create mode 100644 tests/ops/models/test_data_producer.py diff --git a/src/fides/api/db/base.py b/src/fides/api/db/base.py index 5ec2b5923b3..5bb0242b57c 100644 --- a/src/fides/api/db/base.py +++ b/src/fides/api/db/base.py @@ -20,6 +20,10 @@ DataConsumer, DataConsumerPurpose, ) +from fides.api.models.data_producer import ( # noqa: F401 + DataProducer, + DataProducerMember, +) from fides.api.models.data_purpose import DataPurpose # noqa: F401 from fides.api.models.datasetconfig import DatasetConfig from fides.api.models.db_cache import DBCache diff --git a/src/fides/api/models/data_producer.py b/src/fides/api/models/data_producer.py new file mode 100644 index 00000000000..6673a2bdf1f --- /dev/null +++ b/src/fides/api/models/data_producer.py @@ -0,0 +1,73 @@ +from typing import Any + +from sqlalchemy import JSON, Column, ForeignKey, String, UniqueConstraint +from sqlalchemy.orm import Session, relationship + +from fides.api.db.base_class import Base + + +class DataProducer(Base): + """ + Represents a team or group responsible for data registration + and purpose assignment to datasets. 
+ """ + + __tablename__ = "data_producer" + + name = Column(String, nullable=False) + description = Column(String, nullable=True) + external_id = Column(String, nullable=True) + monitor_id = Column( + String, + ForeignKey("monitorconfig.id"), + nullable=True, + ) + contact_email = Column(String, nullable=True) + contact_slack_channel = Column(String, nullable=True) + contact_details = Column(JSON, nullable=True) + + members = relationship( + "DataProducerMember", + cascade="all, delete-orphan", + lazy="selectin", + ) + monitor = relationship("MonitorConfig", lazy="selectin") + + @classmethod + def create( + cls, + db: Session, + *, + data: dict[str, Any], + check_name: bool = False, + ) -> "DataProducer": + """Override create to skip name uniqueness check. + Multiple producers can share a name.""" + return super().create(db=db, data=data, check_name=check_name) + + +class DataProducerMember(Base): + """ + Join table linking a DataProducer to FidesUser members. + """ + + __tablename__ = "data_producer_member" + __table_args__ = ( + UniqueConstraint("data_producer_id", "user_id", name="uq_data_producer_member"), + ) + + data_producer_id = Column( + String, + ForeignKey("data_producer.id", ondelete="CASCADE"), + nullable=False, + index=True, + ) + user_id = Column( + String, + ForeignKey("fidesuser.id", ondelete="CASCADE"), + nullable=False, + index=True, + ) + + data_producer = relationship("DataProducer", lazy="selectin") + user = relationship("FidesUser", lazy="selectin") diff --git a/tests/ops/models/test_data_producer.py b/tests/ops/models/test_data_producer.py new file mode 100644 index 00000000000..d76c6fb7906 --- /dev/null +++ b/tests/ops/models/test_data_producer.py @@ -0,0 +1,89 @@ +import pytest +from sqlalchemy.orm import Session + +from fides.api.models.data_producer import DataProducer, DataProducerMember + + +class TestDataProducerModel: + def test_create_data_producer(self, db: Session): + producer = DataProducer.create( + db=db, + data={ + "name": 
"Analytics Engineering Team", + "description": "Responsible for analytics pipelines", + "external_id": "analytics-eng-okta-group", + "contact_email": "analytics-eng@example.com", + "contact_slack_channel": "#analytics-eng", + }, + ) + assert producer.name == "Analytics Engineering Team" + assert producer.external_id == "analytics-eng-okta-group" + assert producer.contact_email == "analytics-eng@example.com" + + def test_create_minimal_producer(self, db: Session): + producer = DataProducer.create( + db=db, + data={"name": "Minimal Producer"}, + ) + assert producer.name == "Minimal Producer" + assert producer.monitor_id is None + + def test_delete_producer(self, db: Session): + producer = DataProducer.create( + db=db, + data={"name": "Delete Me"}, + ) + producer_id = producer.id + producer.delete(db) + assert db.query(DataProducer).filter_by(id=producer_id).first() is None + + +class TestDataProducerMemberModel: + @pytest.fixture + def producer(self, db: Session) -> DataProducer: + return DataProducer.create( + db=db, + data={"name": "Test Producer"}, + ) + + def test_add_member(self, db: Session, producer: DataProducer, user): + member = DataProducerMember.create( + db=db, + data={ + "data_producer_id": producer.id, + "user_id": user.id, + }, + ) + assert member.data_producer_id == producer.id + assert member.user_id == user.id + + def test_unique_constraint(self, db: Session, producer: DataProducer, user): + DataProducerMember.create( + db=db, + data={ + "data_producer_id": producer.id, + "user_id": user.id, + }, + ) + with pytest.raises(Exception): + DataProducerMember.create( + db=db, + data={ + "data_producer_id": producer.id, + "user_id": user.id, + }, + ) + + def test_cascade_on_producer_delete( + self, db: Session, producer: DataProducer, user + ): + member = DataProducerMember.create( + db=db, + data={ + "data_producer_id": producer.id, + "user_id": user.id, + }, + ) + member_id = member.id + producer.delete(db) + assert 
db.query(DataProducerMember).filter_by(id=member_id).first() is None From 8ac3690ac13394bd5216ea34aa5ae06ed558fa57 Mon Sep 17 00:00:00 2001 From: Adrian Galvan Date: Wed, 11 Mar 2026 22:46:54 -0400 Subject: [PATCH 11/17] fix: add TYPE_CHECKING imports for MonitorConfig and FidesUser Resolves mypy errors for string-based relationship references. Co-Authored-By: Claude Opus 4.6 --- src/fides/api/models/data_producer.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/fides/api/models/data_producer.py b/src/fides/api/models/data_producer.py index 6673a2bdf1f..9a65da66442 100644 --- a/src/fides/api/models/data_producer.py +++ b/src/fides/api/models/data_producer.py @@ -1,10 +1,16 @@ -from typing import Any +from __future__ import annotations + +from typing import TYPE_CHECKING, Any from sqlalchemy import JSON, Column, ForeignKey, String, UniqueConstraint from sqlalchemy.orm import Session, relationship from fides.api.db.base_class import Base +if TYPE_CHECKING: + from fides.api.models.detection_discovery import MonitorConfig + from fides.api.models.fides_user import FidesUser + class DataProducer(Base): """ From c874a61eba4a9f975adcc96a0156497db0826551 Mon Sep 17 00:00:00 2001 From: Adrian Galvan Date: Wed, 11 Mar 2026 22:48:12 -0400 Subject: [PATCH 12/17] feat: add data_purposes and data_producer_id to Dataset model Co-Authored-By: Claude Sonnet 4.6 --- src/fides/api/models/sql_models.py | 7 +++ tests/ops/models/test_dataset_purposes.py | 71 +++++++++++++++++++++++ 2 files changed, 78 insertions(+) create mode 100644 tests/ops/models/test_dataset_purposes.py diff --git a/src/fides/api/models/sql_models.py b/src/fides/api/models/sql_models.py index e886db12a63..0f3617ae7e4 100644 --- a/src/fides/api/models/sql_models.py +++ b/src/fides/api/models/sql_models.py @@ -408,6 +408,13 @@ class Dataset(Base, FidesBase): data_categories = Column(ARRAY(String)) collections = Column(JSON) fides_meta = Column(JSON) + data_purposes = 
Column(ARRAY(String), server_default="{}", nullable=True) + data_producer_id = Column( + String, + ForeignKey("data_producer.id", ondelete="SET NULL"), + nullable=True, + ) + data_producer = relationship("DataProducer", lazy="selectin") @classmethod def create( diff --git a/tests/ops/models/test_dataset_purposes.py b/tests/ops/models/test_dataset_purposes.py new file mode 100644 index 00000000000..7d4f9000f3f --- /dev/null +++ b/tests/ops/models/test_dataset_purposes.py @@ -0,0 +1,71 @@ +import pytest +from sqlalchemy.orm import Session + +from fides.api.models.data_producer import DataProducer +from fides.api.models.data_purpose import DataPurpose +from fides.api.models.sql_models import Dataset + + +class TestDatasetPurposes: + def test_dataset_with_purposes(self, db: Session): + dataset = Dataset.create( + db=db, + data={ + "fides_key": "test_dataset_purposes", + "name": "Test Dataset", + "data_categories": [], + "collections": [], + "data_purposes": ["marketing_email", "analytics_basic"], + }, + ) + assert dataset.data_purposes == ["marketing_email", "analytics_basic"] + + def test_dataset_without_purposes(self, db: Session): + dataset = Dataset.create( + db=db, + data={ + "fides_key": "test_dataset_no_purposes", + "name": "Test Dataset No Purposes", + "data_categories": [], + "collections": [], + }, + ) + assert dataset.data_purposes == [] or dataset.data_purposes is None + + def test_dataset_with_producer(self, db: Session): + producer = DataProducer.create( + db=db, + data={"name": "Test Producer"}, + ) + dataset = Dataset.create( + db=db, + data={ + "fides_key": "test_dataset_producer", + "name": "Test Dataset With Producer", + "data_categories": [], + "collections": [], + "data_producer_id": producer.id, + }, + ) + assert dataset.data_producer_id == producer.id + + def test_producer_set_null_on_delete(self, db: Session): + producer = DataProducer.create( + db=db, + data={"name": "Delete Producer"}, + ) + dataset = Dataset.create( + db=db, + data={ + 
"fides_key": "test_dataset_producer_delete", + "name": "Test Dataset", + "data_categories": [], + "collections": [], + "data_producer_id": producer.id, + }, + ) + dataset_id = dataset.id + producer.delete(db) + db.expire_all() + refreshed = db.query(Dataset).filter_by(id=dataset_id).first() + assert refreshed.data_producer_id is None From 02fb5acbd890a99a13703e15cda601bf0c4d9281 Mon Sep 17 00:00:00 2001 From: Adrian Galvan Date: Wed, 11 Mar 2026 22:49:21 -0400 Subject: [PATCH 13/17] feat: add Pydantic schemas for data purpose, consumer, producer Co-Authored-By: Claude Sonnet 4.6 --- src/fides/api/schemas/data_consumer.py | 65 ++++++++++++++++++++++++++ src/fides/api/schemas/data_producer.py | 44 +++++++++++++++++ src/fides/api/schemas/data_purpose.py | 57 ++++++++++++++++++++++ 3 files changed, 166 insertions(+) create mode 100644 src/fides/api/schemas/data_consumer.py create mode 100644 src/fides/api/schemas/data_producer.py create mode 100644 src/fides/api/schemas/data_purpose.py diff --git a/src/fides/api/schemas/data_consumer.py b/src/fides/api/schemas/data_consumer.py new file mode 100644 index 00000000000..87b4e0b4f2c --- /dev/null +++ b/src/fides/api/schemas/data_consumer.py @@ -0,0 +1,65 @@ +from datetime import datetime +from typing import Dict, List, Optional + +from pydantic import BaseModel, ConfigDict, Field + +from fides.api.schemas.data_purpose import DataPurposeResponse + + +class DataConsumerCreate(BaseModel): + name: str + description: Optional[str] = None + type: str + external_id: Optional[str] = None + egress: Optional[Dict] = None + ingress: Optional[Dict] = None + data_shared_with_third_parties: bool = False + third_parties: Optional[str] = None + shared_categories: List[str] = Field(default_factory=list) + contact_email: Optional[str] = None + contact_slack_channel: Optional[str] = None + contact_details: Optional[Dict] = None + tags: List[str] = Field(default_factory=list) + + +class DataConsumerUpdate(BaseModel): + name: Optional[str] = 
None + description: Optional[str] = None + external_id: Optional[str] = None + egress: Optional[Dict] = None + ingress: Optional[Dict] = None + data_shared_with_third_parties: Optional[bool] = None + third_parties: Optional[str] = None + shared_categories: Optional[List[str]] = None + contact_email: Optional[str] = None + contact_slack_channel: Optional[str] = None + contact_details: Optional[Dict] = None + tags: Optional[List[str]] = None + + +class DataConsumerPurposeAssignment(BaseModel): + purpose_fides_keys: List[str] + + +class DataConsumerResponse(BaseModel): + id: str + name: str + description: Optional[str] = None + type: str + external_id: Optional[str] = None + purposes: List[DataPurposeResponse] = Field(default_factory=list) + system_fides_key: Optional[str] = None + vendor_id: Optional[str] = None + egress: Optional[Dict] = None + ingress: Optional[Dict] = None + data_shared_with_third_parties: Optional[bool] = None + third_parties: Optional[str] = None + shared_categories: Optional[List[str]] = None + tags: List[str] = Field(default_factory=list) + contact_email: Optional[str] = None + contact_slack_channel: Optional[str] = None + contact_details: Optional[Dict] = None + created_at: datetime + updated_at: datetime + + model_config = ConfigDict(from_attributes=True) diff --git a/src/fides/api/schemas/data_producer.py b/src/fides/api/schemas/data_producer.py new file mode 100644 index 00000000000..08804e0897b --- /dev/null +++ b/src/fides/api/schemas/data_producer.py @@ -0,0 +1,44 @@ +from datetime import datetime +from typing import Dict, List, Optional + +from pydantic import BaseModel, ConfigDict, Field + + +class DataProducerCreate(BaseModel): + name: str + description: Optional[str] = None + external_id: Optional[str] = None + monitor_id: Optional[str] = None + contact_email: Optional[str] = None + contact_slack_channel: Optional[str] = None + contact_details: Optional[Dict] = None + + +class DataProducerUpdate(BaseModel): + name: Optional[str] = 
None + description: Optional[str] = None + external_id: Optional[str] = None + monitor_id: Optional[str] = None + contact_email: Optional[str] = None + contact_slack_channel: Optional[str] = None + contact_details: Optional[Dict] = None + + +class DataProducerMemberAssignment(BaseModel): + user_ids: List[str] + + +class DataProducerResponse(BaseModel): + id: str + name: str + description: Optional[str] = None + external_id: Optional[str] = None + monitor_id: Optional[str] = None + contact_email: Optional[str] = None + contact_slack_channel: Optional[str] = None + contact_details: Optional[Dict] = None + member_ids: List[str] = Field(default_factory=list) + created_at: datetime + updated_at: datetime + + model_config = ConfigDict(from_attributes=True) diff --git a/src/fides/api/schemas/data_purpose.py b/src/fides/api/schemas/data_purpose.py new file mode 100644 index 00000000000..d0beb62375b --- /dev/null +++ b/src/fides/api/schemas/data_purpose.py @@ -0,0 +1,57 @@ +from datetime import datetime +from typing import List, Optional + +from pydantic import BaseModel, ConfigDict, Field + + +class DataPurposeCreate(BaseModel): + fides_key: str + name: str + description: Optional[str] = None + organization_fides_key: Optional[str] = "default_organization" + tags: Optional[List[str]] = None + data_use: str + data_subject: Optional[str] = None + data_categories: List[str] = Field(default_factory=list) + legal_basis_for_processing: Optional[str] = None + flexible_legal_basis_for_processing: bool = True + special_category_legal_basis: Optional[str] = None + impact_assessment_location: Optional[str] = None + retention_period: Optional[str] = None + features: List[str] = Field(default_factory=list) + + +class DataPurposeUpdate(BaseModel): + name: Optional[str] = None + description: Optional[str] = None + data_use: Optional[str] = None + data_subject: Optional[str] = None + data_categories: Optional[List[str]] = None + legal_basis_for_processing: Optional[str] = None + 
flexible_legal_basis_for_processing: Optional[bool] = None + special_category_legal_basis: Optional[str] = None + impact_assessment_location: Optional[str] = None + retention_period: Optional[str] = None + features: Optional[List[str]] = None + + +class DataPurposeResponse(BaseModel): + id: str + fides_key: str + name: str + description: Optional[str] = None + organization_fides_key: Optional[str] = None + tags: Optional[List[str]] = None + data_use: str + data_subject: Optional[str] = None + data_categories: List[str] + legal_basis_for_processing: Optional[str] = None + flexible_legal_basis_for_processing: bool + special_category_legal_basis: Optional[str] = None + impact_assessment_location: Optional[str] = None + retention_period: Optional[str] = None + features: List[str] + created_at: datetime + updated_at: datetime + + model_config = ConfigDict(from_attributes=True) From 05e99fc594c9e7b44ba7a03bd47361ae5ce29115 Mon Sep 17 00:00:00 2001 From: Adrian Galvan Date: Wed, 11 Mar 2026 23:21:33 -0400 Subject: [PATCH 14/17] fix: add mypy type ignore comments for SQLAlchemy relationships and FidesBase import --- src/fides/api/models/data_consumer.py | 7 ++++++- src/fides/api/models/data_purpose.py | 2 +- src/fides/api/models/system_purpose.py | 10 +++++++++- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/src/fides/api/models/data_consumer.py b/src/fides/api/models/data_consumer.py index 4ca5bbf43f7..e6c18997a1a 100644 --- a/src/fides/api/models/data_consumer.py +++ b/src/fides/api/models/data_consumer.py @@ -1,4 +1,6 @@ -from typing import Any +from __future__ import annotations + +from typing import TYPE_CHECKING, Any from sqlalchemy import ( ARRAY, @@ -14,6 +16,9 @@ from fides.api.db.base_class import Base +if TYPE_CHECKING: + from fides.api.models.data_purpose import DataPurpose + class DataConsumer(Base): """ diff --git a/src/fides/api/models/data_purpose.py b/src/fides/api/models/data_purpose.py index 35a6fcc74d7..267763a572f 100644 --- 
a/src/fides/api/models/data_purpose.py +++ b/src/fides/api/models/data_purpose.py @@ -6,7 +6,7 @@ from fides.api.common_exceptions import KeyOrNameAlreadyExists from fides.api.db.base_class import Base -from fides.api.models.sql_models import FidesBase +from fides.api.models.sql_models import FidesBase # type: ignore[attr-defined] class DataPurpose(Base, FidesBase): diff --git a/src/fides/api/models/system_purpose.py b/src/fides/api/models/system_purpose.py index 085a7d75a4d..a8625b8a5e1 100644 --- a/src/fides/api/models/system_purpose.py +++ b/src/fides/api/models/system_purpose.py @@ -1,8 +1,16 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + from sqlalchemy import Column, ForeignKey, String, UniqueConstraint from sqlalchemy.orm import relationship from fides.api.db.base_class import Base +if TYPE_CHECKING: + from fides.api.models.data_purpose import DataPurpose + from fides.api.models.sql_models import System # type: ignore[attr-defined] + class SystemPurpose(Base): """ @@ -33,5 +41,5 @@ class SystemPurpose(Base): nullable=True, ) - system = relationship("System", lazy="selectin") + system = relationship("System", lazy="selectin") # type: ignore[misc] data_purpose = relationship("DataPurpose", lazy="selectin") From 03f750f7535e1408b36dcea9171bb2610cd84c4d Mon Sep 17 00:00:00 2001 From: Adrian Galvan Date: Thu, 12 Mar 2026 10:36:58 -0400 Subject: [PATCH 15/17] feat: add ORM relationships and migration for purpose-based data model Add bidirectional relationships between DataPurpose, DataConsumer, DataProducer, System, and their join tables. Include migration for all purpose-model tables (data_purpose, data_consumer, data_producer, system_purpose, data_consumer_purpose, data_producer_member). 
Co-Authored-By: Claude Opus 4.6 --- ...31c_add_purpose_based_data_model_tables.py | 159 ++++++++++++++++++ src/fides/api/models/data_consumer.py | 16 +- src/fides/api/models/data_producer.py | 13 +- src/fides/api/models/data_purpose.py | 5 +- src/fides/api/models/system_purpose.py | 10 +- 5 files changed, 189 insertions(+), 14 deletions(-) create mode 100644 src/fides/api/alembic/migrations/versions/xx_2026_03_12_0358_7ba8b184d31c_add_purpose_based_data_model_tables.py diff --git a/src/fides/api/alembic/migrations/versions/xx_2026_03_12_0358_7ba8b184d31c_add_purpose_based_data_model_tables.py b/src/fides/api/alembic/migrations/versions/xx_2026_03_12_0358_7ba8b184d31c_add_purpose_based_data_model_tables.py new file mode 100644 index 00000000000..d5a203ce3b2 --- /dev/null +++ b/src/fides/api/alembic/migrations/versions/xx_2026_03_12_0358_7ba8b184d31c_add_purpose_based_data_model_tables.py @@ -0,0 +1,159 @@ +"""add purpose based data model tables + +Revision ID: 7ba8b184d31c +Revises: 4ac4864180db +Create Date: 2026-03-12 03:58:30.461412 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = '7ba8b184d31c' +down_revision = '4ac4864180db' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + op.create_table('data_consumer', + sa.Column('id', sa.String(length=255), nullable=False), + sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=True), + sa.Column('updated_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=True), + sa.Column('name', sa.String(), nullable=False), + sa.Column('description', sa.String(), nullable=True), + sa.Column('type', sa.String(), nullable=False), + sa.Column('external_id', sa.String(), nullable=True), + sa.Column('egress', sa.JSON(), nullable=True), + sa.Column('ingress', sa.JSON(), nullable=True), + sa.Column('data_shared_with_third_parties', sa.Boolean(), server_default='f', nullable=False), + sa.Column('third_parties', sa.String(), nullable=True), + sa.Column('shared_categories', sa.ARRAY(sa.String()), server_default='{}', nullable=False), + sa.Column('contact_email', sa.String(), nullable=True), + sa.Column('contact_slack_channel', sa.String(), nullable=True), + sa.Column('contact_details', sa.JSON(), nullable=True), + sa.Column('tags', sa.ARRAY(sa.String()), server_default='{}', nullable=False), + sa.CheckConstraint("type != 'system'", name='ck_data_consumer_not_system'), + sa.PrimaryKeyConstraint('id') + ) + op.create_index(op.f('ix_data_consumer_id'), 'data_consumer', ['id'], unique=False) + op.create_index(op.f('ix_data_consumer_type'), 'data_consumer', ['type'], unique=False) + op.create_table('data_purpose', + sa.Column('id', sa.String(length=255), nullable=False), + sa.Column('fides_key', sa.String(), nullable=False), + sa.Column('organization_fides_key', sa.Text(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('name', sa.Text(), nullable=False), + sa.Column('description', sa.Text(), nullable=True), + sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=True), + sa.Column('updated_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), 
nullable=True), + sa.Column('data_use', sa.String(), nullable=False), + sa.Column('data_subject', sa.String(), nullable=True), + sa.Column('data_categories', postgresql.ARRAY(sa.String()), server_default='{}', nullable=False), + sa.Column('legal_basis_for_processing', sa.String(), nullable=True), + sa.Column('flexible_legal_basis_for_processing', sa.Boolean(), server_default='t', nullable=False), + sa.Column('special_category_legal_basis', sa.String(), nullable=True), + sa.Column('impact_assessment_location', sa.String(), nullable=True), + sa.Column('retention_period', sa.String(), nullable=True), + sa.Column('features', postgresql.ARRAY(sa.String()), server_default='{}', nullable=False), + sa.PrimaryKeyConstraint('id', 'fides_key') + ) + op.create_index(op.f('ix_data_purpose_data_use'), 'data_purpose', ['data_use'], unique=False) + op.create_index(op.f('ix_data_purpose_fides_key'), 'data_purpose', ['fides_key'], unique=True) + op.create_index(op.f('ix_data_purpose_id'), 'data_purpose', ['id'], unique=True) + op.create_table('data_consumer_purpose', + sa.Column('id', sa.String(length=255), nullable=False), + sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=True), + sa.Column('updated_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=True), + sa.Column('data_consumer_id', sa.String(), nullable=False), + sa.Column('data_purpose_id', sa.String(), nullable=False), + sa.Column('assigned_by', sa.String(), nullable=True), + sa.ForeignKeyConstraint(['assigned_by'], ['fidesuser.id'], ondelete='SET NULL'), + sa.ForeignKeyConstraint(['data_consumer_id'], ['data_consumer.id'], ondelete='CASCADE'), + sa.ForeignKeyConstraint(['data_purpose_id'], ['data_purpose.id'], ondelete='RESTRICT'), + sa.PrimaryKeyConstraint('id'), + sa.UniqueConstraint('data_consumer_id', 'data_purpose_id', name='uq_data_consumer_purpose') + ) + op.create_index(op.f('ix_data_consumer_purpose_data_consumer_id'), 'data_consumer_purpose', 
['data_consumer_id'], unique=False) + op.create_index(op.f('ix_data_consumer_purpose_data_purpose_id'), 'data_consumer_purpose', ['data_purpose_id'], unique=False) + op.create_index(op.f('ix_data_consumer_purpose_id'), 'data_consumer_purpose', ['id'], unique=False) + op.create_table('system_purpose', + sa.Column('id', sa.String(length=255), nullable=False), + sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=True), + sa.Column('updated_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=True), + sa.Column('system_id', sa.String(), nullable=False), + sa.Column('data_purpose_id', sa.String(), nullable=False), + sa.Column('assigned_by', sa.String(), nullable=True), + sa.ForeignKeyConstraint(['assigned_by'], ['fidesuser.id'], ondelete='SET NULL'), + sa.ForeignKeyConstraint(['data_purpose_id'], ['data_purpose.id'], ondelete='RESTRICT'), + sa.ForeignKeyConstraint(['system_id'], ['ctl_systems.id'], ondelete='CASCADE'), + sa.PrimaryKeyConstraint('id'), + sa.UniqueConstraint('system_id', 'data_purpose_id', name='uq_system_purpose') + ) + op.create_index(op.f('ix_system_purpose_data_purpose_id'), 'system_purpose', ['data_purpose_id'], unique=False) + op.create_index(op.f('ix_system_purpose_id'), 'system_purpose', ['id'], unique=False) + op.create_index(op.f('ix_system_purpose_system_id'), 'system_purpose', ['system_id'], unique=False) + op.create_table('data_producer', + sa.Column('id', sa.String(length=255), nullable=False), + sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=True), + sa.Column('updated_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=True), + sa.Column('name', sa.String(), nullable=False), + sa.Column('description', sa.String(), nullable=True), + sa.Column('external_id', sa.String(), nullable=True), + sa.Column('monitor_id', sa.String(), nullable=True), + sa.Column('contact_email', sa.String(), nullable=True), + 
sa.Column('contact_slack_channel', sa.String(), nullable=True), + sa.Column('contact_details', sa.JSON(), nullable=True), + sa.ForeignKeyConstraint(['monitor_id'], ['monitorconfig.id'], ), + sa.PrimaryKeyConstraint('id') + ) + op.create_index(op.f('ix_data_producer_id'), 'data_producer', ['id'], unique=False) + op.create_table('data_producer_member', + sa.Column('id', sa.String(length=255), nullable=False), + sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=True), + sa.Column('updated_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=True), + sa.Column('data_producer_id', sa.String(), nullable=False), + sa.Column('user_id', sa.String(), nullable=False), + sa.ForeignKeyConstraint(['data_producer_id'], ['data_producer.id'], ondelete='CASCADE'), + sa.ForeignKeyConstraint(['user_id'], ['fidesuser.id'], ondelete='CASCADE'), + sa.PrimaryKeyConstraint('id'), + sa.UniqueConstraint('data_producer_id', 'user_id', name='uq_data_producer_member') + ) + op.create_index(op.f('ix_data_producer_member_data_producer_id'), 'data_producer_member', ['data_producer_id'], unique=False) + op.create_index(op.f('ix_data_producer_member_id'), 'data_producer_member', ['id'], unique=False) + op.create_index(op.f('ix_data_producer_member_user_id'), 'data_producer_member', ['user_id'], unique=False) + op.add_column('ctl_datasets', sa.Column('data_purposes', postgresql.ARRAY(sa.String()), server_default='{}', nullable=True)) + op.add_column('ctl_datasets', sa.Column('data_producer_id', sa.String(), nullable=True)) + op.create_foreign_key('fk_ctl_datasets_data_producer_id', 'ctl_datasets', 'data_producer', ['data_producer_id'], ['id'], ondelete='SET NULL') + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_constraint('fk_ctl_datasets_data_producer_id', 'ctl_datasets', type_='foreignkey') + op.drop_column('ctl_datasets', 'data_producer_id') + op.drop_column('ctl_datasets', 'data_purposes') + op.drop_index(op.f('ix_data_producer_member_user_id'), table_name='data_producer_member') + op.drop_index(op.f('ix_data_producer_member_id'), table_name='data_producer_member') + op.drop_index(op.f('ix_data_producer_member_data_producer_id'), table_name='data_producer_member') + op.drop_table('data_producer_member') + op.drop_index(op.f('ix_data_producer_id'), table_name='data_producer') + op.drop_table('data_producer') + op.drop_index(op.f('ix_system_purpose_system_id'), table_name='system_purpose') + op.drop_index(op.f('ix_system_purpose_id'), table_name='system_purpose') + op.drop_index(op.f('ix_system_purpose_data_purpose_id'), table_name='system_purpose') + op.drop_table('system_purpose') + op.drop_index(op.f('ix_data_consumer_purpose_id'), table_name='data_consumer_purpose') + op.drop_index(op.f('ix_data_consumer_purpose_data_purpose_id'), table_name='data_consumer_purpose') + op.drop_index(op.f('ix_data_consumer_purpose_data_consumer_id'), table_name='data_consumer_purpose') + op.drop_table('data_consumer_purpose') + op.drop_index(op.f('ix_data_purpose_id'), table_name='data_purpose') + op.drop_index(op.f('ix_data_purpose_fides_key'), table_name='data_purpose') + op.drop_index(op.f('ix_data_purpose_data_use'), table_name='data_purpose') + op.drop_table('data_purpose') + op.drop_index(op.f('ix_data_consumer_type'), table_name='data_consumer') + op.drop_index(op.f('ix_data_consumer_id'), table_name='data_consumer') + op.drop_table('data_consumer') + # ### end Alembic commands ### diff --git a/src/fides/api/models/data_consumer.py b/src/fides/api/models/data_consumer.py index e6c18997a1a..c50d9922dd9 100644 --- a/src/fides/api/models/data_consumer.py +++ b/src/fides/api/models/data_consumer.py @@ -12,12 +12,13 @@ String, UniqueConstraint, ) +from 
sqlalchemy.ext.declarative import declared_attr from sqlalchemy.orm import Session, relationship from fides.api.db.base_class import Base if TYPE_CHECKING: - from fides.api.models.data_purpose import DataPurpose + from fides.api.models.data_purpose import DataPurpose # noqa: F401 class DataConsumer(Base): @@ -26,7 +27,10 @@ class DataConsumer(Base): System-type consumers are surfaced via a facade over ctl_systems. """ - __tablename__ = "data_consumer" + @declared_attr + def __tablename__(self) -> str: + return "data_consumer" + __table_args__ = ( CheckConstraint("type != 'system'", name="ck_data_consumer_not_system"), ) @@ -69,7 +73,7 @@ class DataConsumerPurpose(Base): Audited join table linking a non-system DataConsumer to a DataPurpose. """ - __tablename__ = "data_consumer_purpose" + __tablename__ = "data_consumer_purpose" # type: ignore[assignment] __table_args__ = ( UniqueConstraint( "data_consumer_id", "data_purpose_id", name="uq_data_consumer_purpose" @@ -90,9 +94,11 @@ class DataConsumerPurpose(Base): ) assigned_by = Column( String, - ForeignKey("fidesuser.id"), + ForeignKey("fidesuser.id", ondelete="SET NULL"), nullable=True, ) - data_consumer = relationship("DataConsumer", lazy="selectin") + data_consumer = relationship( + "DataConsumer", lazy="selectin", overlaps="consumer_purposes" + ) # type: ignore[call-arg] data_purpose = relationship("DataPurpose", lazy="selectin") diff --git a/src/fides/api/models/data_producer.py b/src/fides/api/models/data_producer.py index 9a65da66442..08d6207b411 100644 --- a/src/fides/api/models/data_producer.py +++ b/src/fides/api/models/data_producer.py @@ -3,13 +3,14 @@ from typing import TYPE_CHECKING, Any from sqlalchemy import JSON, Column, ForeignKey, String, UniqueConstraint +from sqlalchemy.ext.declarative import declared_attr from sqlalchemy.orm import Session, relationship from fides.api.db.base_class import Base if TYPE_CHECKING: - from fides.api.models.detection_discovery import MonitorConfig - from 
fides.api.models.fides_user import FidesUser + from fides.api.models.detection_discovery import MonitorConfig # noqa: F401 + from fides.api.models.fides_user import FidesUser # noqa: F401 class DataProducer(Base): @@ -18,7 +19,9 @@ class DataProducer(Base): and purpose assignment to datasets. """ - __tablename__ = "data_producer" + @declared_attr + def __tablename__(self) -> str: + return "data_producer" name = Column(String, nullable=False) description = Column(String, nullable=True) @@ -57,7 +60,7 @@ class DataProducerMember(Base): Join table linking a DataProducer to FidesUser members. """ - __tablename__ = "data_producer_member" + __tablename__ = "data_producer_member" # type: ignore[assignment] __table_args__ = ( UniqueConstraint("data_producer_id", "user_id", name="uq_data_producer_member"), ) @@ -75,5 +78,5 @@ class DataProducerMember(Base): index=True, ) - data_producer = relationship("DataProducer", lazy="selectin") + data_producer = relationship("DataProducer", lazy="selectin", overlaps="members") # type: ignore[call-arg] user = relationship("FidesUser", lazy="selectin") diff --git a/src/fides/api/models/data_purpose.py b/src/fides/api/models/data_purpose.py index 267763a572f..18216a72224 100644 --- a/src/fides/api/models/data_purpose.py +++ b/src/fides/api/models/data_purpose.py @@ -2,6 +2,7 @@ from sqlalchemy import Boolean, Column, String from sqlalchemy.dialects.postgresql import ARRAY +from sqlalchemy.ext.declarative import declared_attr from sqlalchemy.orm import Session from fides.api.common_exceptions import KeyOrNameAlreadyExists @@ -18,7 +19,9 @@ class DataPurpose(Base, FidesBase): description, organization_fides_key, and tags. 
""" - __tablename__ = "data_purpose" + @declared_attr + def __tablename__(self) -> str: + return "data_purpose" data_use = Column(String, nullable=False, index=True) data_subject = Column(String, nullable=True) diff --git a/src/fides/api/models/system_purpose.py b/src/fides/api/models/system_purpose.py index a8625b8a5e1..d23aaaa5606 100644 --- a/src/fides/api/models/system_purpose.py +++ b/src/fides/api/models/system_purpose.py @@ -3,6 +3,7 @@ from typing import TYPE_CHECKING from sqlalchemy import Column, ForeignKey, String, UniqueConstraint +from sqlalchemy.ext.declarative import declared_attr from sqlalchemy.orm import relationship from fides.api.db.base_class import Base @@ -18,7 +19,10 @@ class SystemPurpose(Base): Used by the DataConsumer facade for system-type consumers. """ - __tablename__ = "system_purpose" + @declared_attr + def __tablename__(self) -> str: + return "system_purpose" + __table_args__ = ( UniqueConstraint("system_id", "data_purpose_id", name="uq_system_purpose"), ) @@ -37,9 +41,9 @@ class SystemPurpose(Base): ) assigned_by = Column( String, - ForeignKey("fidesuser.id"), + ForeignKey("fidesuser.id", ondelete="SET NULL"), nullable=True, ) - system = relationship("System", lazy="selectin") # type: ignore[misc] + system = relationship("System", lazy="selectin", overlaps="system_purposes") # type: ignore[call-arg, misc] data_purpose = relationship("DataPurpose", lazy="selectin") From 4ea9bf18abc9f67e4790d28b78c71f4e0c477d06 Mon Sep 17 00:00:00 2001 From: Adrian Galvan Date: Thu, 12 Mar 2026 16:48:21 -0400 Subject: [PATCH 16/17] Cleaning up migration --- ...31c_add_purpose_based_data_model_tables.py | 237 ++++++++---------- 1 file changed, 105 insertions(+), 132 deletions(-) diff --git a/src/fides/api/alembic/migrations/versions/xx_2026_03_12_0358_7ba8b184d31c_add_purpose_based_data_model_tables.py b/src/fides/api/alembic/migrations/versions/xx_2026_03_12_0358_7ba8b184d31c_add_purpose_based_data_model_tables.py index d5a203ce3b2..fc7aeaba887 
100644 --- a/src/fides/api/alembic/migrations/versions/xx_2026_03_12_0358_7ba8b184d31c_add_purpose_based_data_model_tables.py +++ b/src/fides/api/alembic/migrations/versions/xx_2026_03_12_0358_7ba8b184d31c_add_purpose_based_data_model_tables.py @@ -10,150 +10,123 @@ from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. -revision = '7ba8b184d31c' -down_revision = '4ac4864180db' +revision = "7ba8b184d31c" +down_revision = "4ac4864180db" branch_labels = None depends_on = None def upgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.create_table('data_consumer', - sa.Column('id', sa.String(length=255), nullable=False), + op.create_table("data_consumer", + sa.Column("id", sa.String(length=255), nullable=False), sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=True), - sa.Column('updated_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=True), - sa.Column('name', sa.String(), nullable=False), - sa.Column('description', sa.String(), nullable=True), - sa.Column('type', sa.String(), nullable=False), - sa.Column('external_id', sa.String(), nullable=True), - sa.Column('egress', sa.JSON(), nullable=True), - sa.Column('ingress', sa.JSON(), nullable=True), - sa.Column('data_shared_with_third_parties', sa.Boolean(), server_default='f', nullable=False), - sa.Column('third_parties', sa.String(), nullable=True), - sa.Column('shared_categories', sa.ARRAY(sa.String()), server_default='{}', nullable=False), - sa.Column('contact_email', sa.String(), nullable=True), - sa.Column('contact_slack_channel', sa.String(), nullable=True), - sa.Column('contact_details', sa.JSON(), nullable=True), - sa.Column('tags', sa.ARRAY(sa.String()), server_default='{}', nullable=False), - sa.CheckConstraint("type != 'system'", name='ck_data_consumer_not_system'), - sa.PrimaryKeyConstraint('id') + sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), 
nullable=True), + sa.Column("name", sa.String(), nullable=False), + sa.Column("description", sa.String(), nullable=True), + sa.Column("type", sa.String(), nullable=False), + sa.Column("external_id", sa.String(), nullable=True), + sa.Column("egress", sa.JSON(), nullable=True), + sa.Column("ingress", sa.JSON(), nullable=True), + sa.Column("data_shared_with_third_parties", sa.Boolean(), server_default="f", nullable=False), + sa.Column("third_parties", sa.String(), nullable=True), + sa.Column("shared_categories", sa.ARRAY(sa.String()), server_default="{}", nullable=False), + sa.Column("contact_email", sa.String(), nullable=True), + sa.Column("contact_slack_channel", sa.String(), nullable=True), + sa.Column("contact_details", sa.JSON(), nullable=True), + sa.Column("tags", sa.ARRAY(sa.String()), server_default="{}", nullable=False), + sa.CheckConstraint("type != 'system'", name="ck_data_consumer_not_system"), + sa.PrimaryKeyConstraint("id") ) - op.create_index(op.f('ix_data_consumer_id'), 'data_consumer', ['id'], unique=False) - op.create_index(op.f('ix_data_consumer_type'), 'data_consumer', ['type'], unique=False) - op.create_table('data_purpose', - sa.Column('id', sa.String(length=255), nullable=False), - sa.Column('fides_key', sa.String(), nullable=False), - sa.Column('organization_fides_key', sa.Text(), nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), - sa.Column('name', sa.Text(), nullable=False), - sa.Column('description', sa.Text(), nullable=True), - sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=True), - sa.Column('updated_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=True), - sa.Column('data_use', sa.String(), nullable=False), - sa.Column('data_subject', sa.String(), nullable=True), - sa.Column('data_categories', postgresql.ARRAY(sa.String()), server_default='{}', nullable=False), - sa.Column('legal_basis_for_processing', sa.String(), nullable=True), - 
sa.Column('flexible_legal_basis_for_processing', sa.Boolean(), server_default='t', nullable=False), - sa.Column('special_category_legal_basis', sa.String(), nullable=True), - sa.Column('impact_assessment_location', sa.String(), nullable=True), - sa.Column('retention_period', sa.String(), nullable=True), - sa.Column('features', postgresql.ARRAY(sa.String()), server_default='{}', nullable=False), - sa.PrimaryKeyConstraint('id', 'fides_key') + op.create_index(op.f("ix_data_consumer_id"), "data_consumer", ["id"], unique=False) + op.create_index(op.f("ix_data_consumer_type"), "data_consumer", ["type"], unique=False) + op.create_table("data_purpose", + sa.Column("id", sa.String(length=255), nullable=False), + sa.Column("fides_key", sa.String(), nullable=False), + sa.Column("organization_fides_key", sa.Text(), nullable=True), + sa.Column("tags", postgresql.ARRAY(sa.String()), nullable=True), + sa.Column("name", sa.Text(), nullable=False), + sa.Column("description", sa.Text(), nullable=True), + sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=True), + sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=True), + sa.Column("data_use", sa.String(), nullable=False), + sa.Column("data_subject", sa.String(), nullable=True), + sa.Column("data_categories", postgresql.ARRAY(sa.String()), server_default="{}", nullable=False), + sa.Column("legal_basis_for_processing", sa.String(), nullable=True), + sa.Column("flexible_legal_basis_for_processing", sa.Boolean(), server_default="t", nullable=False), + sa.Column("special_category_legal_basis", sa.String(), nullable=True), + sa.Column("impact_assessment_location", sa.String(), nullable=True), + sa.Column("retention_period", sa.String(), nullable=True), + sa.Column("features", postgresql.ARRAY(sa.String()), server_default="{}", nullable=False), + sa.PrimaryKeyConstraint("id", "fides_key") ) - op.create_index(op.f('ix_data_purpose_data_use'), 'data_purpose', ['data_use'], unique=False) - op.create_index(op.f('ix_data_purpose_fides_key'), 'data_purpose', ['fides_key'], unique=True) - op.create_index(op.f('ix_data_purpose_id'), 'data_purpose', ['id'], unique=True) - op.create_table('data_consumer_purpose', - sa.Column('id', sa.String(length=255), nullable=False), - sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=True), - sa.Column('updated_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=True), - sa.Column('data_consumer_id', sa.String(), nullable=False), - sa.Column('data_purpose_id', sa.String(), nullable=False), - sa.Column('assigned_by',
sa.String(), nullable=True), - sa.ForeignKeyConstraint(['assigned_by'], ['fidesuser.id'], ondelete='SET NULL'), - sa.ForeignKeyConstraint(['data_consumer_id'], ['data_consumer.id'], ondelete='CASCADE'), - sa.ForeignKeyConstraint(['data_purpose_id'], ['data_purpose.id'], ondelete='RESTRICT'), - sa.PrimaryKeyConstraint('id'), - sa.UniqueConstraint('data_consumer_id', 'data_purpose_id', name='uq_data_consumer_purpose') + op.create_index(op.f("ix_data_purpose_data_use"), "data_purpose", ["data_use"], unique=False) + op.create_index(op.f("ix_data_purpose_fides_key"), "data_purpose", ["fides_key"], unique=True) + op.create_index(op.f("ix_data_purpose_id"), "data_purpose", ["id"], unique=True) + op.create_table("data_consumer_purpose", + sa.Column("id", sa.String(length=255), nullable=False), + sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=True), + sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=True), + sa.Column("data_consumer_id", sa.String(), nullable=False), + sa.Column("data_purpose_id", sa.String(), nullable=False), + sa.Column("assigned_by", sa.String(), nullable=True), + sa.ForeignKeyConstraint(["assigned_by"], ["fidesuser.id"], ondelete="SET NULL"), + sa.ForeignKeyConstraint(["data_consumer_id"], ["data_consumer.id"], ondelete="CASCADE"), + sa.ForeignKeyConstraint(["data_purpose_id"], ["data_purpose.id"], ondelete="RESTRICT"), + sa.PrimaryKeyConstraint("id"), + sa.UniqueConstraint("data_consumer_id", "data_purpose_id", name="uq_data_consumer_purpose") + ) + op.create_index(op.f("ix_data_consumer_purpose_data_consumer_id"), "data_consumer_purpose", ["data_consumer_id"], unique=False) + op.create_index(op.f("ix_data_consumer_purpose_data_purpose_id"), "data_consumer_purpose", ["data_purpose_id"], unique=False) + op.create_index(op.f("ix_data_consumer_purpose_id"), "data_consumer_purpose", ["id"], unique=False) + op.create_table("system_purpose", + sa.Column("id", sa.String(length=255), nullable=False), + sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=True), + sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=True), + sa.Column("system_id", sa.String(), nullable=False), + sa.Column("data_purpose_id", sa.String(), nullable=False), + sa.Column("assigned_by", sa.String(), nullable=True), + sa.ForeignKeyConstraint(["assigned_by"], ["fidesuser.id"], ondelete="SET NULL"), + sa.ForeignKeyConstraint(["data_purpose_id"], ["data_purpose.id"], ondelete="RESTRICT"), + sa.ForeignKeyConstraint(["system_id"], ["ctl_systems.id"], ondelete="CASCADE"), + sa.PrimaryKeyConstraint("id"), + sa.UniqueConstraint("system_id", "data_purpose_id", name="uq_system_purpose")
op.create_table('system_purpose', - sa.Column('id', sa.String(length=255), nullable=False), - sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=True), - sa.Column('updated_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=True), - sa.Column('system_id', sa.String(), nullable=False), - sa.Column('data_purpose_id', sa.String(), nullable=False), - sa.Column('assigned_by', sa.String(), nullable=True), - sa.ForeignKeyConstraint(['assigned_by'], ['fidesuser.id'], ondelete='SET NULL'), - sa.ForeignKeyConstraint(['data_purpose_id'], ['data_purpose.id'], ondelete='RESTRICT'), - sa.ForeignKeyConstraint(['system_id'], ['ctl_systems.id'], ondelete='CASCADE'), - sa.PrimaryKeyConstraint('id'), - sa.UniqueConstraint('system_id', 'data_purpose_id', name='uq_system_purpose') + op.create_index(op.f("ix_system_purpose_data_purpose_id"), "system_purpose", ["data_purpose_id"], unique=False) + op.create_index(op.f("ix_system_purpose_id"), "system_purpose", ["id"], unique=False) + op.create_index(op.f("ix_system_purpose_system_id"), "system_purpose", ["system_id"], unique=False) + op.create_table("data_producer", + sa.Column("id", sa.String(length=255), nullable=False), + sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=True), + sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=True), + sa.Column("name", sa.String(), nullable=False), + sa.Column("description", sa.String(), nullable=True), + sa.Column("external_id", sa.String(), nullable=True), + sa.Column("monitor_id", sa.String(), nullable=True), + sa.Column("contact_email", sa.String(), nullable=True), + sa.Column("contact_slack_channel", sa.String(), nullable=True), + sa.Column("contact_details", sa.JSON(), nullable=True), + sa.ForeignKeyConstraint(["monitor_id"], ["monitorconfig.id"], ), + sa.PrimaryKeyConstraint("id") ) - 
op.create_index(op.f('ix_system_purpose_data_purpose_id'), 'system_purpose', ['data_purpose_id'], unique=False) - op.create_index(op.f('ix_system_purpose_id'), 'system_purpose', ['id'], unique=False) - op.create_index(op.f('ix_system_purpose_system_id'), 'system_purpose', ['system_id'], unique=False) - op.create_table('data_producer', - sa.Column('id', sa.String(length=255), nullable=False), - sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=True), - sa.Column('updated_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=True), - sa.Column('name', sa.String(), nullable=False), - sa.Column('description', sa.String(), nullable=True), - sa.Column('external_id', sa.String(), nullable=True), - sa.Column('monitor_id', sa.String(), nullable=True), - sa.Column('contact_email', sa.String(), nullable=True), - sa.Column('contact_slack_channel', sa.String(), nullable=True), - sa.Column('contact_details', sa.JSON(), nullable=True), - sa.ForeignKeyConstraint(['monitor_id'], ['monitorconfig.id'], ), - sa.PrimaryKeyConstraint('id') + op.create_index(op.f("ix_data_producer_id"), "data_producer", ["id"], unique=False) + op.create_table("data_producer_member", + sa.Column("id", sa.String(length=255), nullable=False), + sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=True), + sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=True), + sa.Column("data_producer_id", sa.String(), nullable=False), + sa.Column("user_id", sa.String(), nullable=False), + sa.ForeignKeyConstraint(["data_producer_id"], ["data_producer.id"], ondelete="CASCADE"), + sa.ForeignKeyConstraint(["user_id"], ["fidesuser.id"], ondelete="CASCADE"), + sa.PrimaryKeyConstraint("id"), + sa.UniqueConstraint("data_producer_id", "user_id", name="uq_data_producer_member") ) - op.create_index(op.f('ix_data_producer_id'), 'data_producer', ['id'], unique=False) - 
op.create_table('data_producer_member', - sa.Column('id', sa.String(length=255), nullable=False), - sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=True), - sa.Column('updated_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=True), - sa.Column('data_producer_id', sa.String(), nullable=False), - sa.Column('user_id', sa.String(), nullable=False), - sa.ForeignKeyConstraint(['data_producer_id'], ['data_producer.id'], ondelete='CASCADE'), - sa.ForeignKeyConstraint(['user_id'], ['fidesuser.id'], ondelete='CASCADE'), - sa.PrimaryKeyConstraint('id'), - sa.UniqueConstraint('data_producer_id', 'user_id', name='uq_data_producer_member') - ) - op.create_index(op.f('ix_data_producer_member_data_producer_id'), 'data_producer_member', ['data_producer_id'], unique=False) - op.create_index(op.f('ix_data_producer_member_id'), 'data_producer_member', ['id'], unique=False) - op.create_index(op.f('ix_data_producer_member_user_id'), 'data_producer_member', ['user_id'], unique=False) - op.add_column('ctl_datasets', sa.Column('data_purposes', postgresql.ARRAY(sa.String()), server_default='{}', nullable=True)) - op.add_column('ctl_datasets', sa.Column('data_producer_id', sa.String(), nullable=True)) - op.create_foreign_key('fk_ctl_datasets_data_producer_id', 'ctl_datasets', 'data_producer', ['data_producer_id'], ['id'], ondelete='SET NULL') + op.create_index(op.f("ix_data_producer_member_data_producer_id"), "data_producer_member", ["data_producer_id"], unique=False) + op.create_index(op.f("ix_data_producer_member_id"), "data_producer_member", ["id"], unique=False) + op.create_index(op.f("ix_data_producer_member_user_id"), "data_producer_member", ["user_id"], unique=False) + op.add_column("ctl_datasets", sa.Column("data_purposes", postgresql.ARRAY(sa.String()), server_default="{}", nullable=True)) + op.add_column("ctl_datasets", sa.Column("data_producer_id", sa.String(), nullable=True)) + 
op.create_foreign_key("fk_ctl_datasets_data_producer_id", "ctl_datasets", "data_producer", ["data_producer_id"], ["id"], ondelete="SET NULL") # ### end Alembic commands ### def downgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.drop_constraint('fk_ctl_datasets_data_producer_id', 'ctl_datasets', type_='foreignkey') - op.drop_column('ctl_datasets', 'data_producer_id') - op.drop_column('ctl_datasets', 'data_purposes') - op.drop_index(op.f('ix_data_producer_member_user_id'), table_name='data_producer_member') - op.drop_index(op.f('ix_data_producer_member_id'), table_name='data_producer_member') - op.drop_index(op.f('ix_data_producer_member_data_producer_id'), table_name='data_producer_member') - op.drop_table('data_producer_member') - op.drop_index(op.f('ix_data_producer_id'), table_name='data_producer') - op.drop_table('data_producer') - op.drop_index(op.f('ix_system_purpose_system_id'), table_name='system_purpose') - op.drop_index(op.f('ix_system_purpose_id'), table_name='system_purpose') - op.drop_index(op.f('ix_system_purpose_data_purpose_id'), table_name='system_purpose') - op.drop_table('system_purpose') - op.drop_index(op.f('ix_data_consumer_purpose_id'), table_name='data_consumer_purpose') - op.drop_index(op.f('ix_data_consumer_purpose_data_purpose_id'), table_name='data_consumer_purpose') - op.drop_index(op.f('ix_data_consumer_purpose_data_consumer_id'), table_name='data_consumer_purpose') - op.drop_table('data_consumer_purpose') - op.drop_index(op.f('ix_data_purpose_id'), table_name='data_purpose') - op.drop_index(op.f('ix_data_purpose_fides_key'), table_name='data_purpose') - op.drop_index(op.f('ix_data_purpose_data_use'), table_name='data_purpose') - op.drop_table('data_purpose') - op.drop_index(op.f('ix_data_consumer_type'), table_name='data_consumer') - op.drop_index(op.f('ix_data_consumer_id'), table_name='data_consumer') - op.drop_table('data_consumer') - # ### end Alembic commands ### + 
op.drop_constraint("fk_ctl_datasets_data_producer_id", "ctl_datasets", type_="foreignkey") + op.drop_column("ctl_datasets", "data_producer_id") + op.drop_column("ctl_datasets", "data_purposes") + op.drop_index(op.f("ix_data_producer_member_user_id"), table_name="data_producer_member") + op.drop_index(op.f("ix_data_producer_member_id"), table_name="data_producer_member") + op.drop_index(op.f("ix_data_producer_member_data_producer_id"), table_name="data_producer_member") + op.drop_table("data_producer_member") + op.drop_index(op.f("ix_data_producer_id"), table_name="data_producer") + op.drop_table("data_producer") + op.drop_index(op.f("ix_system_purpose_system_id"), table_name="system_purpose") + op.drop_index(op.f("ix_system_purpose_id"), table_name="system_purpose") + op.drop_index(op.f("ix_system_purpose_data_purpose_id"), table_name="system_purpose") + op.drop_table("system_purpose") + op.drop_index(op.f("ix_data_consumer_purpose_id"), table_name="data_consumer_purpose") + op.drop_index(op.f("ix_data_consumer_purpose_data_purpose_id"), table_name="data_consumer_purpose") + op.drop_index(op.f("ix_data_consumer_purpose_data_consumer_id"), table_name="data_consumer_purpose") + op.drop_table("data_consumer_purpose") + op.drop_index(op.f("ix_data_purpose_id"), table_name="data_purpose") + op.drop_index(op.f("ix_data_purpose_fides_key"), table_name="data_purpose") + op.drop_index(op.f("ix_data_purpose_data_use"), table_name="data_purpose") + op.drop_table("data_purpose") + op.drop_index(op.f("ix_data_consumer_type"), table_name="data_consumer") + op.drop_index(op.f("ix_data_consumer_id"), table_name="data_consumer") + op.drop_table("data_consumer") From 3ffc6421f4c00618c82d775239bc8cbfeaec3f61 Mon Sep 17 00:00:00 2001 From: Adrian Galvan Date: Mon, 16 Mar 2026 23:21:20 -0700 Subject: [PATCH 17/17] fix: complete truncated data_purpose migration and add missing data_consumer_purpose table The migration had a truncated line (sa on its own) and was missing: - data_purpose 
columns (data_use, data_subject, data_categories, etc.) - data_consumer_purpose join table creation - data_purpose indexes Co-Authored-By: Claude Opus 4.6 (1M context) --- ...31c_add_purpose_based_data_model_tables.py | 29 +++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/src/fides/api/alembic/migrations/versions/xx_2026_03_12_0358_7ba8b184d31c_add_purpose_based_data_model_tables.py b/src/fides/api/alembic/migrations/versions/xx_2026_03_12_0358_7ba8b184d31c_add_purpose_based_data_model_tables.py index fc7aeaba887..ca4d37f4d6e 100644 --- a/src/fides/api/alembic/migrations/versions/xx_2026_03_12_0358_7ba8b184d31c_add_purpose_based_data_model_tables.py +++ b/src/fides/api/alembic/migrations/versions/xx_2026_03_12_0358_7ba8b184d31c_add_purpose_based_data_model_tables.py @@ -48,8 +48,33 @@ def upgrade(): sa.Column("description", sa.Text(), nullable=True), sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=True), sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=True), - sa - sa.PrimaryKeyConstraint("id", "fides_key") + sa.Column("data_use", sa.String(), nullable=False), + sa.Column("data_subject", sa.String(), nullable=True), + sa.Column("data_categories", postgresql.ARRAY(sa.String()), server_default="{}", nullable=True), + sa.Column("legal_basis_for_processing", sa.String(), nullable=True), + sa.Column("flexible_legal_basis_for_processing", sa.Boolean(), server_default="t", nullable=False), + sa.Column("special_category_legal_basis", sa.String(), nullable=True), + sa.Column("impact_assessment_location", sa.String(), nullable=True), + sa.Column("retention_period", sa.String(), nullable=True), + sa.Column("features", postgresql.ARRAY(sa.String()), server_default="{}", nullable=True), + sa.PrimaryKeyConstraint("id"), + sa.UniqueConstraint("fides_key", name="uq_data_purpose_fides_key") + ) + op.create_index(op.f("ix_data_purpose_id"), "data_purpose", 
["id"], unique=False) + op.create_index(op.f("ix_data_purpose_fides_key"), "data_purpose", ["fides_key"], unique=True) + op.create_index(op.f("ix_data_purpose_data_use"), "data_purpose", ["data_use"], unique=False) + op.create_table("data_consumer_purpose", + sa.Column("id", sa.String(length=255), nullable=False), + sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=True), + sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=True), + sa.Column("data_consumer_id", sa.String(), nullable=False), + sa.Column("data_purpose_id", sa.String(), nullable=False), + sa.Column("assigned_by", sa.String(), nullable=True), + sa.ForeignKeyConstraint(["assigned_by"], ["fidesuser.id"], ondelete="SET NULL"), + sa.ForeignKeyConstraint(["data_consumer_id"], ["data_consumer.id"], ondelete="CASCADE"), + sa.ForeignKeyConstraint(["data_purpose_id"], ["data_purpose.id"], ondelete="RESTRICT"), + sa.PrimaryKeyConstraint("id"), + sa.UniqueConstraint("data_consumer_id", "data_purpose_id", name="uq_consumer_purpose") ) op.create_index(op.f("ix_data_consumer_purpose_data_consumer_id"), "data_consumer_purpose", ["data_consumer_id"], unique=False) op.create_index(op.f("ix_data_consumer_purpose_data_purpose_id"), "data_consumer_purpose", ["data_purpose_id"], unique=False)