Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2,200 changes: 2,200 additions & 0 deletions docs/superpowers/plans/2026-03-11-purpose-based-data-model.md

Large diffs are not rendered by default.

549 changes: 549 additions & 0 deletions docs/superpowers/specs/2026-03-11-purpose-based-data-model-design.md

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
"""add purpose based data model tables

Revision ID: 7ba8b184d31c
Revises: 4ac4864180db
Create Date: 2026-03-12 03:58:30.461412

"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
# `revision` is this migration's own ID and `down_revision` is the migration it
# revises; both mirror the "Revision ID" / "Revises" lines in the module docstring.
revision = "7ba8b184d31c"
down_revision = "4ac4864180db"
branch_labels = None
depends_on = None


def upgrade():
    """Create the purpose-based data model tables and extend ``ctl_datasets``.

    Creates, in dependency order:

    * ``data_consumer``          - non-system data consumers
    * ``data_purpose``           - purposes, uniquely keyed by ``fides_key``
    * ``data_consumer_purpose``  - join table: data_consumer <-> data_purpose
    * ``system_purpose``         - join table: ctl_systems <-> data_purpose
    * ``data_producer``          - producers, optionally linked to a monitorconfig
    * ``data_producer_member``   - join table: data_producer <-> fidesuser

    and then adds ``data_purposes`` and ``data_producer_id`` (with a foreign key
    to ``data_producer``) to the existing ``ctl_datasets`` table.
    """
    # --- data_consumer -------------------------------------------------------
    op.create_table(
        "data_consumer",
        sa.Column("id", sa.String(length=255), nullable=False),
        sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=True),
        sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=True),
        sa.Column("name", sa.String(), nullable=False),
        sa.Column("description", sa.String(), nullable=True),
        sa.Column("type", sa.String(), nullable=False),
        sa.Column("external_id", sa.String(), nullable=True),
        sa.Column("egress", sa.JSON(), nullable=True),
        sa.Column("ingress", sa.JSON(), nullable=True),
        sa.Column("data_shared_with_third_parties", sa.Boolean(), server_default="f", nullable=False),
        sa.Column("third_parties", sa.String(), nullable=True),
        sa.Column("shared_categories", sa.ARRAY(sa.String()), server_default="{}", nullable=False),
        sa.Column("contact_email", sa.String(), nullable=True),
        sa.Column("contact_slack_channel", sa.String(), nullable=True),
        sa.Column("contact_details", sa.JSON(), nullable=True),
        sa.Column("tags", sa.ARRAY(sa.String()), server_default="{}", nullable=False),
        # Rows of type 'system' are rejected at the database level.
        sa.CheckConstraint("type != 'system'", name="ck_data_consumer_not_system"),
        sa.PrimaryKeyConstraint("id"),
    )
    op.create_index(op.f("ix_data_consumer_id"), "data_consumer", ["id"], unique=False)
    op.create_index(op.f("ix_data_consumer_type"), "data_consumer", ["type"], unique=False)

    # --- data_purpose --------------------------------------------------------
    op.create_table(
        "data_purpose",
        sa.Column("id", sa.String(length=255), nullable=False),
        sa.Column("fides_key", sa.String(), nullable=False),
        sa.Column("organization_fides_key", sa.Text(), nullable=True),
        sa.Column("tags", postgresql.ARRAY(sa.String()), nullable=True),
        sa.Column("name", sa.Text(), nullable=False),
        sa.Column("description", sa.Text(), nullable=True),
        sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=True),
        sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=True),
        sa.Column("data_use", sa.String(), nullable=False),
        sa.Column("data_subject", sa.String(), nullable=True),
        sa.Column("data_categories", postgresql.ARRAY(sa.String()), server_default="{}", nullable=True),
        sa.Column("legal_basis_for_processing", sa.String(), nullable=True),
        sa.Column("flexible_legal_basis_for_processing", sa.Boolean(), server_default="t", nullable=False),
        sa.Column("special_category_legal_basis", sa.String(), nullable=True),
        sa.Column("impact_assessment_location", sa.String(), nullable=True),
        sa.Column("retention_period", sa.String(), nullable=True),
        sa.Column("features", postgresql.ARRAY(sa.String()), server_default="{}", nullable=True),
        sa.PrimaryKeyConstraint("id"),
        # NOTE(review): fides_key is covered by this unique constraint AND by the
        # unique index created below; presumably this mirrors the model's
        # metadata - confirm both are wanted.
        sa.UniqueConstraint("fides_key", name="uq_data_purpose_fides_key"),
    )
    op.create_index(op.f("ix_data_purpose_id"), "data_purpose", ["id"], unique=False)
    op.create_index(op.f("ix_data_purpose_fides_key"), "data_purpose", ["fides_key"], unique=True)
    op.create_index(op.f("ix_data_purpose_data_use"), "data_purpose", ["data_use"], unique=False)

    # --- data_consumer_purpose (data_consumer <-> data_purpose) ---------------
    op.create_table(
        "data_consumer_purpose",
        sa.Column("id", sa.String(length=255), nullable=False),
        sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=True),
        sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=True),
        sa.Column("data_consumer_id", sa.String(), nullable=False),
        sa.Column("data_purpose_id", sa.String(), nullable=False),
        sa.Column("assigned_by", sa.String(), nullable=True),
        sa.ForeignKeyConstraint(["assigned_by"], ["fidesuser.id"], ondelete="SET NULL"),
        sa.ForeignKeyConstraint(["data_consumer_id"], ["data_consumer.id"], ondelete="CASCADE"),
        sa.ForeignKeyConstraint(["data_purpose_id"], ["data_purpose.id"], ondelete="RESTRICT"),
        sa.PrimaryKeyConstraint("id"),
        # The name must match the UniqueConstraint declared on the
        # DataConsumerPurpose model, otherwise the database and the ORM
        # metadata disagree about what this constraint is called.
        sa.UniqueConstraint("data_consumer_id", "data_purpose_id", name="uq_data_consumer_purpose"),
    )
    op.create_index(op.f("ix_data_consumer_purpose_data_consumer_id"), "data_consumer_purpose", ["data_consumer_id"], unique=False)
    op.create_index(op.f("ix_data_consumer_purpose_data_purpose_id"), "data_consumer_purpose", ["data_purpose_id"], unique=False)
    op.create_index(op.f("ix_data_consumer_purpose_id"), "data_consumer_purpose", ["id"], unique=False)

    # --- system_purpose (ctl_systems <-> data_purpose) ------------------------
    op.create_table(
        "system_purpose",
        sa.Column("id", sa.String(length=255), nullable=False),
        sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=True),
        sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=True),
        sa.Column("system_id", sa.String(), nullable=False),
        sa.Column("data_purpose_id", sa.String(), nullable=False),
        sa.Column("assigned_by", sa.String(), nullable=True),
        sa.ForeignKeyConstraint(["assigned_by"], ["fidesuser.id"], ondelete="SET NULL"),
        sa.ForeignKeyConstraint(["data_purpose_id"], ["data_purpose.id"], ondelete="RESTRICT"),
        sa.ForeignKeyConstraint(["system_id"], ["ctl_systems.id"], ondelete="CASCADE"),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("system_id", "data_purpose_id", name="uq_system_purpose"),
    )
    op.create_index(op.f("ix_system_purpose_data_purpose_id"), "system_purpose", ["data_purpose_id"], unique=False)
    op.create_index(op.f("ix_system_purpose_id"), "system_purpose", ["id"], unique=False)
    op.create_index(op.f("ix_system_purpose_system_id"), "system_purpose", ["system_id"], unique=False)

    # --- data_producer -------------------------------------------------------
    op.create_table(
        "data_producer",
        sa.Column("id", sa.String(length=255), nullable=False),
        sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=True),
        sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=True),
        sa.Column("name", sa.String(), nullable=False),
        sa.Column("description", sa.String(), nullable=True),
        sa.Column("external_id", sa.String(), nullable=True),
        sa.Column("monitor_id", sa.String(), nullable=True),
        sa.Column("contact_email", sa.String(), nullable=True),
        sa.Column("contact_slack_channel", sa.String(), nullable=True),
        sa.Column("contact_details", sa.JSON(), nullable=True),
        # NOTE(review): unlike every other FK in this migration, no ondelete is
        # given here, so the database default applies when a referenced
        # monitorconfig row is deleted. The column is nullable - confirm whether
        # SET NULL was intended (the model would need the same change).
        sa.ForeignKeyConstraint(["monitor_id"], ["monitorconfig.id"]),
        sa.PrimaryKeyConstraint("id"),
    )
    op.create_index(op.f("ix_data_producer_id"), "data_producer", ["id"], unique=False)

    # --- data_producer_member (data_producer <-> fidesuser) -------------------
    op.create_table(
        "data_producer_member",
        sa.Column("id", sa.String(length=255), nullable=False),
        sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=True),
        sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=True),
        sa.Column("data_producer_id", sa.String(), nullable=False),
        sa.Column("user_id", sa.String(), nullable=False),
        sa.ForeignKeyConstraint(["data_producer_id"], ["data_producer.id"], ondelete="CASCADE"),
        sa.ForeignKeyConstraint(["user_id"], ["fidesuser.id"], ondelete="CASCADE"),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("data_producer_id", "user_id", name="uq_data_producer_member"),
    )
    op.create_index(op.f("ix_data_producer_member_data_producer_id"), "data_producer_member", ["data_producer_id"], unique=False)
    op.create_index(op.f("ix_data_producer_member_id"), "data_producer_member", ["id"], unique=False)
    op.create_index(op.f("ix_data_producer_member_user_id"), "data_producer_member", ["user_id"], unique=False)

    # --- ctl_datasets additions ----------------------------------------------
    op.add_column("ctl_datasets", sa.Column("data_purposes", postgresql.ARRAY(sa.String()), server_default="{}", nullable=True))
    op.add_column("ctl_datasets", sa.Column("data_producer_id", sa.String(), nullable=True))
    # Named explicitly so that downgrade() can drop the constraint by name.
    op.create_foreign_key("fk_ctl_datasets_data_producer_id", "ctl_datasets", "data_producer", ["data_producer_id"], ["id"], ondelete="SET NULL")


def downgrade():
    """Reverse ``upgrade``: strip the ``ctl_datasets`` additions, then drop the new tables.

    Tables are removed in the reverse of their creation order so that tables
    holding foreign keys disappear before the tables they reference. For each
    table, its indexes are dropped first and the table itself last.
    """
    # ctl_datasets: the foreign key has to go before the column it is defined on.
    op.drop_constraint("fk_ctl_datasets_data_producer_id", "ctl_datasets", type_="foreignkey")
    for added_column in ("data_producer_id", "data_purposes"):
        op.drop_column("ctl_datasets", added_column)

    # (table, indexed columns) pairs. Every index created by upgrade() is named
    # "ix_<table>_<column>", so the names are rebuilt from these pairs.
    teardown_plan = (
        ("data_producer_member", ("user_id", "id", "data_producer_id")),
        ("data_producer", ("id",)),
        ("system_purpose", ("system_id", "id", "data_purpose_id")),
        ("data_consumer_purpose", ("id", "data_purpose_id", "data_consumer_id")),
        ("data_purpose", ("id", "fides_key", "data_use")),
        ("data_consumer", ("type", "id")),
    )
    for table, indexed_columns in teardown_plan:
        for column in indexed_columns:
            op.drop_index(op.f(f"ix_{table}_{column}"), table_name=table)
        op.drop_table(table)
10 changes: 10 additions & 0 deletions src/fides/api/db/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,15 @@
from fides.api.models.custom_asset import CustomAsset
from fides.api.models.custom_connector_template import CustomConnectorTemplate
from fides.api.models.custom_report import CustomReport
from fides.api.models.data_consumer import ( # noqa: F401
DataConsumer,
DataConsumerPurpose,
)
from fides.api.models.data_producer import ( # noqa: F401
DataProducer,
DataProducerMember,
)
from fides.api.models.data_purpose import DataPurpose # noqa: F401
from fides.api.models.datasetconfig import DatasetConfig
from fides.api.models.db_cache import DBCache
from fides.api.models.detection_discovery.core import MonitorConfig, StagedResource
Expand Down Expand Up @@ -97,6 +106,7 @@
from fides.api.models.system_group import SystemGroup, SystemGroupMember
from fides.api.models.system_history import SystemHistory
from fides.api.models.system_manager import SystemManager
from fides.api.models.system_purpose import SystemPurpose # noqa: F401
from fides.api.models.taxonomy import (
Taxonomy,
TaxonomyAllowedUsage,
Expand Down
104 changes: 104 additions & 0 deletions src/fides/api/models/data_consumer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
from __future__ import annotations

from typing import TYPE_CHECKING, Any

from sqlalchemy import (
ARRAY,
JSON,
Boolean,
CheckConstraint,
Column,
ForeignKey,
String,
UniqueConstraint,
)
from sqlalchemy.ext.declarative import declared_attr
from sqlalchemy.orm import Session, relationship

from fides.api.db.base_class import Base

if TYPE_CHECKING:
from fides.api.models.data_purpose import DataPurpose # noqa: F401


class DataConsumer(Base):
    """
    A data consumer that is not a system (e.g. a group, project, or custom type).

    Consumers of type ``'system'`` are not stored here: they are surfaced via a
    facade over ctl_systems, and a check constraint rejects that type value.
    """

    @declared_attr
    def __tablename__(self) -> str:
        return "data_consumer"

    # Database-level guard: the 'system' type value is not allowed in this table.
    __table_args__ = (
        CheckConstraint(
            "type != 'system'",
            name="ck_data_consumer_not_system",
        ),
    )

    # -- identity / classification --
    name = Column(String, nullable=False)
    description = Column(String, nullable=True)
    type = Column(String, index=True, nullable=False)
    external_id = Column(String, nullable=True)

    # -- data flows and sharing --
    egress = Column(JSON, nullable=True)
    ingress = Column(JSON, nullable=True)
    data_shared_with_third_parties = Column(
        Boolean,
        nullable=False,
        server_default="f",
    )
    third_parties = Column(String, nullable=True)
    shared_categories = Column(
        ARRAY(String),
        nullable=False,
        server_default="{}",
    )

    # -- contact information --
    contact_email = Column(String, nullable=True)
    contact_slack_channel = Column(String, nullable=True)
    contact_details = Column(JSON, nullable=True)

    tags = Column(
        ARRAY(String),
        nullable=False,
        server_default="{}",
    )

    # Purpose assignments for this consumer; "delete-orphan" removes an
    # assignment once it is no longer attached to its consumer.
    consumer_purposes = relationship(
        "DataConsumerPurpose",
        lazy="selectin",
        cascade="all, delete-orphan",
    )

    @classmethod
    def create(
        cls,
        db: Session,
        *,
        data: dict[str, Any],
        check_name: bool = False,
    ) -> "DataConsumer":
        """Create a consumer without enforcing name uniqueness by default.

        Several consumers may share a name, so ``check_name`` defaults to
        ``False`` here and is forwarded unchanged to the parent ``create``.
        """
        consumer = super().create(db=db, data=data, check_name=check_name)
        return consumer


class DataConsumerPurpose(Base):
    """
    Audited association row between a non-system DataConsumer and a DataPurpose.

    A given (consumer, purpose) pair may appear at most once.
    """

    __tablename__ = "data_consumer_purpose"  # type: ignore[assignment]
    __table_args__ = (
        UniqueConstraint(
            "data_consumer_id",
            "data_purpose_id",
            name="uq_data_consumer_purpose",
        ),
    )

    # Deleting the consumer deletes its purpose assignments.
    data_consumer_id = Column(
        String,
        ForeignKey("data_consumer.id", ondelete="CASCADE"),
        index=True,
        nullable=False,
    )
    # RESTRICT: a purpose cannot be deleted while an assignment still points at it.
    data_purpose_id = Column(
        String,
        ForeignKey("data_purpose.id", ondelete="RESTRICT"),
        index=True,
        nullable=False,
    )
    # User recorded as having made the assignment; nulled if that user is deleted.
    assigned_by = Column(
        String,
        ForeignKey("fidesuser.id", ondelete="SET NULL"),
        nullable=True,
    )

    data_consumer = relationship(
        "DataConsumer",
        overlaps="consumer_purposes",
        lazy="selectin",
    )  # type: ignore[call-arg]
    data_purpose = relationship(
        "DataPurpose",
        lazy="selectin",
    )
82 changes: 82 additions & 0 deletions src/fides/api/models/data_producer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
from __future__ import annotations

from typing import TYPE_CHECKING, Any

from sqlalchemy import JSON, Column, ForeignKey, String, UniqueConstraint
from sqlalchemy.ext.declarative import declared_attr
from sqlalchemy.orm import Session, relationship

from fides.api.db.base_class import Base

if TYPE_CHECKING:
from fides.api.models.detection_discovery import MonitorConfig # noqa: F401
from fides.api.models.fides_user import FidesUser # noqa: F401


class DataProducer(Base):
    """
    A team or group that is responsible for registering data and for
    assigning purposes to datasets.
    """

    @declared_attr
    def __tablename__(self) -> str:
        return "data_producer"

    # -- identity --
    name = Column(String, nullable=False)
    description = Column(String, nullable=True)
    external_id = Column(String, nullable=True)

    # Optional link to a monitorconfig row.
    # NOTE(review): no ondelete is specified on this foreign key - confirm the
    # intended behaviour when the referenced monitor is deleted.
    monitor_id = Column(
        String,
        ForeignKey("monitorconfig.id"),
        nullable=True,
    )

    # -- contact information --
    contact_email = Column(String, nullable=True)
    contact_slack_channel = Column(String, nullable=True)
    contact_details = Column(JSON, nullable=True)

    # Membership rows; "delete-orphan" removes a membership once it is no
    # longer attached to its producer.
    members = relationship(
        "DataProducerMember",
        lazy="selectin",
        cascade="all, delete-orphan",
    )
    monitor = relationship(
        "MonitorConfig",
        lazy="selectin",
    )

    @classmethod
    def create(
        cls,
        db: Session,
        *,
        data: dict[str, Any],
        check_name: bool = False,
    ) -> "DataProducer":
        """Create a producer without enforcing name uniqueness by default.

        Several producers may share a name, so ``check_name`` defaults to
        ``False`` here and is forwarded unchanged to the parent ``create``.
        """
        producer = super().create(db=db, data=data, check_name=check_name)
        return producer


class DataProducerMember(Base):
    """
    Association row recording that a FidesUser is a member of a DataProducer.

    A given (producer, user) pair may appear at most once.
    """

    __tablename__ = "data_producer_member"  # type: ignore[assignment]
    __table_args__ = (
        UniqueConstraint(
            "data_producer_id",
            "user_id",
            name="uq_data_producer_member",
        ),
    )

    # Both foreign keys cascade: deleting either the producer or the user
    # deletes the membership row.
    data_producer_id = Column(
        String,
        ForeignKey("data_producer.id", ondelete="CASCADE"),
        index=True,
        nullable=False,
    )
    user_id = Column(
        String,
        ForeignKey("fidesuser.id", ondelete="CASCADE"),
        index=True,
        nullable=False,
    )

    data_producer = relationship(
        "DataProducer",
        overlaps="members",
        lazy="selectin",
    )  # type: ignore[call-arg]
    user = relationship(
        "FidesUser",
        lazy="selectin",
    )
Loading
Loading