diff --git a/.gitignore b/.gitignore index e43b0f988..fe1fdc20d 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,4 @@ .DS_Store +.agents/ +.config/agents/ +agents.md diff --git a/gateway/.envs/example/minio.env b/gateway/.envs/example/minio.env deleted file mode 100644 index 965d69133..000000000 --- a/gateway/.envs/example/minio.env +++ /dev/null @@ -1,13 +0,0 @@ -# ------------------------------------------------------- -# ====================== LOCAL ENV ====================== -# MINIO Config -MINIO_ROOT_USER=minioadmin -MINIO_ROOT_PASSWORD= -MINIO_ENDPOINT_URL=minio:9000 -MINIO_STORAGE_USE_HTTPS=false # prod: true - -# AWS S3 Config -AWS_ACCESS_KEY_ID=minioadmin -AWS_SECRET_ACCESS_KEY= -AWS_STORAGE_BUCKET_NAME=spectrumx -AWS_S3_ENDPOINT_URL=http://minio:9000 diff --git a/gateway/.envs/example/storage.env b/gateway/.envs/example/storage.env new file mode 100644 index 000000000..dcabfbf95 --- /dev/null +++ b/gateway/.envs/example/storage.env @@ -0,0 +1,24 @@ +# ====================== STORAGE ENV ====================== +# PRIMARY (RustFS) — S3-compatible storage, default for local/CI +# SECONDARY (SeaweedFS) — S3-compatible object store for local/dev + +# PRIMARY (RustFS) credentials +PRIMARY_ACCESS_KEY_ID=admin +PRIMARY_ENDPOINT_URL=sds-gateway-local-rustfs:9000 +PRIMARY_S3_ENDPOINT_URL=http://sds-gateway-local-rustfs:9000 +PRIMARY_SECRET_ACCESS_KEY=admin +PRIMARY_STORAGE_BUCKET_NAME=spectrumx +PRIMARY_STORAGE_USE_HTTPS=false + +# SECONDARY (SeaweedFS) credentials +SECONDARY_ACCESS_KEY_ID=admin +SECONDARY_SECRET_ACCESS_KEY=admin +SECONDARY_ENDPOINT_URL=sds-gateway-local-sfs-s3:8333 +SECONDARY_S3_ENDPOINT_URL=http://sds-gateway-local-sfs-s3:8333 +SECONDARY_STORAGE_BUCKET_NAME=spectrumx +SECONDARY_STORAGE_USE_HTTPS=false + +# Transition controls +OBJECT_STORE_DUAL_WRITE_STRICT=false +OBJECT_STORE_READ_FALLBACK_TO_SECONDARY_ENABLED=false +OBJECT_STORE_WRITE_BOTH_ENABLED=false diff --git a/gateway/.envs/example/storage.prod.env b/gateway/.envs/example/storage.prod.env new file mode 100644 index 000000000..426d967a7 --- /dev/null +++ b/gateway/.envs/example/storage.prod.env @@ -0,0 +1,26 @@ +# ====================== STORAGE ENV (PRODUCTION) ====================== +# SeaweedFS config — see seaweedfs/compose.production.yaml +# RustFS config — see gateway/compose..yaml + +# PRIMARY credentials (RustFS in local and ci, SeaweedFS in prod) +PRIMARY_ACCESS_KEY_ID=admin +PRIMARY_ENDPOINT_URL=sds-gateway-prod-sfs-s3:8333 +PRIMARY_S3_ENDPOINT_URL=http://sds-gateway-prod-sfs-s3:8333 +PRIMARY_SECRET_ACCESS_KEY=admin +PRIMARY_STORAGE_BUCKET_NAME=spectrumx +PRIMARY_STORAGE_USE_HTTPS=false + +# SECONDARY credentials (usually RustFS in prod; absent in local and ci) +SECONDARY_ACCESS_KEY_ID=minioadmin +SECONDARY_ENDPOINT_URL=prod-secondary-rustfs:9000 +SECONDARY_ROOT_PASSWORD= +SECONDARY_ROOT_USER=minioadmin +SECONDARY_S3_ENDPOINT_URL=http://prod-secondary-rustfs:9000 +SECONDARY_SECRET_ACCESS_KEY= +SECONDARY_STORAGE_BUCKET_NAME=spectrumx +SECONDARY_STORAGE_USE_HTTPS=false + +# Transition controls +OBJECT_STORE_DUAL_WRITE_STRICT=false +OBJECT_STORE_READ_FALLBACK_TO_SECONDARY_ENABLED=false +OBJECT_STORE_WRITE_BOTH_ENABLED=false diff --git a/gateway/.github/workflows/ci.yml b/gateway/.github/workflows/ci.yml deleted file mode 100644 index d490ab4e2..000000000 --- a/gateway/.github/workflows/ci.yml +++ /dev/null @@ -1,62 +0,0 @@ -name: CI - -# Enable Buildkit and let compose use it to speed up image building -env: - DOCKER_BUILDKIT: 1 - COMPOSE_DOCKER_CLI_BUILD: 1 - -on: - workflow_dispatch: - # To manually 
trigger the workflow - # https://docs.github.com/en/actions/writing-workflows/choosing-when-your-workflow-runs/events-that-trigger-workflows#workflow_dispatch - - pull_request: - types: ["ready_for_review", "synchronize"] - branches: ["master", "main"] - paths-ignore: ["docs/**"] - - push: - branches: ["master", "main"] - paths-ignore: ["docs/**"] - -concurrency: - group: ${{ github.head_ref || github.run_id }} - cancel-in-progress: true - -jobs: - linter: - runs-on: ubuntu-latest - steps: - - name: Checkout Code Repository - uses: actions/checkout@v4 - - - name: Set up Python - uses: actions/setup-python@v6 - with: - python-version: "3.12" - # Consider using pre-commit.ci for open source project - - name: Run pre-commit - uses: pre-commit/action@v3.0.1 - - # With no caching at all the entire ci process takes 3m to complete! - pytest: - runs-on: ubuntu-latest - - steps: - - name: Checkout Code Repository - uses: actions/checkout@v4 - - - name: Build the Stack - run: docker compose -f compose.local.yaml build django - - - name: Build the docs - run: docker compose -f compose.docs.yaml build docs - - - name: Run DB Migrations - run: docker compose -f compose.local.yaml run --rm django uv run manage.py migrate - - - name: Run Django Tests - run: docker compose -f compose.local.yaml run --rm django uv run manage.py test - - - name: Tear down the Stack - run: docker compose -f compose.local.yaml down diff --git a/gateway/compose.ci.yaml b/gateway/compose.ci.yaml index 14ea08c80..ae9350c17 100644 --- a/gateway/compose.ci.yaml +++ b/gateway/compose.ci.yaml @@ -13,7 +13,7 @@ volumes: sds-gateway-ci-uv-venv-worker: {} sds-gateway-ci-uv-venv-beat: {} sds-gateway-ci-uv-venv-flower: {} - sds-gateway-ci-minio-files: {} + sds-gateway-ci-rustfs-files: {} sds-gateway-ci-opensearch-data: {} sds-gateway-ci-postgres-data-backups: {} sds-gateway-ci-postgres-data: {} @@ -21,15 +21,15 @@ networks: # for safety, all gateway CI networks start with "sds-gateway-ci-" - sds-gateway-ci-minio-net: + sds-gateway-ci-rustfs-net: driver: bridge sds-gateway-ci-opensearch-net: driver: bridge + sds-gateway-ci-postgres-net: + driver: bridge sds-network-ci: - # external: true # make it external if running with traefik on this machine - # should match traefik's network name + external: true name: sds-network-ci - driver: bridge services: sds-gateway-ci-app: build: @@ -45,8 +45,6 @@ condition: service_healthy redis: condition: service_healthy - minio: - condition: service_healthy volumes: - sds-gateway-ci-uv-cache:/opt/uv-cache/ - sds-gateway-ci-uv-venv-app:/opt/uv-venv/ @@ -74,7 +72,7 @@ # - ./staticfiles/:/app/staticfiles/:z # used in prod only env_file: - ./.envs/ci/django.env - - ./.envs/ci/minio.env + - ./.envs/ci/storage.env # PRIMARY (RustFS) — local/CI: primary only, no secondary - ./.envs/ci/postgres.env - ./.envs/ci/opensearch.env # remember /entrypoint runs first @@ -82,8 +80,9 @@ ports: - "8000:8000" # make sure this port matches traefik's config, if used networks: - - sds-gateway-ci-minio-net + - sds-gateway-ci-rustfs-net - sds-gateway-ci-opensearch-net + - sds-gateway-ci-postgres-net - sds-network-ci healthcheck: test: ["CMD-SHELL", "curl -f http://localhost:8000/ || exit 1"] @@ -124,34 +123,49 @@ selinux: z networks: - sds-network-ci + healthcheck: + test: + [ + "CMD-SHELL", + "wget -q -O /dev/null http://localhost/healthz || exit 1", + ] + interval: 30s + timeout: 5s + retries: 5 + start_period: 10s - minio: - # main file storage for sds - # minio uses rolling upgrades
that are non-disruptive, so we can target latest - # For more information on how to upgrade MinIO deployment, refer to the MinIO documentation: - # https://min.io/docs/minio/container/operations/install-deploy-manage/upgrade-minio-deployment.html - image: minio/minio:latest - container_name: sds-gateway-ci-minio + # Primary storage (RustFS) — S3-compatible, default for local/CI + rustfs: + image: rustfs/rustfs:latest + container_name: sds-gateway-ci-rustfs volumes: - - sds-gateway-ci-minio-files:/files + - sds-gateway-ci-rustfs-files:/data ports: - - "9000:9000" + - "19000:9000" - "9001:9001" env_file: - - ./.envs/ci/minio.env + - ./.envs/ci/storage.env + environment: + - RUSTFS_VOLUMES=/data + - RUSTFS_ADDRESS=0.0.0.0:9000 + - RUSTFS_CONSOLE_ADDRESS=0.0.0.0:9001 + - RUSTFS_CONSOLE_ENABLE=true + - RUSTFS_CORS_ALLOWED_ORIGINS=* + - RUSTFS_CONSOLE_CORS_ALLOWED_ORIGINS=* + - RUSTFS_ACCESS_KEY=${PRIMARY_ACCESS_KEY_ID} + - RUSTFS_SECRET_KEY=${PRIMARY_SECRET_ACCESS_KEY} + networks: + - sds-gateway-ci-rustfs-net healthcheck: test: [ "CMD-SHELL", - "curl -f http://localhost:9000/minio/health/live || exit 1", + "curl -f http://localhost:9000/health || exit 1", ] interval: 30s timeout: 5s retries: 5 start_period: 10s - command: 'server /files --console-address ":9001"' - networks: - - sds-gateway-ci-minio-net opensearch: # used for indexing and searching documents @@ -191,7 +205,7 @@ build: context: . dockerfile: ./compose/production/postgres/Dockerfile - # this dockerfile is used for both local/CI and prod + # this dockerfile is used for both local and prod image: sds-gateway-ci-postgres container_name: sds-gateway-ci-postgres volumes: @@ -200,7 +214,7 @@ env_file: - ./.envs/ci/postgres.env networks: - - sds-gateway-ci-minio-net + - sds-gateway-ci-postgres-net healthcheck: test: [ @@ -264,14 +278,25 @@ selinux: z env_file: - ./.envs/ci/django.env - - ./.envs/ci/minio.env + - ./.envs/ci/storage.env # PRIMARY (RustFS) — local/CI: primary only, no secondary - ./.envs/ci/postgres.env - ./.envs/ci/opensearch.env command: "/worker-start" networks: - - sds-gateway-ci-minio-net + - sds-gateway-ci-rustfs-net - sds-gateway-ci-opensearch-net + - sds-gateway-ci-postgres-net - sds-network-ci + healthcheck: + test: + [ + "CMD-SHELL", + 'uv run celery -A config.celery_app inspect ping -d "celery@$$HOSTNAME" | grep -q "OK"', + ] + interval: 30s + timeout: 30s + retries: 5 + start_period: 30s celery-beat: # Celery Beat scheduler for periodic tasks @@ -309,14 +334,25 @@ selinux: z env_file: - ./.envs/ci/django.env - - ./.envs/ci/minio.env + - ./.envs/ci/storage.env # PRIMARY (RustFS) — local/CI: primary only, no secondary - ./.envs/ci/postgres.env - ./.envs/ci/opensearch.env command: "/beat-start" networks: - - sds-gateway-ci-minio-net + - sds-gateway-ci-rustfs-net - sds-gateway-ci-opensearch-net + - sds-gateway-ci-postgres-net - sds-network-ci + healthcheck: + test: + [ + "CMD-SHELL", + 'uv run python -c "import pathlib,sys; ok=any((b\"beat\" in data) and ((b\"celery\" in data) or (b\"watchfiles\" in data)) for data in (path.read_bytes() for path in pathlib.Path(\"/proc\").glob(\"[0-9]*/cmdline\"))); sys.exit(0 if ok else 1)"', + ] + interval: 30s + timeout: 30s + retries: 5 + start_period: 30s celery-flower: # Celery monitoring and administration tool @@ -354,16 +390,27 @@ selinux: z env_file: - ./.envs/ci/django.env - - ./.envs/ci/minio.env + - ./.envs/ci/storage.env # PRIMARY (RustFS) — local/CI: primary only, no secondary - ./.envs/ci/postgres.env
- ./.envs/ci/opensearch.env command: "/flower-start" ports: - "5555:5555" # Flower web interface networks: - - sds-gateway-ci-minio-net + - sds-gateway-ci-rustfs-net - sds-gateway-ci-opensearch-net + - sds-gateway-ci-postgres-net - sds-network-ci + healthcheck: + test: + [ + "CMD-SHELL", + 'curl -f --header "Authorization: Basic $(echo -n "$$CELERY_FLOWER_USER:$$CELERY_FLOWER_PASSWORD" | base64)" http://localhost:5555/api/workers || exit 1', + ] + interval: 30s + timeout: 30s + retries: 5 + start_period: 30s # ========================== # local development services @@ -395,6 +442,16 @@ services: - action: sync path: ./ target: /app/ + healthcheck: + test: + [ + "CMD-SHELL", + 'node -e "const http=require(\"http\"); const req=http.get(\"http://127.0.0.1:3000\", res => process.exit(res.statusCode < 500 ? 0 : 1)); req.on(\"error\", () => process.exit(1)); req.setTimeout(5000, () => { req.destroy(); process.exit(1); });"', + ] + interval: 30s + timeout: 10s + retries: 5 + start_period: 45s mailhog: # email testing service for local development @@ -405,3 +462,13 @@ services: - "8025:8025" # Web UI networks: - sds-network-ci + healthcheck: + test: + [ + "CMD-SHELL", + "wget -q -O /dev/null http://localhost:8025/api/v2/messages || exit 1", + ] + interval: 30s + timeout: 5s + retries: 5 + start_period: 10s diff --git a/gateway/compose.local.yaml b/gateway/compose.local.yaml index b0358c8ca..5e687ca50 100644 --- a/gateway/compose.local.yaml +++ b/gateway/compose.local.yaml @@ -13,7 +13,7 @@ volumes: sds-gateway-local-uv-venv-worker: {} sds-gateway-local-uv-venv-beat: {} sds-gateway-local-uv-venv-flower: {} - sds-gateway-local-minio-files: {} + sds-gateway-local-rustfs-files: {} sds-gateway-local-opensearch-data: {} sds-gateway-local-postgres-data-backups: {} sds-gateway-local-postgres-data: {} @@ -21,12 +21,18 @@ volumes: networks: # for safety, all gateway local networks start with "sds-gateway-local-" - sds-gateway-local-minio-net: + sds-gateway-local-rustfs-net: driver: bridge + name: sds-gateway-local-rustfs-net sds-gateway-local-opensearch-net: driver: bridge + name: sds-gateway-local-opensearch-net + sds-gateway-local-postgres-net: + driver: bridge + name: sds-gateway-local-postgres-net sds-network-local: - # external: true # make it external if running with traefik on this machine + # externally defined in traefik and/or in the primary storage compose file + external: true # should match traefik's network name name: sds-network-local driver: bridge @@ -45,8 +51,6 @@ services: condition: service_healthy redis: condition: service_healthy - minio: - condition: service_healthy volumes: - sds-gateway-local-uv-cache:/opt/uv-cache/ - sds-gateway-local-uv-venv-app:/opt/uv-venv/ @@ -74,7 +78,7 @@ services: # - ./staticfiles/:/app/staticfiles/:z # used in prod only env_file: - ./.envs/local/django.env - - ./.envs/local/minio.env + - ./.envs/local/storage.env - ./.envs/local/postgres.env - ./.envs/local/opensearch.env # remember /entrypoint runs first @@ -82,8 +86,9 @@ services: ports: - "8000:8000" # make sure this port matches traefik's config, if used networks: - - sds-gateway-local-minio-net - sds-gateway-local-opensearch-net + - sds-gateway-local-rustfs-net + - sds-gateway-local-postgres-net - sds-network-local healthcheck: test: ["CMD-SHELL", "curl -f http://localhost:8000/ || exit 1"] @@ -124,34 +129,49 @@ services: selinux: z networks: - sds-network-local + healthcheck: + test: + [ + "CMD-SHELL", + "wget -q -O /dev/null http://localhost/healthz || exit 1", + ] + interval: 30s + timeout: 5s + 
retries: 5 + start_period: 10s - minio: - # main file storage for sds - # minio uses rolling upgrades that are non-disruptive, so we can target latest - # For more information on how to upgrade MinIO deployment, refer to the MinIO documentation: - # https://min.io/docs/minio/container/operations/install-deploy-manage/upgrade-minio-deployment.html - image: minio/minio:latest - container_name: sds-gateway-local-minio + # Primary storage (RustFS) — S3-compatible, default for local/CI + rustfs: + image: rustfs/rustfs:latest + container_name: sds-gateway-local-rustfs volumes: - - sds-gateway-local-minio-files:/files + - sds-gateway-local-rustfs-files:/data ports: - - "9000:9000" + - "19000:9000" - "9001:9001" env_file: - - ./.envs/local/minio.env + - ./.envs/local/storage.env + environment: + - RUSTFS_VOLUMES=/data + - RUSTFS_ADDRESS=0.0.0.0:9000 + - RUSTFS_CONSOLE_ADDRESS=0.0.0.0:9001 + - RUSTFS_CONSOLE_ENABLE=true + - RUSTFS_CORS_ALLOWED_ORIGINS=* + - RUSTFS_CONSOLE_CORS_ALLOWED_ORIGINS=* + - RUSTFS_ACCESS_KEY=${PRIMARY_ACCESS_KEY_ID} + - RUSTFS_SECRET_KEY=${PRIMARY_SECRET_ACCESS_KEY} + networks: + - sds-gateway-local-rustfs-net healthcheck: test: [ "CMD-SHELL", - "curl -f http://localhost:9000/minio/health/live || exit 1", + "curl -f http://localhost:9000/health || exit 1", ] interval: 30s timeout: 5s retries: 5 start_period: 10s - command: 'server /files --console-address ":9001"' - networks: - - sds-gateway-local-minio-net opensearch: # used for indexing and searching documents @@ -200,7 +220,7 @@ env_file: - ./.envs/local/postgres.env networks: - - sds-gateway-local-minio-net + - sds-gateway-local-postgres-net healthcheck: test: [ @@ -264,14 +284,26 @@ selinux: z env_file: - ./.envs/local/django.env - - ./.envs/local/minio.env + - ./.envs/local/storage.env - ./.envs/local/postgres.env - ./.envs/local/opensearch.env command: "/worker-start" networks: - - sds-gateway-local-minio-net + # additional networks are used for health checks - sds-gateway-local-opensearch-net + - sds-gateway-local-postgres-net + - sds-gateway-local-rustfs-net - sds-network-local + healthcheck: + test: + [ + "CMD-SHELL", + 'uv run celery -A config.celery_app inspect ping -d "celery@$$HOSTNAME" | grep -q "OK"', + ] + interval: 30s + timeout: 30s + retries: 5 + start_period: 30s celery-beat: # Celery Beat scheduler for periodic tasks @@ -309,61 +341,25 @@ selinux: z env_file: - ./.envs/local/django.env - - ./.envs/local/minio.env + - ./.envs/local/storage.env - ./.envs/local/postgres.env - ./.envs/local/opensearch.env command: "/beat-start" networks: - - sds-gateway-local-minio-net - - sds-gateway-local-opensearch-net - - sds-network-local - - celery-flower: - # Celery monitoring and administration tool - build: - context: .
- dockerfile: ./compose/local/django/Dockerfile - image: sds-gateway-local-app - container_name: sds-gateway-local-celery-flower - tty: true - depends_on: - sds-gateway-local-app: - condition: service_healthy - volumes: - - sds-gateway-local-uv-cache:/opt/uv-cache/ - - sds-gateway-local-uv-venv-flower:/opt/uv-venv/ - - sds-gateway-local-app-media:/app/sds_gateway/media - - sds-gateway-local-temp-zips:/app/sds_gateway/media/temp_zips - - source: ./sds_gateway/api_methods/migrations - target: /app/sds_gateway/api_methods/migrations - type: bind - read_only: false - bind: - selinux: z - - source: ./sds_gateway/users/migrations - target: /app/sds_gateway/users/migrations - type: bind - read_only: false - bind: - selinux: z - - source: ./sds_gateway/visualizations/migrations - target: /app/sds_gateway/visualizations/migrations - type: bind - read_only: false - bind: - selinux: z - env_file: - - ./.envs/local/django.env - - ./.envs/local/minio.env - - ./.envs/local/postgres.env - - ./.envs/local/opensearch.env - command: "/flower-start" - ports: - - "5555:5555" # Flower web interface - networks: - - sds-gateway-local-minio-net + - sds-gateway-local-rustfs-net - sds-gateway-local-opensearch-net + - sds-gateway-local-postgres-net - sds-network-local + healthcheck: + test: + [ + "CMD-SHELL", + 'uv run python -c "import pathlib,sys; ok=any((b\"beat\" in data) and ((b\"celery\" in data) or (b\"watchfiles\" in data)) for data in (path.read_bytes() for path in pathlib.Path(\"/proc\").glob(\"[0-9]*/cmdline\"))); sys.exit(0 if ok else 1)"', + ] + interval: 30s + timeout: 30s + retries: 5 + start_period: 30s # ========================== # local development services @@ -395,13 +391,33 @@ - action: sync path: ./ target: /app/ + healthcheck: + test: + [ + "CMD-SHELL", + 'node -e "const http=require(\"http\"); const req=http.get(\"http://127.0.0.1:3000\", res => process.exit(res.statusCode < 500 ?
0 : 1)); req.on(\"error\", () => process.exit(1)); req.setTimeout(5000, () => { req.destroy(); process.exit(1); });"', + ] + interval: 30s + timeout: 10s + retries: 5 + start_period: 45s mailhog: # email testing service for local development - image: mailhog/mailhog:latest + image: docker.io/mailhog/mailhog:latest container_name: sds-gateway-local-mailhog ports: - "1025:1025" # SMTP server - "8025:8025" # Web UI networks: - sds-network-local + healthcheck: + test: + [ + "CMD-SHELL", + "wget -q -O /dev/null http://localhost:8025/api/v2/messages || exit 1", + ] + interval: 30s + timeout: 5s + retries: 5 + start_period: 10s diff --git a/gateway/compose.production.yaml b/gateway/compose.production.yaml index 64922c875..26dac6185 100644 --- a/gateway/compose.production.yaml +++ b/gateway/compose.production.yaml @@ -26,6 +26,8 @@ networks: driver: bridge sds-gateway-prod-opensearch-net: driver: bridge + sds-gateway-prod-postgres-net: + driver: bridge sds-network-prod: external: true @@ -44,8 +46,6 @@ services: condition: service_started redis: condition: service_started - minio: - condition: service_started volumes: - source: sds-gateway-prod-app-media target: /app/sds_gateway/media @@ -74,11 +74,12 @@ services: type: volume read_only: false post_start: - - command: chown -R django:django /app/sds_gateway/media/ /opt/uv-cache/ /opt/uv-venv/ + - command: chown -R django:django /app/sds_gateway/media/ /opt/uv-cache/ + /opt/uv-venv/ user: root env_file: - ./.envs/production/django.env - - ./.envs/production/minio.env + - ./.envs/production/storage.prod.env - ./.envs/production/postgres.env - ./.envs/production/opensearch.env ports: @@ -88,10 +89,10 @@ services: command: "/start" networks: - sds-gateway-prod-opensearch-net - - sds-gateway-prod-minio-net - - sds-network-prod + - sds-gateway-prod-postgres-net + - sds-network-prod # also carries SeaweedFS S3 traffic — see seaweedfs/compose.yaml healthcheck: - test: ["CMD-SHELL", "curl -f http://localhost:18000/ || exit 1"] + test: [ "CMD-SHELL", "curl -f http://localhost:18000/ || exit 1" ] interval: 30s timeout: 10s retries: 5 @@ -116,12 +117,16 @@ services: read_only: true networks: - sds-network-prod + healthcheck: + test: [ "CMD-SHELL", "wget -q -O /dev/null http://localhost/healthz || exit 1" ] + interval: 30s + timeout: 5s + retries: 5 + start_period: 10s - minio: - # main file storage for sds - # minio uses rolling upgrades that are non-disruptive, so we can target latest - # For more information on how to upgrade MinIO deployment, refer to the MinIO documentation: - # https://min.io/docs/minio/container/operations/install-deploy-manage/upgrade-minio-deployment.html + # DEPRECATED: being replaced by SeaweedFS. Keep running during migration. 
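+ # (This service was renamed from `minio` to `minio-deprecated`, so commands that
+ # targeted it by name, such as `just dc stop minio`, must now reference
+ # `minio-deprecated`.)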
+ # Remove after data migration is complete — see docs/migration-minio-to-seaweedfs.md + minio-deprecated: image: minio/minio:latest container_name: sds-gateway-prod-minio volumes: @@ -130,11 +135,115 @@ - "19000:9000" - "19001:9001" env_file: - - ./.envs/production/minio.env + - ./.envs/production/storage.prod.env restart: unless-stopped - command: 'server /files --console-address ":9001"' + healthcheck: + test: [ "CMD-SHELL", "curl -f http://localhost:9000/minio/health/live || exit 1" ] + interval: 30s + timeout: 5s + retries: 5 + start_period: 10s + command: "server /files --console-address \":9001\"" + networks: + - sds-gateway-prod-minio-net + + # prod-secondary-minio: + # # https://min.io/docs/minio/container/operations/install-deploy-manage/upgrade-minio-deployment.html + # image: docker.io/minio/minio:latest + # container_name: sds-gateway-prod-secondary-minio + # volumes: + # - /disk1:/data/disk1 + # - /disk2:/data/disk2 + # - /disk3:/data/disk3 + # # - ./.envs/production/minio-config.json:/tmp/.mc/config.json + # ports: + # - "19100:9000" # deprecated minio S3 API is 19000 + # - "19101:9001" # deprecated minio console is 19001 + # env_file: + # - ./.envs/production/storage.prod.env + # restart: unless-stopped + # healthcheck: + # test: [ "CMD-SHELL", "curl -f http://localhost:9000/minio/health/live || exit 1" ] + # interval: 30s + # timeout: 5s + # retries: 5 + # start_period: 10s + # command: "server --json /data/disk{1...3} --console-address \":9001\"" + # networks: + # - sds-gateway-prod-minio-net + # ulimits: + # nofile: + # soft: 131072 + # hard: 131072 + + # RustFS S3-compatible storage service, used as the secondary storage backend for + # the gateway in production. The primary S3 storage backend in production is + # SeaweedFS, defined in ../seaweedfs/compose.production.yaml. + # At the time of writing, RustFS is not yet ready for production use, so we keep it + # as our secondary backend, for redundancy.
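+ # How the OBJECT_STORE_* transition flags in storage.prod.env are expected to
+ # combine (an assumption inferred from the setting names in
+ # config/settings/base.py, not a verified contract of DualObjectStoreS3Storage):
+ # OBJECT_STORE_WRITE_BOTH_ENABLED=true mirrors each write to the secondary store,
+ # OBJECT_STORE_DUAL_WRITE_STRICT=true makes a failed secondary write fail the
+ # whole request instead of only being logged, and
+ # OBJECT_STORE_READ_FALLBACK_TO_SECONDARY_ENABLED=true retries reads against the
+ # secondary store when the primary misses.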
+ prod-secondary-rustfs: + image: docker.io/rustfs/rustfs:latest + container_name: sds-gateway-prod-secondary-rustfs + security_opt: + - "no-new-privileges:true" + ports: + - "19400:9000" # S3 API port + - "19401:9001" # Console port + env_file: + - ./.envs/production/storage.prod.env + environment: + - RUSTFS_VOLUMES=/data/rustfs{1...3} + - RUSTFS_ADDRESS=0.0.0.0:9000 + - RUSTFS_CONSOLE_ADDRESS=0.0.0.0:9001 + - RUSTFS_CONSOLE_ENABLE=true + - RUSTFS_CORS_ALLOWED_ORIGINS=* + - RUSTFS_CONSOLE_CORS_ALLOWED_ORIGINS=* + # - RUSTFS_ACCESS_KEY=rustfsadmin # CHANGEME + # - RUSTFS_SECRET_KEY=rustfsadmin # CHANGEME + - RUSTFS_OBS_LOGGER_LEVEL=debug + - RUSTFS_TLS_PATH=/opt/tls + + volumes: + - /disk6:/data/rustfs1 + - /disk7:/data/rustfs2 + - /disk8:/data/rustfs3 + - sds-gateway-prod-rustfs-logs:/app/logs networks: - sds-gateway-prod-minio-net + ulimits: + nofile: + soft: 131072 + hard: 131072 + restart: unless-stopped + healthcheck: + test: + [ + "CMD", + "sh", + "-c", + "curl -f http://127.0.0.1:9000/health && curl -f http://127.0.0.1:9001/rustfs/console/health", + ] + interval: 30s + timeout: 10s + retries: 3 + start_period: 40s + + # RustFS volume permissions fixer service + rustfs-volume-permission-helper: + image: alpine + volumes: + - /disk6:/data1 + - /disk7:/data2 + - /disk8:/data3 + - sds-gateway-prod-rustfs-logs:/logs + command: > + sh -c " + chown -R 10001:10001 /data1 /data2 /data3 /logs && + echo 'Volume permissions fixed' && + exit 0 + " + restart: "no" opensearch: # used for indexing and searching documents @@ -185,7 +294,9 @@ test: [ "CMD-SHELL", - 'curl -k -u "$OPENSEARCH_ADMIN_USER:$OPENSEARCH_INITIAL_ADMIN_PASSWORD" https://localhost:9200/_cluster/health || exit 1', + "curl -k -u \"$OPENSEARCH_ADMIN_USER:$OPENSEARCH_INITIAL_ADMIN_PASSWORD\" https://localhost:9200/_cluster/health || exit 1", ] interval: 5s timeout: 5s @@ -206,7 +317,19 @@ env_file: - ./.envs/production/postgres.env networks: - - sds-gateway-prod-minio-net + - sds-gateway-prod-postgres-net + - sds-gateway-prod-opensearch-net healthcheck: + test: + [ + "CMD-SHELL", + "pg_isready -U \"$$POSTGRES_USER\" -d \"$$POSTGRES_DB\" -h localhost", + ] + interval: 10s + timeout: 5s + retries: 5 + start_period: 10s redis: # used as caching layer for the gateway app @@ -217,6 +340,12 @@ - sds-gateway-prod-redis-data:/data networks: - sds-network-prod + healthcheck: + test: [ "CMD", "redis-cli", "ping" ] + interval: 10s + timeout: 5s + retries: 5 + start_period: 5s # =================== # Celery services for background tasks @@ -254,19 +383,30 @@ type: volume read_only: false post_start: - - command: chown -R django:django /app/sds_gateway/media/ /opt/uv-cache/ /opt/uv-venv/ + - command: chown -R django:django /app/sds_gateway/media/ /opt/uv-cache/ + /opt/uv-venv/ user: root env_file: - ./.envs/production/django.env - - ./.envs/production/minio.env - - ./.envs/production/postgres.env + - ./.envs/production/storage.prod.env - ./.envs/production/opensearch.env command: "/worker-start" restart: unless-stopped networks: - sds-gateway-prod-opensearch-net - - sds-gateway-prod-minio-net - - sds-network-prod + - sds-gateway-prod-postgres-net + - sds-network-prod # also carries SeaweedFS S3 traffic + healthcheck: + test: + [ + "CMD-SHELL", + "uv run celery -A config.celery_app inspect ping -d \"celery@$$HOSTNAME\" | grep -q \"OK\"", + ] + interval: 30s + timeout: 30s + retries: 5 + start_period: 30s celery-beat: # Celery Beat scheduler for periodic tasks @@ -302,19 +442,34 @@
type: volume read_only: false post_start: - - command: chown -R django:django /app/sds_gateway/media/ /opt/uv-cache/ /opt/uv-venv/ + - command: chown -R django:django /app/sds_gateway/media/ /opt/uv-cache/ + /opt/uv-venv/ user: root env_file: - ./.envs/production/django.env - - ./.envs/production/minio.env - - ./.envs/production/postgres.env + - ./.envs/production/storage.prod.env - ./.envs/production/opensearch.env command: "/beat-start" restart: unless-stopped networks: - sds-gateway-prod-opensearch-net - - sds-gateway-prod-minio-net - - sds-network-prod + - sds-gateway-prod-postgres-net + - sds-network-prod # also carries SeaweedFS S3 traffic + healthcheck: + test: + [ + "CMD-SHELL", + 'uv run python -c "import pathlib,sys; ok=any((b\"beat\" in data) and ((b\"celery\" in data) or (b\"watchfiles\" in data)) for data in (path.read_bytes() for path in pathlib.Path(\"/proc\").glob(\"[0-9]*/cmdline\"))); sys.exit(0 if ok else 1)"', + ] + interval: 30s + timeout: 30s + retries: 5 + start_period: 30s celery-flower: # Celery monitoring and administration tool @@ -345,18 +500,30 @@ type: volume read_only: false post_start: - - command: chown -R django:django /app/sds_gateway/media/ /opt/uv-cache/ /opt/uv-venv/ + - command: chown -R django:django /app/sds_gateway/media/ /opt/uv-cache/ + /opt/uv-venv/ user: root env_file: - ./.envs/production/django.env - - ./.envs/production/minio.env - - ./.envs/production/postgres.env + - ./.envs/production/storage.prod.env - ./.envs/production/opensearch.env command: "/flower-start" restart: unless-stopped ports: - "15555:5555" # Flower web interface networks: - - sds-gateway-prod-minio-net - sds-gateway-prod-opensearch-net - - sds-network-prod + - sds-gateway-prod-postgres-net + - sds-network-prod # also carries SeaweedFS S3 traffic + healthcheck: + test: + [ + "CMD-SHELL", + 'curl -f --header "Authorization: Basic $(echo -n "$$CELERY_FLOWER_USER:$$CELERY_FLOWER_PASSWORD" | base64)" http://localhost:5555/api/workers || exit 1', + ] + interval: 30s + timeout: 30s + retries: 5 + start_period: 30s diff --git a/gateway/compose/production/django/celery/worker-start b/gateway/compose/production/django/celery/worker-start index 1caba3f8e..d2ab19bdc 100644 --- a/gateway/compose/production/django/celery/worker-start +++ b/gateway/compose/production/django/celery/worker-start @@ -4,4 +4,4 @@ set -o errexit set -o pipefail set -o nounset -exec uv run celery -A config.celery_app worker -l INFO +exec uv run celery -A config.celery_app worker -l INFO --concurrency "${CELERY_WORKER_CONCURRENCY:-4}" diff --git a/gateway/compose/production/nginx/nginx-default.conf b/gateway/compose/production/nginx/nginx-default.conf index 69fd8339a..d0503de26 100644 --- a/gateway/compose/production/nginx/nginx-default.conf +++ b/gateway/compose/production/nginx/nginx-default.conf @@ -1,22 +1,68 @@ -error_log /var/log/nginx/error.log debug; +error_log /var/log/nginx/error.log warn; server { - # serving static files - # TLS is handled by Traefik listen 80; server_name localhost; + server_tokens off; - # Set MIME types include /etc/nginx/mime.types; default_type application/octet-stream; + add_header X-Content-Type-Options nosniff always; location /static/ { alias /usr/share/nginx/static/; + autoindex off; + + if ($request_method = OPTIONS) { + add_header Access-Control-Allow-Origin *; + add_header Access-Control-Allow-Methods 'GET, HEAD, OPTIONS'; + add_header Access-Control-Allow-Headers 'Range'; + add_header Access-Control-Max-Age 86400; +
add_header Content-Length 0; + add_header Content-Type 'text/plain; charset=utf-8'; + add_header X-Content-Type-Options nosniff always; + return 204; + } + + limit_except GET HEAD { + deny all; + } - # Add CORS headers add_header Access-Control-Allow-Origin *; - add_header Access-Control-Allow-Methods 'GET, POST, OPTIONS'; - add_header Access-Control-Allow-Headers 'DNT,User-Agent,X-Requested-With,If-Modified-Since,Cache-Control,Content-Type,Range'; + add_header Access-Control-Allow-Methods 'GET, HEAD, OPTIONS'; + add_header Access-Control-Allow-Headers 'Range'; + add_header X-Content-Type-Options nosniff always; + + location ~* \.(js|css|png|jpg|jpeg|gif|ico|svg|woff|woff2|ttf|eot)$ { + if ($request_method = OPTIONS) { + add_header Access-Control-Allow-Origin *; + add_header Access-Control-Allow-Methods 'GET, HEAD, OPTIONS'; + add_header Access-Control-Allow-Headers 'Range'; + add_header Access-Control-Max-Age 86400; + add_header Content-Length 0; + add_header Content-Type 'text/plain; charset=utf-8'; + add_header X-Content-Type-Options nosniff always; + return 204; + } + + limit_except GET HEAD { + deny all; + } + + expires 1d; + add_header Cache-Control "public, immutable"; + add_header Access-Control-Allow-Origin *; + add_header Access-Control-Allow-Methods 'GET, HEAD, OPTIONS'; + add_header Access-Control-Allow-Headers 'Range'; + add_header X-Content-Type-Options nosniff always; + } + } + + location = /healthz { + access_log off; + default_type 'text/plain; charset=utf-8'; + add_header X-Content-Type-Options nosniff always; + return 200 'OK'; + } } diff --git a/gateway/config/settings/base.py b/gateway/config/settings/base.py index f95d18945..9b0fa4508 100644 --- a/gateway/config/settings/base.py +++ b/gateway/config/settings/base.py @@ -5,6 +5,7 @@ import string from pathlib import Path from typing import Any +from urllib.parse import urlparse from celery.schedules import crontab from environs import env @@ -48,25 +49,123 @@ OPENSEARCH_VERIFY_CERTS: bool = env.bool("OPENSEARCH_VERIFY_CERTS", default=False) OPENSEARCH_CA_CERTS: str | None = env.str("OPENSEARCH_CA_CERTS", default=None) -# MinIO configuration +# S3-compatible object storage (SeaweedFS / RustFS / MinIO) + + +def _build_endpoint_url(endpoint: str, *, secure: bool) -> str: + """Build endpoint URL with scheme if endpoint does not include one.""" + parsed_endpoint = urlparse(endpoint) + if parsed_endpoint.scheme: + return endpoint + + protocol = "https" if secure else "http" + return f"{protocol}://{endpoint}" + + +def _strip_endpoint_scheme(endpoint_url: str) -> str: + """Strip scheme from endpoint URL for MinIO client compatibility.""" + parsed_endpoint = urlparse(endpoint_url) + if parsed_endpoint.netloc: + return parsed_endpoint.netloc + return endpoint_url + + STORAGES = { "default": { - "BACKEND": "storages.backends.s3boto3.S3Boto3Storage", + "BACKEND": ( + "sds_gateway.api_methods.utils."
+ "dual_object_store_storage.DualObjectStoreS3Storage" + ), }, "staticfiles": { "BACKEND": "django.contrib.staticfiles.storage.StaticFilesStorage", }, } -MINIO_ENDPOINT_URL = env.str("MINIO_ENDPOINT_URL", default="minio:9000") -MINIO_STORAGE_USE_HTTPS = env.bool("MINIO_STORAGE_USE_HTTPS", default=False) - -AWS_ACCESS_KEY_ID: str = env.str("AWS_ACCESS_KEY_ID", default="minioadmin") -AWS_SECRET_ACCESS_KEY: str = env.str("AWS_SECRET_ACCESS_KEY", default="miniopassword") -AWS_STORAGE_BUCKET_NAME: str = env.str("AWS_STORAGE_BUCKET_NAME", default="spectrumx") -AWS_S3_ENDPOINT_URL: str = env.str( +# env var names kept for backward compatibility with existing deployments +LEGACY_AWS_ACCESS_KEY_ID: str = env.str("AWS_ACCESS_KEY_ID", default="admin") +LEGACY_AWS_SECRET_ACCESS_KEY: str = env.str("AWS_SECRET_ACCESS_KEY", default="admin") +LEGACY_AWS_STORAGE_BUCKET_NAME: str = env.str( + "AWS_STORAGE_BUCKET_NAME", default="spectrumx" +) +LEGACY_AWS_S3_ENDPOINT_URL: str = env.str( "AWS_S3_ENDPOINT_URL", - default="http://minio:9000", + default="http://sds-gateway-local-sfs-s3:8333", +) + +# Primary (SeaweedFS) +PRIMARY_ACCESS_KEY_ID: str = env.str( + "PRIMARY_ACCESS_KEY_ID", + default=LEGACY_AWS_ACCESS_KEY_ID, +) +PRIMARY_SECRET_ACCESS_KEY: str = env.str( + "PRIMARY_SECRET_ACCESS_KEY", + default=LEGACY_AWS_SECRET_ACCESS_KEY, +) +PRIMARY_STORAGE_BUCKET_NAME: str = env.str( + "PRIMARY_STORAGE_BUCKET_NAME", + default=LEGACY_AWS_STORAGE_BUCKET_NAME, +) +PRIMARY_S3_ENDPOINT_URL: str = env.str( + "PRIMARY_S3_ENDPOINT_URL", + default=LEGACY_AWS_S3_ENDPOINT_URL, +) +PRIMARY_STORAGE_USE_HTTPS: bool = env.bool( + "PRIMARY_STORAGE_USE_HTTPS", + default=PRIMARY_S3_ENDPOINT_URL.startswith("https://"), +) +PRIMARY_ENDPOINT_URL: str = env.str( + "PRIMARY_ENDPOINT_URL", + default=_strip_endpoint_scheme(PRIMARY_S3_ENDPOINT_URL), ) + +# Secondary (minio/rustfs) +SECONDARY_STORAGE_USE_HTTPS: bool = env.bool( + "SECONDARY_STORAGE_USE_HTTPS", default=False +) +SECONDARY_ENDPOINT_URL: str = env.str( + "SECONDARY_ENDPOINT_URL", + default="sds-gateway-local-sfs-s3:8333", +) +SECONDARY_S3_ENDPOINT_URL: str = env.str( + "SECONDARY_S3_ENDPOINT_URL", + default=_build_endpoint_url( + SECONDARY_ENDPOINT_URL, + secure=SECONDARY_STORAGE_USE_HTTPS, + ), +) +SECONDARY_ACCESS_KEY_ID: str = env.str( + "SECONDARY_ACCESS_KEY_ID", + default=LEGACY_AWS_ACCESS_KEY_ID, +) +SECONDARY_SECRET_ACCESS_KEY: str = env.str( + "SECONDARY_SECRET_ACCESS_KEY", + default=LEGACY_AWS_SECRET_ACCESS_KEY, +) +SECONDARY_STORAGE_BUCKET_NAME: str = env.str( + "SECONDARY_STORAGE_BUCKET_NAME", + default=LEGACY_AWS_STORAGE_BUCKET_NAME, +) + +# transition controls +OBJECT_STORE_WRITE_BOTH_ENABLED: bool = env.bool( + "OBJECT_STORE_WRITE_BOTH_ENABLED", + default=False, +) +OBJECT_STORE_READ_FALLBACK_TO_SECONDARY_ENABLED: bool = env.bool( + "OBJECT_STORE_READ_FALLBACK_TO_SECONDARY_ENABLED", + default=False, +) +OBJECT_STORE_DUAL_WRITE_STRICT: bool = env.bool( + "OBJECT_STORE_DUAL_WRITE_STRICT", + default=False, +) + +# keep AWS_* aliases mapped to primary store for backward compatibility +# django-storages expects these values +AWS_S3_ACCESS_KEY_ID: str = PRIMARY_ACCESS_KEY_ID +AWS_S3_SECRET_ACCESS_KEY: str = PRIMARY_SECRET_ACCESS_KEY +AWS_STORAGE_BUCKET_NAME: str = PRIMARY_STORAGE_BUCKET_NAME +AWS_S3_ENDPOINT_URL: str = PRIMARY_S3_ENDPOINT_URL AWS_S3_REGION_NAME: str = "us-east-1" AWS_S3_SIGNATURE_VERSION: str = "s3v4" AWS_S3_FILE_OVERWRITE: bool = False diff --git a/gateway/config/settings/local.py b/gateway/config/settings/local.py index 78458464c..876afee07 100644 --- 
a/gateway/config/settings/local.py +++ b/gateway/config/settings/local.py @@ -116,6 +116,8 @@ # CELERY # ------------------------------------------------------------------------------ +# Worker concurrency; override with env var CELERY_WORKER_CONCURRENCY +CELERY_WORKER_CONCURRENCY: int = env.int("CELERY_WORKER_CONCURRENCY", default=1) # https://docs.celeryq.dev/en/stable/userguide/configuration.html#task-eager-propagates # CELERY_TASK_EAGER_PROPAGATES: bool = True # noqa: ERA001 diff --git a/gateway/config/settings/production.py b/gateway/config/settings/production.py index d5303363e..b3da8967d 100644 --- a/gateway/config/settings/production.py +++ b/gateway/config/settings/production.py @@ -1,6 +1,8 @@ """⚠️ Setting overrides for PRODUCTION ⚠️""" # ruff: noqa: F405, ERA001 +import os + import sentry_sdk from django.utils.log import DEFAULT_LOGGING from loguru import logger as log @@ -199,6 +201,14 @@ send_default_pii=False, ) +# CELERY +# ------------------------------------------------------------------------------ +# Worker concurrency: override with env CELERY_WORKER_CONCURRENCY. +_nproc = os.cpu_count() or 1 +CELERY_WORKER_CONCURRENCY: int = env.int( + "CELERY_WORKER_CONCURRENCY", default=min(8, _nproc) +) + # DJANGO-REST-FRAMEWORK # ------------------------------------------------------------------------------- # Tools that generate code samples can use SERVERS to point to the correct domain diff --git a/gateway/docs/detailed-deploy.md b/gateway/docs/detailed-deploy.md index 8f0026097..8a44a2843 100644 --- a/gateway/docs/detailed-deploy.md +++ b/gateway/docs/detailed-deploy.md @@ -103,8 +103,8 @@ Then proceed to the [first deployment steps](#first-deployment-automated) below. # manually set the secrets in .envs/local/*.env files ``` - > [!NOTE] - > In `minio.env`, set `AWS_SECRET_ACCESS_KEY == MINIO_ROOT_PASSWORD`; +> [!NOTE] +> In `storage.env`, set `SECONDARY_SECRET_ACCESS_KEY` to match the secondary store's `SECONDARY_ROOT_PASSWORD`; > > In `django.env`, to generate the `API_KEY` get it running first, then navigate to > [localhost:8000/users/generate-api-key](http://localhost:8000/users/generate-api-key). @@ -166,10 +166,10 @@ differ. This also tests the connection between the application and the OpenSearch instance. -3. Create the MinIO bucket: +3. Create the storage bucket: Go to [localhost:9001](http://localhost:9001) (or `localhost:19001` in production) - and create a bucket named `spectrumx` with the credentials set in `minio.env`. + and create a bucket named `spectrumx` with the credentials set in `storage.env`. Optionally apply a storage quota to this bucket (you can modify it later if needed). ## First deployment: not automated @@ -267,8 +267,8 @@ rsync -aP ./.envs/example/ ./.envs/production echo $(head /dev/urandom | tr -dc 'a-zA-Z0-9' | head -c 40) ``` -+ In `minio.env`, **`AWS_SECRET_ACCESS_KEY` must be equal to - `MINIO_ROOT_PASSWORD`**; ++ In `storage.prod.env`, **`SECONDARY_SECRET_ACCESS_KEY` must be equal to + `SECONDARY_ROOT_PASSWORD`**; + In `django.env`, the **`DJANGO_ADMIN_URL` must end with a slash `/`**. + In `django.env`, to generate the `API_KEY` get it running first, then navigate to [localhost:18000/users/generate-api-key-form](http://localhost:18000/users/generate-api-key-form/) @@ -380,37 +380,62 @@ production hosts. Open the web interface at [localhost:18000](http://localhost:18000). You can create regular users by signing up there. - You can sign in with the superuser credentials at `localhost:18000/` to access the admin interface.
+ You can sign in with the superuser credentials at + `localhost:18000/` + to access the admin interface. -4. MinIO setup: +4. RustFS setup: - This is a multi-drive, single-node setup of MinIO. For a distributed setup - (multi-node), see the [MinIO - documentation](https://min.io/docs/minio/linux/operations/install-deploy-manage/deploy-minio-multi-node-multi-drive.html#deploy-minio-distributed). + > [!NOTE] + > As of May 2026, RustFS is the secondary storage backend for production deployments + > of SDS, and the primary is SeaweedFS. MinIO was replaced by a combination of + > SeaweedFS (primary) and RustFS (secondary) after project maintainers abandoned the + > open source community version of MinIO. For more details, see the [MinIO to + > SeaweedFS migration documentation](./migration-minio-to-seaweedfs.md). + + The instructions below are for setting up the RustFS instance if you choose to use + it; they closely mirror the pre-existing MinIO instructions. This is a + multi-drive, single-node setup of RustFS. For other kinds of deployment, check the + RustFS documentation. + + The `mc` commands below refer to the MinIO CLI client, which can be used with RustFS + endpoints. Unfortunately it also seems unmaintained, so you may want to use a + community fork or the RustFS CLI instead: + + + Official `mc` repo: + + Pigsty community fork of `mc`: (most starred fork) + + Docker Hub mirror + + RustFS CLI (alpha): + + Most `mc` commands can be replaced with `rc` as-is, but the API is not an exact + drop-in replacement. >[!NOTE] > - > We're using `local` in the example commands below as our MinIO alias. Change it - > accordingly if you're using a different alias in your MinIO configuration. + > We're using `prod-secondary-rustfs` in the example commands below as our `mc` alias. + > Change it accordingly if you're using a different alias in your config. + > To see all aliases, run `mc alias list`. 1. Establish the connection alias: ```bash - just dc exec minio mc alias set local http://127.0.0.1:9000 minioadmin - # paste your MinIO credentials from .envs/production/minio.env; - # change `minioadmin` above to match that file, if needed. + mc alias set prod-secondary-rustfs http://127.0.0.1:9000 rustfsadmin + # paste your storage credentials from .envs/production/storage.prod.env; + # change `rustfsadmin` above to match that file, if needed. # in prod, that is equivalent to: - # docker exec -it sds-gateway-prod-minio mc alias set local http://127.0.0.1:9000 minioadmin + # docker exec -it sds-gateway-prod-secondary-rustfs mc alias set prod-secondary-rustfs http://127.0.0.1:9000 rustfsadmin ``` - Optionally, set up a local `mc` client if you're managing the cluster remotely: + Optionally, register the `prod-secondary-rustfs` alias in a local `mc` client if + you're managing the cluster remotely: ```bash - mc alias set local http://:19000 + mc alias set prod-secondary-rustfs http://localhost:19400 rustfsadmin ``` + When running from another Docker container, you can use the container name in + the stack instead of `localhost`. + 2. Set admin settings: + [MinIO reference @@ -419,7 +444,7 @@ ```bash # enable object compression for all objects, except the ones excluded by default # NOTE: compression is not recommended by MinIO when also using encryption.
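+ # Assumption: RustFS accepts these MinIO admin API calls. If a given `mc admin`
+ # command is rejected by RustFS, set the equivalent option through the RUSTFS_*
+ # environment variables in the compose file (or via `rc`) instead.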
- mc admin config set local compression enable=on extensions= mime_types= + mc admin config set prod-secondary-rustfs compression enable=on extensions= mime_types= # https://min.io/docs/minio/container/administration/object-management/data-compression.html#id6 @@ -432,36 +457,41 @@ production hosts. # References: # https://min.io/docs/minio/linux/reference/minio-server/settings/storage-class.html#mc-conf.storage_class.standard # https://min.io/product/erasure-code-calculator - mc admin config set local storage_class standard=EC:2 - mc admin config set local storage_class rrs=EC:1 + mc admin config set prod-secondary-rustfs storage_class standard=EC:2 + mc admin config set prod-secondary-rustfs storage_class rrs=EC:1 ``` - 3. Create the MinIO bucket: + 3. Create the bucket: ```bash - mc mb local/spectrumx + mc mb --ignore-existing "prod-secondary-rustfs/spectrumx" ``` 4. (Optional) Diagnostic checks: + > [!TIP] + > If using `rc`, check their documentation. They have additional commands like: + > `rc admin info disk prod-secondary-rustfs` and + > `rc admin info cluster prod-secondary-rustfs` + Check the output of these commands to make sure everything is as expected: ```bash - mc admin info local - mc admin config get local + mc admin info prod-secondary-rustfs + mc admin config get prod-secondary-rustfs # --- cluster health # liveness check curl -I "http://localhost:19000/minio/health/live" - # A response code of 200 OK indicates the MinIO server is online and functional. + # A response code of 200 OK indicates the server is online and functional. # Any other HTTP codes indicate an issue with reaching the server, such as a # transient network issue or potential downtime. # write quorum check curl -I "http://localhost:19000/minio/health/cluster" - # a response code of 200 OK indicates that the MinIO cluster has sufficient MinIO + # a response code of 200 OK indicates that the cluster has sufficient MinIO # servers online to meet write quorum. A response code of 503 Service Unavailable # indicates the cluster does not currently have write quorum. diff --git a/gateway/docs/github-actions-ephemeral-env.md b/gateway/docs/github-actions-ephemeral-env.md index c930b646c..533783cd2 100644 --- a/gateway/docs/github-actions-ephemeral-env.md +++ b/gateway/docs/github-actions-ephemeral-env.md @@ -101,8 +101,8 @@ The CI environment uses safe, deterministic values: | Service | Variable | Value | | ------------- | ----------------------------------- | ------------------------------- | | Postgres | `POSTGRES_PASSWORD` | `ci-postgres-pass` | -| MinIO | `MINIO_ROOT_PASSWORD` | `ci-minio-secret` | -| MinIO | `AWS_SECRET_ACCESS_KEY` | `ci-minio-secret` | +| Secondary | `SECONDARY_ROOT_PASSWORD` | `ci-minio-secret` | +| Secondary | `AWS_SECRET_ACCESS_KEY` | `ci-minio-secret` | | OpenSearch | `OPENSEARCH_INITIAL_ADMIN_PASSWORD` | `CiAdmin123!` | | OpenSearch | `OPENSEARCH_PASSWORD` | `CiDjango123!` | | Celery Flower | `CELERY_FLOWER_PASSWORD` | `ci-flower-pass` | @@ -180,7 +180,7 @@ Check that all env files were generated: ```bash ls -la .envs/ci/ -# Should show: django.env, minio.env, opensearch.env, postgres.env +# Should show: django.env, storage.env, opensearch.env, postgres.env ``` ### Secrets not populated diff --git a/gateway/docs/migration-minio-to-seaweedfs.md b/gateway/docs/migration-minio-to-seaweedfs.md index 2f03b3827..ce20e4f74 100644 --- a/gateway/docs/migration-minio-to-seaweedfs.md +++ b/gateway/docs/migration-minio-to-seaweedfs.md @@ -7,6 +7,7 @@ SeaweedFS setup is fully automated. 
This document covers data migration from a running MinIO instance and production-specific configuration. + [Migration: MinIO → SeaweedFS](#migration-minio--seaweedfs) + + [Diagram](#diagram) + [Prerequisites](#prerequisites) + [1. Start both stacks](#1-start-both-stacks) + [2. Configure `mc` aliases](#2-configure-mc-aliases) @@ -22,6 +23,56 @@ MinIO instance and production-specific configuration. --- +## Diagram + +```mermaid +timeline + title CRC SDS storage backend migration (2026) + March Week 2 : ✅ Run a standalone prototype for SeaweedFS + : ✅ Initial SFS configuration + April Week 2 : ✅ Draft the data migration plan + April Week 3 : ✅ Automate deployment (local/ci/production) + : ✅ Integrate SFS as an additional storage backend + : ✅ Create backup deployment of MinIO on NFS for the transition period + April Week 4 : ✅ Verify backup integrity + : ⬜ Unmount 3 (/8) MinIO drives (entering RO mode); rsync data in them to separate location + : ⬜ Deploy a new MinIO instance on those 3 drives with `EC:1` + : ⬜ Mirror data from RO MinIO to the new instance + : ⬜ Check data integrity of new instance + : ⬜ Switch production to use the new instance (leaving RO mode) + April Week 5 : ⬜ Stop older MinIO instance; wipe drives + : ⬜ Repurpose drives for SeaweedFS + : ⬜ Mirror existing production data to SeaweedFS + : ⬜ Switch production primary to SeaweedFS, leave MinIO as secondary; monitor stability + May Week 1 : ⬜ Remove `prod-backup`; finalize migration; keep monitoring +``` + ++ March Week 2 + + [x] Run a standalone prototype for SeaweedFS + + [x] Initial SFS configuration ++ April Week 2 + + [x] Draft the data migration plan ++ April Week 3 + + [x] Automate deployment (local/ci/production) + + [x] Integrate SFS as an additional storage backend + + [x] Create backup deployment of MinIO on NFS for the transition period ++ April Week 4 + + [x] Verify backup integrity + + [ ] Unmount 3 (/8) MinIO drives (entering RO mode); rsync data in them to separate location + + [ ] Deploy a new MinIO instance on those 3 drives with `EC:1` + + [ ] Mirror data from RO MinIO to the new instance + + [ ] Check data integrity of new instance + + [ ] Switch production to use the new instance (leaving RO mode) ++ April Week 5 + + [ ] Stop older MinIO instance; wipe drives + + [ ] Repurpose drives for SeaweedFS + + [ ] Mirror existing production data to SeaweedFS + + [ ] Switch production primary to SeaweedFS, leave MinIO as secondary; monitor stability ++ May Week 1 + + [ ] Remove `prod-backup`; finalize migration; keep monitoring + +--- + ## Prerequisites | Tool | Purpose | @@ -59,13 +110,13 @@ curl -s http://localhost:8333/healthz # SFS S3 endpoint: expected empty 200 ```bash # read credentials from env files -MINIO_USER=$(grep MINIO_ROOT_USER .envs/local/minio.env | cut -d= -f2) -MINIO_PASS=$(grep MINIO_ROOT_PASSWORD .envs/local/minio.env | cut -d= -f2) -SFS_KEY=$(grep AWS_ACCESS_KEY_ID .envs/local/sfs.env | cut -d= -f2) -SFS_SECRET=$(grep AWS_SECRET_ACCESS_KEY .envs/local/sfs.env | cut -d= -f2) +SECONDARY_USER=$(grep SECONDARY_ROOT_USER .envs/local/storage.env | cut -d= -f2) +SECONDARY_PASS=$(grep SECONDARY_ROOT_PASSWORD .envs/local/storage.env | cut -d= -f2) +PRIMARY_KEY=$(grep PRIMARY_ACCESS_KEY_ID .envs/local/storage.env | cut -d= -f2) +PRIMARY_SECRET=$(grep PRIMARY_SECRET_ACCESS_KEY .envs/local/storage.env | cut -d= -f2) -mc alias set minio http://localhost:9000 "${MINIO_USER}" "${MINIO_PASS}" -mc alias set sfs http://localhost:8333 "${SFS_KEY}" "${SFS_SECRET}" +mc alias set minio http://localhost:9000
"${SECONDARY_USER}" "${SECONDARY_PASS}" +mc alias set sfs http://localhost:8333 "${PRIMARY_KEY}" "${PRIMARY_SECRET}" ``` Verify: @@ -103,7 +154,7 @@ mc diff minio/spectrumx sfs/spectrumx ## 5. Switch the application to SFS -The compose files already reference `sfs.env` instead of `minio.env`. Restart the +The compose files already reference `storage.env` for both backends. Restart the gateway to confirm: ```bash @@ -118,7 +169,7 @@ curl -s http://localhost:8000/api/v1/files/ | head Once migration is verified: 1. Stop MinIO: `just dc stop minio` -2. Remove `minio.env` entries from `env_file` lists in the compose file (lines marked `# legacy`). +2. Remove `storage.env` entries from `env_file` lists in the compose file (lines marked `# legacy`). 3. Remove the `minio:` service block. 4. Remove the `sds-gateway--minio-net` network and `sds-gateway--minio-files` volume. 5. Restart: `just down && just up` @@ -163,12 +214,15 @@ Generate production credentials and keep both files in sync: ACCESS_KEY=$(openssl rand -hex 16) SECRET_KEY=$(openssl rand -base64 32 | tr -d '=+/') -sed -i "s/^AWS_ACCESS_KEY_ID=.*/AWS_ACCESS_KEY_ID=${ACCESS_KEY}/" \ - gateway/.envs/production/sfs.env \ +ACCESS_KEY=$(grep PRIMARY_ACCESS_KEY_ID .envs/local/storage.env | cut -d= -f2) +SECRET_KEY=$(grep PRIMARY_SECRET_ACCESS_KEY .envs/local/storage.env | cut -d= -f2) + +sed -i "s/^PRIMARY_ACCESS_KEY_ID=.*/PRIMARY_ACCESS_KEY_ID=${ACCESS_KEY}/" \ + gateway/.envs/production/storage.env \ seaweedfs/.envs/production/sfs.env -sed -i "s/^AWS_SECRET_ACCESS_KEY=.*/AWS_SECRET_ACCESS_KEY=${SECRET_KEY}/" \ - gateway/.envs/production/sfs.env \ +sed -i "s/^PRIMARY_SECRET_ACCESS_KEY=.*/PRIMARY_SECRET_ACCESS_KEY=${SECRET_KEY}/" \ + gateway/.envs/production/storage.env \ seaweedfs/.envs/production/sfs.env ``` @@ -177,7 +231,7 @@ sed -i "s/^AWS_SECRET_ACCESS_KEY=.*/AWS_SECRET_ACCESS_KEY=${SECRET_KEY}/" \ 1. Add the server hostname to `seaweedfs/scripts/prod-hostnames.env` and `gateway/scripts/prod-hostnames.env` — deploy scripts validate this. -2. Confirm `seaweedfs/.envs/production/sfs.env` and `gateway/.envs/production/sfs.env` +2. Confirm `seaweedfs/.envs/production/sfs.env` and `gateway/.envs/production/storage.env` have matching non-empty credentials. 3. The `sds-network-prod` Docker network must exist (the deploy script creates it @@ -193,5 +247,5 @@ sed -i "s/^AWS_SECRET_ACCESS_KEY=.*/AWS_SECRET_ACCESS_KEY=${SECRET_KEY}/" \ ## Rollback -Replace `sfs.env` with `minio.env` in the `env_file` lists of the compose file, then -restart the gateway. MinIO data is untouched until its volume is explicitly deleted. +Replace `storage.prod.env` with `storage.env` in the `env_file` lists of the compose file, then +restart the gateway. diff --git a/gateway/justfile b/gateway/justfile index 7def1e43a..6569c19ca 100644 --- a/gateway/justfile +++ b/gateway/justfile @@ -14,7 +14,7 @@ app_container := shell(env_selection_script + ' $1', "app_container") compose_file := shell(env_selection_script + ' $1', "compose_file") env := shell(env_selection_script + ' $1', "env") env_file := shell(env_selection_script + ' $1', "env_file") -docker_compose := "COMPOSE_FILE=" + compose_file + " docker compose --env-file " + env_file +docker_compose := "COMPOSE_FILE=" + compose_file + " docker compose --env-file " + env_file + " --env-file ./.envs/" + env + "/storage.env" gwy_root := justfile_directory() git_root := gwy_root + "/.." 
uv_cmd := docker_compose + " run '" + app_container + "' uv" @@ -146,7 +146,6 @@ dev-setup: [group('utilities')] env: #!/usr/bin/env bash - set -euo pipefail echo -e "\nSelected env:\n" echo -e "\tEnvironment: \e[34m '{{ env }}'\e[0m" echo -e "\tEnvironment file: \e[34m '{{ env_file }}'\e[0m" diff --git a/gateway/scripts/deploy.sh b/gateway/scripts/deploy.sh index dfe3adb83..31dcbbb8c 100755 --- a/gateway/scripts/deploy.sh +++ b/gateway/scripts/deploy.sh @@ -8,473 +8,530 @@ # SDS_FORCE_SECRETS - Set to 'true' to overwrite existing secrets (default: false) # SDS_SKIP_SECRETS - Set to 'true' to skip secret generation (default: false) # SDS_SKIP_NETWORK - Set to 'true' to skip network creation (default: false) +# SDS_SKIP_SFS - Set to 'true' to skip SeaweedFS stack deployment (default: false) # SDS_DETACH - Set to 'true' to run in detached mode (default: true for prod) # # USAGE EXAMPLES: # ./deploy.sh [OPTIONS] # SDS_SKIP_SECRETS=true ./deploy.sh local # SDS_FORCE_SECRETS=true SDS_DETACH=false ./deploy.sh production +# SDS_SKIP_SFS=true ./deploy.sh local set -euo pipefail SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) PROJECT_ROOT=$(cd "${SCRIPT_DIR}/.." && pwd) +SFS_ROOT=$(cd "${PROJECT_ROOT}/../seaweedfs" 2>/dev/null && pwd) || SFS_ROOT="" # shellcheck disable=SC1091 source "${SCRIPT_DIR}/common.sh" function show_usage() { - echo -e "Usage: ${0} [OPTIONS] " - echo "" - echo "Deploy the SDS Gateway environment following README instructions." - echo "" - echo -e "\e[34mThis is a high level script that automates:\e[0m" - echo " 1. Secret generation" - echo " 2. Docker network creation" - echo " 3. Service deployment" - echo " 4. Database migrations" - echo " 5. Superuser creation (interactive)" - echo " 6. MinIO bucket creation" - echo "" - echo -e "\e[34mOPTIONS:\e[0m" - echo " -f, --force Overwrite existing env files when generating secrets" - echo " -s, --skip-secrets Skip secret generation (use existing secrets)" - echo " -n, --skip-network Skip network creation" - echo " -d, --detach Run services in detached mode (default for prod)" - echo " -h, --help Show this help message" - echo "" - echo -e "\e[34mARGUMENTS:\e[0m" - echo " Target environment to deploy" - echo "" - echo -e "\e[34mENVIRONMENT VARIABLES:\e[0m" - echo " SDS_FORCE_SECRETS Overwrite existing secrets (true/false, default: false)" - echo " SDS_SKIP_SECRETS Skip secret generation (true/false, default: false)" - echo " SDS_SKIP_NETWORK Skip network creation (true/false, default: false)" - echo " SDS_DETACH Run in detached mode (true/false, default: true for prod)" - echo "" - echo " Note: Command-line options take precedence over environment variables." - echo "" - echo -e "\e[34mEXAMPLES:\e[0m" - echo " ${0} local # Quick local deploy" - echo " ${0} --force production # Production deploy, regenerate secrets" - echo " ${0} --skip-secrets ci # CI deploy using existing secrets" - echo " SDS_SKIP_SECRETS=true ${0} local # Use env var to skip secrets" - echo " SDS_DETACH=false ${0} production # Production in foreground mode" - echo "" - echo -e "\e[34mNOTES:\e[0m" - echo " - For production, ensure prod-hostnames.env is configured first" - echo " - Superuser creation is interactive by default" - echo " - MinIO bucket must be created manually via web UI (localhost:9001 or 19001)" - echo " - Use 'just redeploy' for quick rebuilds after initial deploy" - exit 0 + echo -e "Usage: ${0} [OPTIONS] " + echo "" + echo "Deploy the SDS Gateway environment following README instructions." 
+    echo ""
+    echo -e "\e[34mThis is a high level script that automates:\e[0m"
+    echo " 1. Secret generation"
+    echo " 2. Docker network creation"
+    echo " 3. SeaweedFS stack deployment (start + configure credentials + create bucket)"
+    echo " 4. Gateway service deployment"
+    echo " 5. Database migrations"
+    echo " 6. Superuser creation (interactive)"
+    echo ""
+    echo -e "\e[34mOPTIONS:\e[0m"
+    echo " -f, --force Overwrite existing env files when generating secrets"
+    echo " -s, --skip-secrets Skip secret generation (use existing secrets)"
+    echo " -n, --skip-network Skip network creation"
+    echo " --skip-sfs Skip SeaweedFS stack deployment"
+    echo " -d, --detach Run services in detached mode (default for prod)"
+    echo " -h, --help Show this help message"
+    echo ""
+    echo -e "\e[34mARGUMENTS:\e[0m"
+    echo " <environment> Target environment to deploy"
+    echo ""
+    echo -e "\e[34mENVIRONMENT VARIABLES:\e[0m"
+    echo " SDS_FORCE_SECRETS Overwrite existing secrets (true/false, default: false)"
+    echo " SDS_SKIP_SECRETS Skip secret generation (true/false, default: false)"
+    echo " SDS_SKIP_NETWORK Skip network creation (true/false, default: false)"
+    echo " SDS_SKIP_SFS Skip SeaweedFS deployment (true/false, default: false)"
+    echo " SDS_DETACH Run in detached mode (true/false, default: true for prod)"
+    echo ""
+    echo " Note: Command-line options take precedence over environment variables."
+    echo ""
+    echo -e "\e[34mEXAMPLES:\e[0m"
+    echo " ${0} local # Quick local deploy"
+    echo " ${0} --force production # Production deploy, regenerate secrets"
+    echo " ${0} --skip-secrets ci # CI deploy using existing secrets"
+    echo " SDS_SKIP_SECRETS=true ${0} local # Use env var to skip secrets"
+    echo " SDS_DETACH=false ${0} production # Production in foreground mode"
+    echo ""
+    echo -e "\e[34mNOTES:\e[0m"
+    echo " - For production, ensure prod-hostnames.env is configured first"
+    echo " - Superuser creation is interactive by default"
+    echo " - S3 credentials are read from PRIMARY_* vars in .envs/<env>/storage.env"
+    echo "   and configured automatically via SeaweedFS weed shell"
+    echo " - Use 'just redeploy' for quick rebuilds after initial deploy"
+    exit 0
 }

 function setup_prod_hostnames() {
-    local script_dir="$1"
-    local env_type="$2"
-    local example_file="${script_dir}/prod-hostnames.example.env"
-    local target_file="${script_dir}/prod-hostnames.env"
-
-    if [[ -f "${example_file}" && ! -f "${target_file}" ]]; then
-        log_msg "Creating prod-hostnames.env from example..."
-        cp "${example_file}" "${target_file}"
-        log_success "Created: ${target_file}"
-
-        if [[ "${env_type}" == "production" ]]; then
-            local current_hostname
-            current_hostname=$(hostname)
-            if [[ -n "${current_hostname}" ]]; then
-                echo "${current_hostname}" >> "${target_file}"
-                log_success "Appended hostname to ${target_file}: ${current_hostname}"
-            else
-                log_warning "Could not determine current hostname; skipping append"
-            fi
-        fi
-    fi
-
-    # if we're running a production deploy, check the hostname is
-    # listed in the file first, otherwise abort the deployment
-    if [[ "${env_type}" == "production" && -f "${target_file}" ]]; then
-        local current_hostname
-        local target_file_cur_dir
-        current_hostname=$(hostname)
-        target_file_cur_dir=$(realpath --relative-to="." "${target_file}")
-        if [[ -n "${current_hostname}" ]]; then
-            if ! grep -Fxq "${current_hostname}" "${target_file}"; then
-                log_error "Current hostname '${current_hostname}' not a production host listed in '${target_file_cur_dir}'."
- log_msg "Add it manually:\n\n\techo '${current_hostname}' >> ${target_file_cur_dir}" - exit 1 - fi - else - log_warning "Could not determine current hostname; cannot validate ${target_file_cur_dir}" - fi - fi + local script_dir="$1" + local env_type="$2" + local example_file="${script_dir}/prod-hostnames.example.env" + local target_file="${script_dir}/prod-hostnames.env" + + if [[ -f "${example_file}" && ! -f "${target_file}" ]]; then + log_msg "Creating prod-hostnames.env from example..." + cp "${example_file}" "${target_file}" + log_success "Created: ${target_file}" + + if [[ "${env_type}" == "production" ]]; then + local current_hostname + current_hostname=$(hostname) + if [[ -n "${current_hostname}" ]]; then + echo "${current_hostname}" >>"${target_file}" + log_success "Appended hostname to ${target_file}: ${current_hostname}" + else + log_warning "Could not determine current hostname; skipping append" + fi + fi + fi + + # if we're running a production deploy, check the hostname is + # listed in the file first, otherwise abort the deployment + if [[ "${env_type}" == "production" && -f "${target_file}" ]]; then + local current_hostname + local target_file_cur_dir + current_hostname=$(hostname) + target_file_cur_dir=$(realpath --relative-to="." "${target_file}") + if [[ -n "${current_hostname}" ]]; then + if ! grep -Fxq "${current_hostname}" "${target_file}"; then + log_error "Current hostname '${current_hostname}' not a production host listed in '${target_file_cur_dir}'." + log_msg "Add it manually:\n\n\techo '${current_hostname}' >> ${target_file_cur_dir}" + exit 1 + fi + else + log_warning "Could not determine current hostname; cannot validate ${target_file_cur_dir}" + fi + fi } function create_docker_network() { - local env_type="$1" - local network_name="sds-network-${env_type}" - - log_header "Docker Network Setup" - - if docker network inspect "${network_name}" &>/dev/null; then - log_msg "Network '${network_name}' already exists" - else - log_msg "Creating Docker network: ${network_name}" - docker network create "${network_name}" --driver=bridge - log_success "Network created: ${network_name}" - fi + local env_type="$1" + local network_name="sds-network-${env_type}" + + log_header "Docker Network Setup" + + if docker network inspect "${network_name}" &>/dev/null; then + log_msg "Network '${network_name}' already exists" + else + log_msg "Creating Docker network: ${network_name}" + docker network create "${network_name}" --driver=bridge + log_success "Network created: ${network_name}" + fi } function generate_secrets() { - local env_type="$1" - local force="$2" + local env_type="$1" + local force="$2" - log_header "Secret Generation" + log_header "Secret Generation" - local force_flag="" - if [[ "${force}" == "true" ]]; then - force_flag="--force" - fi + local force_flag="" + if [[ "${force}" == "true" ]]; then + force_flag="--force" + fi - log_msg "Generating secrets for '${env_type}' environment..." - just generate-secrets "${env_type}" ${force_flag} + log_msg "Generating secrets for '${env_type}' environment..." 
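+    # force_flag is left unquoted on purpose: when it is empty it expands to
+    # no argument at all, rather than an empty-string positional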
+ just generate-secrets "${env_type}" ${force_flag} } function build_app() { - local service_name - service_name="$1" - log_header "Building stack" - if [[ -n "${service_name}" ]]; then - log_msg "Pulling images and building only service: ${service_name}" - else - log_msg "Pulling images and building all services" - fi - just build "${service_name}" + local service_name + service_name="$1" + log_header "Building stack" + if [[ -n "${service_name}" ]]; then + log_msg "Pulling images and building only service: ${service_name}" + else + log_msg "Pulling images and building all services" + fi + just build "${service_name}" } function first_start() { - log_header "First Stack Startup" + log_header "First Stack Startup" - log_msg "Building images" - just build + log_msg "Building images" + just build - log_msg "Starting opensearch" - just up opensearch + log_msg "Starting opensearch" + just up opensearch - log_msg "Waiting for OpenSearch to be healthy..." - wait_for_service "opensearch" 60 || { - log_warning "OpenSearch health check timed out, tearing down anyway" - } - just up || true + log_msg "Waiting for OpenSearch to be healthy..." + wait_for_service "opensearch" 60 || { + log_warning "OpenSearch health check timed out, tearing down anyway" + } + just up || true } function start_stack() { - log_header "Starting SDS stack" - log_msg "Starting stack..." - { - just build - just up - } &>/dev/null & + log_header "Starting SDS stack" + log_msg "Starting stack..." + { + just build + just up + } &>/dev/null & } function stop_stack() { - log_msg "Stopping stack..." - just down + log_msg "Stopping stack..." + just down } function wait_for_service() { - local container_name="$1" - local max_attempts="${2:-30}" - local attempt=1 - - log_msg "Waiting for container '${container_name}' to be ready..." - - while [[ ${attempt} -le ${max_attempts} ]]; do - if just dc exec "${container_name}" echo "ready" &>/dev/null; then - log_success "Container '${container_name}' is ready" - return 0 - fi - - if [[ $((attempt % 5)) -eq 0 ]]; then - log_msg "Still waiting... (attempt ${attempt}/${max_attempts})" - fi - - sleep 2 - attempt=$((attempt + 1)) - done - - log_error "Container '${container_name}' did not become ready in time" - return 1 + local container_name="$1" + local max_attempts="${2:-30}" + local attempt=1 + + log_msg "Waiting for container '${container_name}' to be ready..." + + while [[ ${attempt} -le ${max_attempts} ]]; do + if just dc exec "${container_name}" echo "ready" &>/dev/null; then + log_success "Container '${container_name}' is ready" + return 0 + fi + + if [[ $((attempt % 5)) -eq 0 ]]; then + log_msg "Still waiting... (attempt ${attempt}/${max_attempts})" + log_msg "=== Container logs (last 20 lines) ===" + docker logs --tail 20 "${container_name}" 2>&1 | while IFS= read -r line; do + log_msg " ${line}" + done + log_msg "==========================================" + fi + + sleep 2 + attempt=$((attempt + 1)) + done + + log_error "Container '${container_name}' did not become ready in time" + return 1 } function run_migrations() { - local container_name="$1" + local container_name="$1" - log_header "Database Migrations" + log_header "Database Migrations" - log_msg "Running Django migrations..." - # you probably don't need/want makemigrations at this stage; here for documentation - # just uv run manage.py makemigrations - just uv run manage.py migrate - log_success "Migrations applied" + log_msg "Running Django migrations..." 
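+    # 'manage.py migrate' is idempotent: already-applied migrations are
+    # skipped, so this is safe to re-run on every deploy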
+ # you probably don't need/want makemigrations at this stage; here for documentation + # just uv run manage.py makemigrations + just uv run manage.py migrate + log_success "Migrations applied" } function create_superuser() { - local container_name="$1" - local env_type="$2" - - log_header "Superuser Creation" - - local has_superuser - has_superuser=$(just uv run manage.py check_superuser_exists 2>/dev/null | tail -n1 | tr -d '[:space:]') - - case "${has_superuser}" in - yes|no) ;; - *) - log_error "Unexpected output from check_superuser_exists: '${has_superuser}'" - return 1 - ;; - esac - - if [[ "${has_superuser}" == "yes" ]]; then - log_msg "Superuser already exists, skipping creation" - return 0 - fi - - if [[ "${env_type}" == "ci" ]]; then - log_msg "Creating superuser for CI environment (non-interactive)..." - just uv run manage.py create_ci_superuser - else - log_msg "Creating superuser (interactive)..." - log_msg "You will be prompted for username, email, and password" - echo "" - just uv run manage.py createsuperuser || { - log_warning "Superuser creation skipped or failed" - log_msg "You can create it later with: just uv run manage.py createsuperuser" - } - fi + local container_name="$1" + local env_type="$2" + + log_header "Superuser Creation" + + local has_superuser + has_superuser=$(just uv run manage.py check_superuser_exists 2>/dev/null | tail -n1 | tr -d '[:space:]') + + case "${has_superuser}" in + yes | no) ;; + *) + log_error "Unexpected output from check_superuser_exists: '${has_superuser}'" + return 1 + ;; + esac + + if [[ "${has_superuser}" == "yes" ]]; then + log_msg "Superuser already exists, skipping creation" + return 0 + fi + + if [[ "${env_type}" == "ci" ]]; then + log_msg "Creating superuser for CI environment (non-interactive)..." + just uv run manage.py create_ci_superuser + else + log_msg "Creating superuser (interactive)..." + log_msg "You will be prompted for username, email, and password" + echo "" + just uv run manage.py createsuperuser || { + log_warning "Superuser creation skipped or failed" + log_msg "You can create it later with: just uv run manage.py createsuperuser" + } + fi } function show_next_steps() { - local env_type="$1" - local port_prefix="" - - if [[ "${env_type}" == "production" ]]; then - port_prefix="1" - fi - - log_header "Deployment Complete!" - - echo "" - echo "🎉 Gateway deployed successfully!" - echo "" - echo "Next steps:" - echo "" - echo " 1. Access the web interface:" - echo " - Gateway: http://localhost:${port_prefix}8000" - echo " - Admin panel: http://localhost:${port_prefix}8000/admin" - echo "" - echo " 2. Run tests to verify installation:" - echo " just test" - echo "" - echo " 3. For production SDK API key generation:" - echo " - Visit http://localhost:${port_prefix}8000/users/generate-api-key-form/" - echo " - Copy the key to .envs/${env_type}/django.env" - echo "" - - if [[ "${env_type}" == "local" ]]; then - echo " 4. Check webpack dev server:" - echo " http://localhost:3000/webpack-dev-server" - echo "" - fi - - echo "📚 For more information, see gateway/README.md" - echo "" + local env_type="$1" + local port_prefix="" + + if [[ "${env_type}" == "production" ]]; then + port_prefix="1" + fi + + log_header "Deployment Complete!" + + echo "" + echo "🎉 Gateway deployed successfully!" + echo "" + echo "Next steps:" + echo "" + echo " 1. Access the web interface:" + echo " - Gateway: http://localhost:${port_prefix}8000" + echo " - Admin panel: http://localhost:${port_prefix}8000/admin" + echo "" + echo " 2. 
Run tests to verify installation:" + echo " just test" + echo "" + echo " 3. For production SDK API key generation:" + echo " - Visit http://localhost:${port_prefix}8000/users/generate-api-key-form/" + echo " - Copy the key to .envs/${env_type}/django.env" + echo "" + + if [[ "${env_type}" == "local" ]]; then + echo " 4. Check webpack dev server:" + echo " http://localhost:3000/webpack-dev-server" + echo "" + fi + + echo "📚 For more information, see gateway/README.md" + echo "" } function parse_arguments() { - local -n args_ref=$1 - shift - - # read from environment variables first (command-line args will override) - if [[ "${SDS_FORCE_SECRETS:-}" == "true" ]]; then - args_ref[force_secrets]="true" - fi - if [[ "${SDS_SKIP_SECRETS:-}" == "true" ]]; then - args_ref[skip_secrets]="true" - fi - if [[ "${SDS_SKIP_NETWORK:-}" == "true" ]]; then - args_ref[skip_network]="true" - fi - if [[ "${SDS_DETACH:-}" == "true" ]]; then - args_ref[detach]="true" - elif [[ "${SDS_DETACH:-}" == "false" ]]; then - args_ref[detach]="false" - fi - - # parse command-line arguments (these override env vars) - while [[ $# -gt 0 ]]; do - case "$1" in - -f|--force) - args_ref[force_secrets]="true" - shift - ;; - -s|--skip-secrets) - args_ref[skip_secrets]="true" - shift - ;; - -n|--skip-network) - args_ref[skip_network]="true" - shift - ;; - -d|--detach) - args_ref[detach]="true" - shift - ;; - -h|--help) - show_usage - ;; - local|production|ci) - args_ref[env_type]="$1" - shift - ;; - *) - log_error "Unknown argument: $1" - show_usage - ;; - esac - done - - if [[ -z "${args_ref[env_type]}" ]]; then - log_error "Environment type required (local, production, or ci)" - show_usage - fi - - # auto-detach for production unless explicitly overridden - if [[ "${args_ref[env_type]}" == "production" && "${SDS_DETACH:-}" != "false" ]]; then - args_ref[detach]="true" - fi + local -n _args_ref=$1 + shift + + # Ensure all keys exist (shellcheck can't follow nameref) + if [[ -z "${_args_ref[force_secrets]+x}" ]]; then + _args_ref[force_secrets]="false" + fi + if [[ -z "${_args_ref[skip_secrets]+x}" ]]; then + _args_ref[skip_secrets]="false" + fi + if [[ -z "${_args_ref[skip_network]+x}" ]]; then + _args_ref[skip_network]="false" + fi + if [[ -z "${_args_ref[skip_sfs]+x}" ]]; then + _args_ref[skip_sfs]="false" + fi + if [[ -z "${_args_ref[detach]+x}" ]]; then + _args_ref[detach]="false" + fi + # read from environment variables first (command-line args will override) + if [[ "${SDS_FORCE_SECRETS:-}" == "true" ]]; then + _args_ref[force_secrets]="true" + fi + if [[ "${SDS_SKIP_SECRETS:-}" == "true" ]]; then + _args_ref[skip_secrets]="true" + fi + if [[ "${SDS_SKIP_NETWORK:-}" == "true" ]]; then + _args_ref[skip_network]="true" + fi + if [[ "${SDS_SKIP_SFS:-}" == "true" ]]; then + _args_ref[skip_sfs]="true" + fi + if [[ "${SDS_DETACH:-}" == "true" ]]; then + _args_ref[detach]="true" + elif [[ "${SDS_DETACH:-}" == "false" ]]; then + _args_ref[detach]="false" + fi + + # parse command-line arguments (these override env vars) + while [[ $# -gt 0 ]]; do + case "$1" in + -f | --force) + _args_ref[force_secrets]="true" + shift + ;; + -s | --skip-secrets) + _args_ref[skip_secrets]="true" + shift + ;; + -n | --skip-network) + _args_ref[skip_network]="true" + shift + ;; + --skip-sfs) + _args_ref[skip_sfs]="true" + shift + ;; + -d | --detach) + _args_ref[detach]="true" + shift + ;; + -h | --help) + show_usage + ;; + local | production | ci) + _args_ref[env_type]="$1" + shift + ;; + *) + log_error "Unknown argument: $1" + show_usage + ;; + esac 
+ done + + if [[ -z "${_args_ref[env_type]}" ]]; then + log_error "Environment type required (local, production, or ci)" + show_usage + fi + + # auto-detach for production unless explicitly overridden + if [[ "${_args_ref[env_type]}" == "production" && "${SDS_DETACH:-}" != "false" ]]; then + _args_ref[detach]="true" + fi } function determine_container_name() { - local env_type="$1" - if [[ "${env_type}" == "production" ]]; then - echo "sds-gateway-prod-app" - elif [[ "${env_type}" == "ci" ]]; then - echo "sds-gateway-ci-app" - elif [[ "${env_type}" == "local" ]]; then - echo "sds-gateway-local-app" - else - log_error "Unknown environment type: ${env_type}" - return 1 - fi + local env_type="$1" + if [[ "${env_type}" == "production" ]]; then + echo "sds-gateway-prod-app" + elif [[ "${env_type}" == "ci" ]]; then + echo "sds-gateway-ci-app" + elif [[ "${env_type}" == "local" ]]; then + echo "sds-gateway-local-app" + else + log_error "Unknown environment type: ${env_type}" + return 1 + fi } function setup_secrets_and_network() { - local env_type="$1" - local skip_secrets="$2" - local force_secrets="$3" - local skip_network="$4" - - if [[ "${skip_secrets}" == "false" ]]; then - generate_secrets "${env_type}" "${force_secrets}" - else - log_msg "Skipping secret generation (using existing secrets)" - fi - - if [[ "${skip_network}" == "false" ]]; then - create_docker_network "${env_type}" - else - log_msg "Skipping network creation" - fi + local env_type="$1" + local skip_secrets="$2" + local force_secrets="$3" + local skip_network="$4" + + if [[ "${skip_secrets}" == "false" ]]; then + generate_secrets "${env_type}" "${force_secrets}" + else + log_msg "Skipping secret generation (using existing secrets)" + fi + + if [[ "${skip_network}" == "false" ]]; then + create_docker_network "${env_type}" + else + log_msg "Skipping network creation" + fi } function setup_database() { - local container_name="$1" - local env_type="$2" + local container_name="$1" + local env_type="$2" - log_header "Setting up Database" + log_header "Setting up Database" - wait_for_service "${container_name}" 60 || { - log_error "Failed to start services" - log_msg "Check logs with: just logs" - exit 1 - } + wait_for_service "${container_name}" 60 || { + log_error "Failed to start services" + log_msg "Check logs with: just logs" + exit 1 + } - run_migrations "${container_name}" - create_superuser "${container_name}" "${env_type}" + run_migrations "${container_name}" + create_superuser "${container_name}" "${env_type}" } -function create_minio_bucket() { - local env_type="$1" - local minio_env_file="${PROJECT_ROOT}/.envs/${env_type}/minio.env" +function create_storage_buckets() { + local env_type="$1" + log_header "Creating Object Store Buckets" + log_msg "Ensuring storage buckets exist on configured object stores..." + set +e + just uv run manage.py create_storage_buckets + local mgmt_exit=$? + set -e + if [[ ${mgmt_exit} -ne 0 ]]; then + log_warning "Bucket creation had non-zero exit (may be expected if secondary is unreachable)" + fi + log_success "Storage buckets ready" +} + +function deploy_sfs_stack() { + local env_type="$1" + local sfs_env_file="${PROJECT_ROOT}/.envs/${env_type}/storage.env" - log_header "MinIO Bucket Setup" + log_header "SeaweedFS Stack Deployment" - if [[ ! -f "${minio_env_file}" ]]; then - log_error "MinIO environment file not found: ${minio_env_file}" - return 1 - fi + if [[ -z "${SFS_ROOT}" || ! 
-d "${SFS_ROOT}" ]]; then + log_warning "SeaweedFS directory not found at '${PROJECT_ROOT}/../seaweedfs' — skipping SFS deployment" + log_msg "Run the SFS stack manually from the seaweedfs/ directory before starting the gateway." + return 0 + fi - local minio_user - local minio_password - minio_user=$(grep -E '^MINIO_ROOT_USER=' "${minio_env_file}" | cut -d'=' -f2) - minio_password=$(grep -E '^MINIO_ROOT_PASSWORD=' "${minio_env_file}" | cut -d'=' -f2) + if [[ ! -f "${SFS_ROOT}/scripts/deploy.sh" ]]; then + log_warning "SeaweedFS deploy script not found at '${SFS_ROOT}/scripts/deploy.sh' — skipping" + return 0 + fi - if [[ -z "${minio_user}" || -z "${minio_password}" ]]; then - log_error "Failed to extract MinIO credentials from ${minio_env_file}" - return 1 - fi + # ensure the shared network exists before SFS references it as external (CI/prod) + create_docker_network "${env_type}" - local alias_name="local" # always "local", doesn't depend on env_type + log_msg "Deploying SeaweedFS stack (env: ${env_type})..." + "${SFS_ROOT}/scripts/deploy.sh" \ + --sfs-env "${sfs_env_file}" \ + "${env_type}" - just dc exec -it minio mc alias set "${alias_name}" "http://localhost:9000" "${minio_user}" "${minio_password}" - just dc exec -it minio mc mb --ignore-existing "${alias_name}/spectrumx" + log_success "SeaweedFS stack deployed" } function finalize_deployment() { - local env_type="$1" - local detach="$2" + local env_type="$1" + local detach="$2" - log_header "Finalizing Deployment" - start_stack - show_next_steps "${env_type}" + log_header "Finalizing Deployment" + start_stack + show_next_steps "${env_type}" } function main() { - declare -A args=( - [force_secrets]="false" - [skip_secrets]="false" - [skip_network]="true" # usually works when skipped - [detach]="false" - [env_type]="" - ) + declare -A args=( + [force_secrets]="false" + [skip_secrets]="false" + [skip_network]="false" + [skip_sfs]="false" + [detach]="false" + [env_type]="" + ) + + parse_arguments args "$@" + + cd "${PROJECT_ROOT}" + log_header "SDS Gateway Deployment - ${args[env_type]} environment" - parse_arguments args "$@" + local container_name + container_name=$(determine_container_name "${args[env_type]}") - cd "${PROJECT_ROOT}" - log_header "SDS Gateway Deployment - ${args[env_type]} environment" + setup_secrets_and_network \ + "${args[env_type]}" \ + "${args[skip_secrets]}" \ + "${args[force_secrets]}" \ + "${args[skip_network]}" - local container_name - container_name=$(determine_container_name "${args[env_type]}") + setup_prod_hostnames "${SCRIPT_DIR}" "${args[env_type]}" - setup_secrets_and_network \ - "${args[env_type]}" \ - "${args[skip_secrets]}" \ - "${args[force_secrets]}" \ - "${args[skip_network]}" + if [[ "${args[skip_sfs]}" == "false" ]]; then + deploy_sfs_stack "${args[env_type]}" + else + log_msg "Skipping SeaweedFS stack deployment (--skip-sfs)" + fi - setup_prod_hostnames "${SCRIPT_DIR}" "${args[env_type]}" + build_app "${container_name}" + first_start - build_app "${container_name}" - first_start + create_storage_buckets "${args[env_type]}" - setup_database "${container_name}" "${args[env_type]}" - create_minio_bucket "${args[env_type]}" - finalize_deployment "${args[env_type]}" "${args[detach]}" + setup_database "${container_name}" "${args[env_type]}" + finalize_deployment "${args[env_type]}" "${args[detach]}" } main "$@" diff --git a/gateway/scripts/env-selection.sh b/gateway/scripts/env-selection.sh index 57e35a96e..2073cb053 100755 --- a/gateway/scripts/env-selection.sh +++ 
b/gateway/scripts/env-selection.sh @@ -2,132 +2,142 @@ set -euo pipefail IFS=$'\n\t' -is_production_host() { - local script_dir - script_dir=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) - local host - host=$(hostname) - local prod_hosts_file="${script_dir}/prod-hostnames.env" +function is_production_host() { + local script_dir + script_dir=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) + local host + host=$(hostname) + local prod_hosts_file="${script_dir}/prod-hostnames.env" - if [[ ! -f "${prod_hosts_file}" ]]; then - printf '\033[33mProduction host list not found at %s: defaulting to local\033[0m\n' "${prod_hosts_file}" >&2 - printf 'Create this file to make the warning go away:\n\n\tcp %s/prod-hostnames.example.env %s\n\n' "${script_dir}" "${prod_hosts_file}" >&2 - return 1 - fi + if [[ ! -f "${prod_hosts_file}" ]]; then + printf '\033[33mProduction host list not found at %s: defaulting to local\033[0m\n' "${prod_hosts_file}" >&2 + printf 'Create this file to make the warning go away:\n\n\tcp %s/prod-hostnames.example.env %s\n\n' "${script_dir}" "${prod_hosts_file}" >&2 + return 1 + fi - while read -r line; do - # trim leading/trailing whitespace - line=$(echo "${line}" | xargs) - # skip comments - [[ -z "${line}" || ${line:0:1} == '#' ]] && continue - # check if the line matches the current host - if [[ "${line}" == "${host}" ]]; then - return 0 - fi - done < "${prod_hosts_file}" + while read -r line; do + # trim leading/trailing whitespace + line=$(echo "${line}" | xargs) + # skip comments + [[ -z "${line}" || ${line:0:1} == '#' ]] && continue + # check if the line matches the current host + if [[ "${line}" == "${host}" ]]; then + return 0 + fi + done <"${prod_hosts_file}" - return 1 + return 1 } -is_ci_env() { - if [[ -n "${CI:-}" ]] || [[ -n "${GITHUB_ACTIONS:-}" ]] || [[ -n "${GITLAB_CI:-}" ]] || [[ -n "${BUILD_ID:-}" ]] || [[ -n "${JENKINS_URL:-}" ]]; then - return 0 - fi - return 1 +function is_ci_env() { + if [[ -n "${CI:-}" ]] || [[ -n "${GITHUB_ACTIONS:-}" ]] || [[ -n "${GITLAB_CI:-}" ]] || [[ -n "${BUILD_ID:-}" ]] || [[ -n "${JENKINS_URL:-}" ]]; then + return 0 + fi + return 1 } -get_target_value() { - local target=$1 - local env_type=$2 - local local_env_file=".envs/local/opensearch.env" - local production_env_file=".envs/production/opensearch.env" - local ci_env_file=".envs/ci/opensearch.env" - local value +function get_target_value() { + local target=$1 + local env_type=$2 + local local_env_file=".envs/local/opensearch.env" + local production_env_file=".envs/production/opensearch.env" + local ci_env_file=".envs/ci/opensearch.env" + local value - case "${target}" in - env) - value="${env_type}" - ;; - compose_file) - case "${env_type}" in - production) - value='compose.production.yaml' - ;; - local) - value='compose.local.yaml' - ;; - ci) - value='compose.ci.yaml' - ;; - esac - ;; - app_container) - case "${env_type}" in - ci) - value='sds-gateway-ci-app' - ;; - local) - value='sds-gateway-local-app' - ;; - production) - value='sds-gateway-prod-app' - ;; - *) - printf 'unsupported environment type: %s\n' "${env_type}" >&2 - exit 1 - ;; - esac - ;; - env_file) - case "${env_type}" in - ci) - value="${ci_env_file}" - ;; - local) - value="${local_env_file}" - ;; - production) - value="${production_env_file}" - ;; - *) - printf 'unsupported environment type: %s\n' "${env_type}" >&2 - exit 1 - ;; - esac - ;; - *) - printf 'unsupported target: %s\n' "${target}" >&2 - exit 1 - ;; - esac + case "${target}" in + env) + value="${env_type}" + ;; + compose_file) + case "${env_type}" in 
+        production)
+            value='compose.production.yaml'
+            ;;
+        local)
+            value='compose.local.yaml'
+            ;;
+        ci)
+            value='compose.ci.yaml'
+            ;;
+        esac
+        ;;
+    app_container)
+        case "${env_type}" in
+        ci)
+            value='sds-gateway-ci-app'
+            ;;
+        local)
+            value='sds-gateway-local-app'
+            ;;
+        production)
+            value='sds-gateway-prod-app'
+            ;;
+        *)
+            printf 'unsupported environment type: %s\n' "${env_type}" >&2
+            exit 1
+            ;;
+        esac
+        ;;
+    env_file)
+        case "${env_type}" in
+        ci)
+            value="${ci_env_file}"
+            ;;
+        local)
+            value="${local_env_file}"
+            ;;
+        production)
+            value="${production_env_file}"
+            ;;
+        *)
+            printf 'unsupported environment type: %s\n' "${env_type}" >&2
+            exit 1
+            ;;
+        esac
+        ;;
+    *)
+        printf 'unsupported target: %s\n' "${target}" >&2
+        exit 1
+        ;;
+    esac

-  if [[ "${target}" == "compose_file" && ! -f "${value}" ]]; then
-    printf '\033[31mERROR: selected compose file "%s" does not exist\033[0m\n' "${value}" >&2
-  fi
-  if [[ "${target}" == "env_file" && ! -f "${value}" ]]; then
-    printf '\033[31mERROR: selected env file "%s" does not exist\033[0m\n' "${value}" >&2
-  fi
+    if [[ "${target}" == "compose_file" && ! -f "${value}" ]]; then
+        printf '\033[31mERROR: selected compose file "%s" does not exist\033[0m\n' "${value}" >&2
+    fi
+    if [[ "${target}" == "env_file" && ! -f "${value}" ]]; then
+        printf '\033[31mERROR: selected env file "%s" does not exist\033[0m\n' "${value}" >&2
+    fi

-  printf '%s\n' "${value}"
+    printf '%s\n' "${value}"
 }

-main() {
-  if [[ $# -ne 1 ]]; then
-    printf 'usage: %s <target>\n' "${0}" >&2
-    exit 1
-  fi
+function main() {
+    if [[ $# -ne 1 ]]; then
+        printf 'usage: %s <target>\n' "${0}" >&2
+        exit 1
+    fi

-  local target=$1
-  local env_type
-  if is_ci_env; then
-    env_type='ci'
-  elif is_production_host; then
-    env_type='production'
-  else
-    env_type='local'
-  fi
+    local target=${1:-}
+    local env_type

-  get_target_value "${target}" "${env_type}"
+    # allow explicit override via SDS_ENV (e.g., SDS_ENV=ci just env)
+    if [[ -n "${SDS_ENV:-}" ]]; then
+        case "${SDS_ENV}" in
+        ci | local | production) env_type="${SDS_ENV}" ;;
+        *)
+            printf '\033[33mUnknown SDS_ENV="%s": must be ci, local, or production\033[0m\n' "${SDS_ENV}" >&2
+            exit 1
+            ;;
+        esac
+    elif is_ci_env; then
+        env_type='ci'
+    elif is_production_host; then
+        env_type='production'
+    else
+        env_type='local'
+    fi
+
+    get_target_value "${target}" "${env_type}"
 }

 main "$@"
diff --git a/gateway/scripts/generate-secrets.sh b/gateway/scripts/generate-secrets.sh
index 73757980d..690926173 100755
--- a/gateway/scripts/generate-secrets.sh
+++ b/gateway/scripts/generate-secrets.sh
@@ -1,12 +1,25 @@
 #!/usr/bin/env bash
-set -euo pipefail
+set -Eeuo pipefail

 SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
-PROJECT_ROOT=$(cd "${SCRIPT_DIR}/.." && pwd)
-EXAMPLE_DIR="${PROJECT_ROOT}/.envs/example"
+GATEWAY_ROOT=$(cd "${SCRIPT_DIR}/.." && pwd)
+SFS_ROOT=$(cd "${GATEWAY_ROOT}/../seaweedfs" && pwd)
+EXAMPLE_DIR="${GATEWAY_ROOT}/.envs/example"

-usage() {
-  cat << EOF
+# PRIMARY (RustFS or SeaweedFS)
+PRIMARY_ACCESS_KEY_ID=""
+PRIMARY_SECRET_ACCESS_KEY=""
+PRIMARY_ENDPOINT_URL=""
+PRIMARY_S3_ENDPOINT_URL=""
+
+# SECONDARY (RustFS or SeaweedFS) — only for production
+SECONDARY_ACCESS_KEY_ID=""
+SECONDARY_SECRET_ACCESS_KEY=""
+SECONDARY_ROOT_USER="minioadmin"
+SECONDARY_ROOT_PASSWORD=""
+
+function usage() {
+    cat <<EOF
 Usage: ${0} [OPTIONS] <environment>

 Generate environment secrets for the gateway component.
@@ -23,162 +36,292 @@ EXAMPLES:
   ${0} --force ci # Generate CI env files (overwrite if exist)
   ${0} production # Generate production env files

-NOTES:
-  - Generated files are placed in .envs/<env>/ directory
-  - Example templates are read from .envs/example/
-  - Secrets are randomly generated using OpenSSL
-  - CI environment uses insecure but deterministic values for ephemeral usage
+NOTES:
+  - Generated files are placed in .envs/<env>/ directory
+  - Example templates are read from .envs/example/
+  - Secrets are randomly generated using OpenSSL
+  - CI environment uses insecure but deterministic values for ephemeral usage
+  - local: PRIMARY (RustFS) + SECONDARY (SeaweedFS)
+  - production: PRIMARY (SeaweedFS) + SECONDARY (RustFS)
+  - ci: PRIMARY only (RustFS). No secondary storage.
 EOF
-  exit 0
+    exit 0
 }

-generate_secret() {
-  local length="${1:-40}"
-  openssl rand -base64 48 | tr -d "=+/" | cut -c1-"${length}"
+function configure_object_store_defaults() {
+    local env_type="$1"
+
+    if [[ -n "${PRIMARY_ENDPOINT_URL}" ]]; then
+        return 0
+    fi
+
+    case "${env_type}" in
+    local)
+        PRIMARY_ENDPOINT_URL="sds-gateway-local-rustfs:9000"
+        PRIMARY_ACCESS_KEY_ID=$(generate_secret 32)
+        PRIMARY_SECRET_ACCESS_KEY=$(generate_secret 32)
+        # SECONDARY = SeaweedFS (S3 gateway)
+        SECONDARY_ENDPOINT_URL="sds-gateway-local-sfs-s3:8333"
+        SECONDARY_ACCESS_KEY_ID=$(generate_secret 32)
+        SECONDARY_SECRET_ACCESS_KEY=$(generate_secret 32)
+        ;;
+    ci)
+        PRIMARY_ENDPOINT_URL="sds-gateway-ci-rustfs:9000"
+        ;;
+    production)
+        PRIMARY_ENDPOINT_URL="sds-gateway-prod-sfs-s3:8333"
+        ;;
+    *)
+        echo "ERROR: Unsupported environment type: ${env_type}" >&2
+        return 1
+        ;;
+    esac
+
+    PRIMARY_S3_ENDPOINT_URL="http://${PRIMARY_ENDPOINT_URL}"
+
+    # Set SECONDARY S3 endpoint URL for environments that have a secondary
+    if [[ -n "${SECONDARY_ENDPOINT_URL:-}" ]]; then
+        SECONDARY_S3_ENDPOINT_URL="http://${SECONDARY_ENDPOINT_URL}"
+    fi
+
+    # SECONDARY only in local and production (no secondary for CI)
+    if [[ "${env_type}" == "ci" ]]; then
+        PRIMARY_ACCESS_KEY_ID="ci-rustfs-access-key"
+        PRIMARY_SECRET_ACCESS_KEY="ci-rustfs-secret-key"
+        return 0
+    fi
+
+    if [[ "${env_type}" == "production" ]]; then
+        SECONDARY_ACCESS_KEY_ID="rustfs-secondary-access-key"
+        SECONDARY_SECRET_ACCESS_KEY="rustfs-secondary-secret-key"
+        SECONDARY_ROOT_USER="minioadmin"
+    fi
 }

-generate_django_secret_key() {
-  # Django needs 50+ chars with special characters
-  openssl rand -base64 64 | tr -d "\n"
+function generate_secret() {
+    local length="${1:-40}"
+    openssl rand -base64 48 | tr -d "=+/" | cut -c1-"${length}"
 }

-process_env_file() {
-  local template="$1"
-  local output="$2"
-  local env_type="$3"
-  local force="$4"
-
-  if [[ -f "${output}" && "${force}" != "true" ]]; then
-    echo " ⏭ ${output} already exists (use --force to overwrite)"
-    return 0
-  fi
-
-  echo " ✓ Generating ${output}"
-
-  local content
-  content=$(cat "${template}")
-
-  # calculate WEB_CONCURRENCY based on CPU cores: (2 x num_cores) + 1
-  local num_cores
-  num_cores=$(nproc 2>/dev/null || echo "2")
-  local web_concurrency=$(( (num_cores * 2) + 1 ))
-
-  # generate secrets based on environment type
-  if [[ "${env_type}" == "ci" ]]; then
-    # CI: use predictable but acceptable secrets for ephemeral environments
-    content="${content//DJANGO_SECRET_KEY=/DJANGO_SECRET_KEY=ci-django-secret-key-insecure-for-testing-only}"
-    content="${content//DJANGO_ADMIN_URL=/DJANGO_ADMIN_URL=ci-admin/}"
-    content="${content//CELERY_FLOWER_PASSWORD=/CELERY_FLOWER_PASSWORD=ci-flower-pass}"
-
content="${content//SVI_SERVER_API_KEY=/SVI_SERVER_API_KEY=ci-svi-api-key-01234567890123456789abcde}" # 40 chars - content="${content//MINIO_ROOT_PASSWORD=/MINIO_ROOT_PASSWORD=ci-minio-secret}" - content="${content//AWS_SECRET_ACCESS_KEY=/AWS_SECRET_ACCESS_KEY=ci-minio-secret}" - content="${content//POSTGRES_PASSWORD=your-specific-password/POSTGRES_PASSWORD=ci-postgres-pass}" - content="${content//:your-specific-password@/:ci-postgres-pass@}" - content="${content//OPENSEARCH_INITIAL_ADMIN_PASSWORD=/OPENSEARCH_INITIAL_ADMIN_PASSWORD=CiAdmin123!}" - content="${content//OPENSEARCH_PASSWORD=/OPENSEARCH_PASSWORD=CiDjango123!}" - else - # local/production: generate random secure secrets - local django_secret_key django_admin_url flower_pass minio_pass postgres_pass opensearch_admin_pass opensearch_user_pass svi_api_key - django_secret_key=$(generate_django_secret_key) - django_admin_url="$(generate_secret 16)/" - flower_pass=$(generate_secret 32) - minio_pass=$(generate_secret 40) - postgres_pass=$(generate_secret 32) - opensearch_admin_pass=$(generate_secret 32) - opensearch_user_pass=$(generate_secret 32) - svi_api_key=$(generate_secret 40) - - content="${content//DJANGO_SECRET_KEY=/DJANGO_SECRET_KEY=${django_secret_key}}" - content="${content//DJANGO_ADMIN_URL=/DJANGO_ADMIN_URL=${django_admin_url}}" - content="${content//CELERY_FLOWER_PASSWORD=/CELERY_FLOWER_PASSWORD=${flower_pass}}" - content="${content//SVI_SERVER_API_KEY=/SVI_SERVER_API_KEY=${svi_api_key}}" - content="${content//MINIO_ROOT_PASSWORD=/MINIO_ROOT_PASSWORD=${minio_pass}}" - content="${content//AWS_SECRET_ACCESS_KEY=/AWS_SECRET_ACCESS_KEY=${minio_pass}}" - content="${content//POSTGRES_PASSWORD=your-specific-password/POSTGRES_PASSWORD=${postgres_pass}}" - content="${content//:your-specific-password@/:${postgres_pass}@}" - content="${content//OPENSEARCH_INITIAL_ADMIN_PASSWORD=/OPENSEARCH_INITIAL_ADMIN_PASSWORD=${opensearch_admin_pass}}" - content="${content//OPENSEARCH_PASSWORD=/OPENSEARCH_PASSWORD=${opensearch_user_pass}}" - fi - - # set WEB_CONCURRENCY based on CPU cores (applies to all environments) - content="${content//WEB_CONCURRENCY=4/WEB_CONCURRENCY=${web_concurrency}}" - - # write to output - mkdir -p "$(dirname "${output}")" - echo "${content}" > "${output}" +function generate_django_secret_key() { + # Django needs 50+ chars with special characters + openssl rand -base64 64 | tr -d "\n" } -main() { - local force="false" - local env_type="" - - # parse arguments - while [[ $# -gt 0 ]]; do - case "$1" in - -f|--force) - force="true" - shift - ;; - -h|--help) - usage - ;; - local|production|ci) - env_type="$1" - shift - ;; - *) - echo "ERROR: Unknown argument: $1" >&2 - usage - ;; - esac - done - - if [[ -z "${env_type}" ]]; then - echo "ERROR: Environment type required (local, production, or ci)" >&2 - usage - fi - - echo "🔐 Generating secrets for '${env_type}' environment..." 
- - local target_dir="${PROJECT_ROOT}/.envs/${env_type}" - - # process each env file from examples - for template in "${EXAMPLE_DIR}"/*.env; do - local filename - filename=$(basename "${template}") - - # skip production-specific example files for non-production envs - if [[ "${filename}" == *.prod-example.env ]]; then - if [[ "${env_type}" == "production" ]]; then - # use prod-example for production django.env - if [[ "${filename}" == "django.prod-example.env" ]]; then - process_env_file "${template}" "${target_dir}/django.env" "${env_type}" "${force}" - fi - fi - continue - fi - - # skip regular django.env for production (we use prod-example instead) - if [[ "${env_type}" == "production" && "${filename}" == "django.env" ]]; then - continue - fi - - local output="${target_dir}/${filename}" - process_env_file "${template}" "${output}" "${env_type}" "${force}" - done - - echo "" - echo "✅ Secrets generated successfully in ${target_dir}/" - echo "" - echo "Next steps:" - if [[ "${env_type}" == "ci" ]]; then - echo " - Review generated secrets (safe for ephemeral CI usage)" - else - echo " - Review and customize ${target_dir}/*.env as needed" - echo " - Set additional optional vars (AUTH0, SENTRY, etc.)" - fi - echo " - Use 'just env' to check the environment setup" - echo " - Use 'just up' to start the stack" +function process_env_file() { + local template="$1" + local output="$2" + local env_type="$3" + local force="$4" + local filename + filename=$(basename "${template}") + + configure_object_store_defaults "${env_type}" + + if [[ -f "${output}" && "${force}" != "true" ]]; then + echo " ⏭ ${output} already exists (use --force to overwrite)" + return 0 + fi + + echo " ✓ Generating ${output}" + + local content + content=$(cat "${template}") + + # calculate WEB_CONCURRENCY based on CPU cores: (2 x num_cores) + 1 + local num_cores + num_cores=$(nproc 2>/dev/null || echo "2") + local web_concurrency=$(((num_cores * 2) + 1)) + + # generate secrets based on environment type + if [[ "${env_type}" == "ci" ]]; then + # CI: use predictable but acceptable secrets for ephemeral environments + content="${content//:your-specific-password@/:ci-postgres-pass@}" + content="${content//AWS_SECRET_ACCESS_KEY=/AWS_SECRET_ACCESS_KEY=ci-rustfs-secret}" + content="${content//CELERY_FLOWER_PASSWORD=/CELERY_FLOWER_PASSWORD=ci-flower-pass}" + content="${content//DJANGO_ADMIN_URL=/DJANGO_ADMIN_URL=ci-admin/}" + content="${content//DJANGO_SECRET_KEY=/DJANGO_SECRET_KEY=ci-django-secret-key-insecure-for-testing-only}" + content="${content//OPENSEARCH_INITIAL_ADMIN_PASSWORD=/OPENSEARCH_INITIAL_ADMIN_PASSWORD=CiAdmin123!}" + content="${content//OPENSEARCH_PASSWORD=/OPENSEARCH_PASSWORD=CiDjango123!}" + content="${content//POSTGRES_PASSWORD=your-specific-password/POSTGRES_PASSWORD=ci-postgres-pass}" + content="${content//SVI_SERVER_API_KEY=/SVI_SERVER_API_KEY=ci-svi-api-key-01234567890123456789abcde}" # 40 chars + else + # local/production: generate random secure secrets + local django_secret_key django_admin_url flower_pass postgres_pass opensearch_admin_pass opensearch_user_pass svi_api_key + django_secret_key=$(generate_django_secret_key) + django_admin_url="$(generate_secret 16)/" + flower_pass=$(generate_secret 32) + postgres_pass=$(generate_secret 32) + opensearch_admin_pass=$(generate_secret 32) + opensearch_user_pass=$(generate_secret 32) + svi_api_key=$(generate_secret 40) + + content="${content//:your-specific-password@/:${postgres_pass}@}" + 
content="${content//AWS_SECRET_ACCESS_KEY=/AWS_SECRET_ACCESS_KEY=${PRIMARY_SECRET_ACCESS_KEY}}" + content="${content//CELERY_FLOWER_PASSWORD=/CELERY_FLOWER_PASSWORD=${flower_pass}}" + content="${content//DJANGO_ADMIN_URL=/DJANGO_ADMIN_URL=${django_admin_url}}" + content="${content//DJANGO_SECRET_KEY=/DJANGO_SECRET_KEY=${django_secret_key}}" + content="${content//OPENSEARCH_INITIAL_ADMIN_PASSWORD=/OPENSEARCH_INITIAL_ADMIN_PASSWORD=${opensearch_admin_pass}}" + content="${content//OPENSEARCH_PASSWORD=/OPENSEARCH_PASSWORD=${opensearch_user_pass}}" + content="${content//POSTGRES_PASSWORD=your-specific-password/POSTGRES_PASSWORD=${postgres_pass}}" + content="${content//SVI_SERVER_API_KEY=/SVI_SERVER_API_KEY=${svi_api_key}}" + fi + + # set WEB_CONCURRENCY based on CPU cores (applies to all environments) + content="${content//WEB_CONCURRENCY=4/WEB_CONCURRENCY=${web_concurrency}}" + + if [[ "${filename}" == "storage.env" ]]; then + # PRIMARY vars + content="${content//PRIMARY_ACCESS_KEY_ID=admin/PRIMARY_ACCESS_KEY_ID=${PRIMARY_ACCESS_KEY_ID}}" + content="${content//PRIMARY_S3_ENDPOINT_URL=http:\/\/sds-gateway-local-rustfs:9000/PRIMARY_S3_ENDPOINT_URL=${PRIMARY_S3_ENDPOINT_URL}}" + content="${content//PRIMARY_SECRET_ACCESS_KEY=admin/PRIMARY_SECRET_ACCESS_KEY=${PRIMARY_SECRET_ACCESS_KEY}}" + content="${content//PRIMARY_ENDPOINT_URL=sds-gateway-local-rustfs:9000/PRIMARY_ENDPOINT_URL=${PRIMARY_ENDPOINT_URL}}" + + # SECONDARY vars (local only — SeaweedFS) + if [[ -n "${SECONDARY_ENDPOINT_URL:-}" ]]; then + content="${content//SECONDARY_ACCESS_KEY_ID=admin/SECONDARY_ACCESS_KEY_ID=${SECONDARY_ACCESS_KEY_ID}}" + content="${content//SECONDARY_S3_ENDPOINT_URL=http:\/\/sds-gateway-local-sfs-s3:8333/SECONDARY_S3_ENDPOINT_URL=${SECONDARY_S3_ENDPOINT_URL}}" + content="${content//SECONDARY_SECRET_ACCESS_KEY=admin/SECONDARY_SECRET_ACCESS_KEY=${SECONDARY_SECRET_ACCESS_KEY}}" + content="${content//SECONDARY_ENDPOINT_URL=sds-gateway-local-sfs-s3:8333/SECONDARY_ENDPOINT_URL=${SECONDARY_ENDPOINT_URL}}" + fi + + # deprecated: + # content="${content//AWS_ACCESS_KEY_ID=admin/AWS_ACCESS_KEY_ID=${PRIMARY_ACCESS_KEY_ID}}" + # content="${content//AWS_SECRET_ACCESS_KEY=admin/AWS_SECRET_ACCESS_KEY=${PRIMARY_SECRET_ACCESS_KEY}}" + fi + + if [[ "${filename}" == "storage.prod.env" ]]; then + # PRIMARY (SeaweedFS) vars + content="${content//PRIMARY_ACCESS_KEY_ID=admin/PRIMARY_ACCESS_KEY_ID=${PRIMARY_ACCESS_KEY_ID}}" + content="${content//PRIMARY_S3_ENDPOINT_URL=http:\/\/sds-gateway-prod-sfs-s3:8333/PRIMARY_S3_ENDPOINT_URL=${PRIMARY_S3_ENDPOINT_URL}}" + content="${content//PRIMARY_SECRET_ACCESS_KEY=admin/PRIMARY_SECRET_ACCESS_KEY=${PRIMARY_SECRET_ACCESS_KEY}}" + content="${content//PRIMARY_ENDPOINT_URL=sds-gateway-prod-sfs-s3:8333/PRIMARY_ENDPOINT_URL=${PRIMARY_ENDPOINT_URL}}" + # SECONDARY (RustFS) vars + content="${content//SECONDARY_ACCESS_KEY_ID=minioadmin/SECONDARY_ACCESS_KEY_ID=${SECONDARY_ACCESS_KEY_ID}}" + content="${content//SECONDARY_ROOT_USER=minioadmin/SECONDARY_ROOT_USER=${SECONDARY_ROOT_USER}}" + if [[ -n "${SECONDARY_ROOT_PASSWORD}" ]]; then + content="${content//SECONDARY_ROOT_PASSWORD=/SECONDARY_ROOT_PASSWORD=${SECONDARY_ROOT_PASSWORD}}" + content="${content//SECONDARY_SECRET_ACCESS_KEY=/SECONDARY_SECRET_ACCESS_KEY=${SECONDARY_SECRET_ACCESS_KEY}}" + fi + + # deprecated / unused env vars safe to rename in your .env files: + + # AWS_ACCESS_KEY_ID -> PRIMARY_ACCESS_KEY_ID and SECONDARY_ACCESS_KEY_ID + # AWS_SECRET_ACCESS_KEY -> PRIMARY_SECRET_ACCESS_KEY and SECONDARY_SECRET_ACCESS_KEY + # MINIO_ROOT_PASSWORD -> 
removed: MinIO is not used anymore + # MINIO_SECRET_ACCESS_KEY -> removed: MinIO is not used anymore + # RUSTFS_ACCESS_KEY_ID -> PRIMARY_ACCESS_KEY_ID or SECONDARY_ACCESS_KEY_ID depending on your setup + # RUSTFS_ROOT_PASSWORD -> PRIMARY_SECRET_ACCESS_KEY or SECONDARY_ROOT_PASSWORD depending on your setup + # RUSTFS_ROOT_USER -> PRIMARY_ROOT_USER or SECONDARY_ROOT_USER depending on your setup + # RUSTFS_SECRET_ACCESS_KEY -> PRIMARY_SECRET_ACCESS_KEY or SECONDARY_SECRET_ACCESS_KEY depending on your setup + + # content="${content//AWS_ACCESS_KEY_ID=admin/AWS_ACCESS_KEY_ID=${PRIMARY_ACCESS_KEY_ID}}" + # content="${content//AWS_SECRET_ACCESS_KEY=admin/AWS_SECRET_ACCESS_KEY=${PRIMARY_SECRET_ACCESS_KEY}}" + # content="${content//MINIO_ROOT_PASSWORD=/MINIO_ROOT_PASSWORD=${SECONDARY_ROOT_PASSWORD}}" + # content="${content//MINIO_SECRET_ACCESS_KEY=/MINIO_SECRET_ACCESS_KEY=${SECONDARY_SECRET_ACCESS_KEY}}" + # content="${content//RUSTFS_ACCESS_KEY_ID=minioadmin/RUSTFS_ACCESS_KEY_ID=${SECONDARY_ACCESS_KEY_ID}}" + # content="${content//RUSTFS_ROOT_PASSWORD=/RUSTFS_ROOT_PASSWORD=${SECONDARY_ROOT_PASSWORD}}" + # content="${content//RUSTFS_ROOT_USER=minioadmin/RUSTFS_ROOT_USER=${SECONDARY_ROOT_USER}}" + # content="${content//RUSTFS_SECRET_ACCESS_KEY=/RUSTFS_SECRET_ACCESS_KEY=${SECONDARY_SECRET_ACCESS_KEY}}" + fi + + # write to output + mkdir -p "$(dirname "${output}")" + echo "${content}" >"${output}" + chmod 600 "${output}" +} + +function set_permissions() { + declare -a env_dirs + env_dirs=( + "${GATEWAY_ROOT}/.envs" + "${SFS_ROOT}/.envs" + ) + for dir in "${env_dirs[@]}"; do + if [ -d "${dir}" ]; then + find "${dir}" -type f -name "*.env" -exec chmod --changes 600 {} \; + fi + done +} + +function main() { + local force="false" + local env_type="" + + # parse arguments + while [[ $# -gt 0 ]]; do + case "$1" in + -f | --force) + force="true" + shift + ;; + -h | --help) + usage + ;; + local | production | ci) + env_type="$1" + shift + ;; + *) + echo "ERROR: Unknown argument: $1" >&2 + usage + ;; + esac + done + + if [[ -z "${env_type}" ]]; then + echo "ERROR: Environment type required (local, production, or ci)" >&2 + usage + fi + + echo "🔐 Generating secrets for '${env_type}' environment..." 
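+    # generated files are written under the gateway's .envs/ tree only; the
+    # seaweedfs .envs/ tree is just re-permissioned by set_permissions below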
+ + local target_dir_gwy="${GATEWAY_ROOT}/.envs/${env_type}" + + # process each env file from examples + for template in "${EXAMPLE_DIR}"/*.env; do + local filename + filename=$(basename "${template}") + + # skip production-specific example files for non-production envs + if [[ "${filename}" == *.prod-example.env ]]; then + if [[ "${env_type}" == "production" ]]; then + # use prod-example for production django.env + if [[ "${filename}" == "django.prod-example.env" ]]; then + process_env_file "${template}" "${target_dir_gwy}/django.env" "${env_type}" "${force}" + fi + fi + continue + fi + + # skip regular django.env for production (we use prod-example instead) + if [[ "${env_type}" == "production" && "${filename}" == "django.env" ]]; then + continue + fi + + # skip storage.prod.env for local/CI + if [[ "${env_type}" != "production" && "${filename}" == "storage.prod.env" ]]; then + continue + fi + + local output="${target_dir_gwy}/${filename}" + process_env_file "${template}" "${output}" "${env_type}" "${force}" + done + + set_permissions + + echo "" + echo "✅ Secrets generated successfully in ${target_dir_gwy}/" + echo "" + echo "Next steps:" + if [[ "${env_type}" == "ci" ]]; then + echo " - Review generated secrets (safe for ephemeral CI usage)" + else + echo " - Review and customize ${target_dir_gwy}/*.env as needed" + echo " - Set additional optional vars (AUTH0, SENTRY, etc.)" + fi + echo " - Use 'just env' to check the environment setup" + echo " - Use 'just up' to start the stack" } main "$@" diff --git a/gateway/sds_gateway/api_methods/management/commands/create_storage_buckets.py b/gateway/sds_gateway/api_methods/management/commands/create_storage_buckets.py new file mode 100644 index 000000000..20b2a358b --- /dev/null +++ b/gateway/sds_gateway/api_methods/management/commands/create_storage_buckets.py @@ -0,0 +1,58 @@ +"""Management command to create/ensure buckets exist on configured object stores.""" + +from django.conf import settings +from django.core.management.base import BaseCommand +from loguru import logger as log + +from sds_gateway.api_methods.utils.minio_client import _build_minio_client + + +class Command(BaseCommand): + """Create or ensure buckets exist on primary and optional secondary stores.""" + + help = "Create/ensure buckets exist on configured object stores" + + def handle(self, *args, **options) -> None: + """Execute the command.""" + # Primary store (required) + primary_client = _build_minio_client( + endpoint=settings.PRIMARY_ENDPOINT_URL, + access_key=settings.PRIMARY_ACCESS_KEY_ID, + secret_key=settings.PRIMARY_SECRET_ACCESS_KEY, + secure=settings.PRIMARY_STORAGE_USE_HTTPS, + ) + self._ensure_bucket(primary_client, settings.PRIMARY_STORAGE_BUCKET_NAME) + + # Secondary store (optional — may be unreachable) + # Skip entirely if access key is still the LEGACY fallback default; + # that means no secondary was ever configured for this environment. 
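+        # (LEGACY_AWS_ACCESS_KEY_ID is assumed here to be the old
+        # AWS_ACCESS_KEY_ID default that SECONDARY_ACCESS_KEY_ID falls back
+        # to when no secondary credentials are configured.)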
+ if settings.SECONDARY_ACCESS_KEY_ID == settings.LEGACY_AWS_ACCESS_KEY_ID: + log.info( + "Secondary object store not configured (LEGACY fallback creds), " + "skipping" + ) + else: + try: + secondary_client = _build_minio_client( + endpoint=settings.SECONDARY_ENDPOINT_URL, + access_key=settings.SECONDARY_ACCESS_KEY_ID, + secret_key=settings.SECONDARY_SECRET_ACCESS_KEY, + secure=settings.SECONDARY_STORAGE_USE_HTTPS, + ) + self._ensure_bucket( + secondary_client, settings.SECONDARY_STORAGE_BUCKET_NAME + ) + except Exception as exc: # noqa: BLE001 + log.warning( + "Secondary object store unreachable or bucket creation failed: {}", + exc, + ) + + def _ensure_bucket(self, client, bucket_name: str) -> None: + """Check if a bucket exists; create it if it does not.""" + if client.bucket_exists(bucket_name): + log.info("Bucket '{}' already exists", bucket_name) + return + + client.make_bucket(bucket_name) + log.success("Created bucket '{}'", bucket_name) diff --git a/gateway/sds_gateway/api_methods/tests/test_object_store_migration.py b/gateway/sds_gateway/api_methods/tests/test_object_store_migration.py new file mode 100644 index 000000000..189e37c30 --- /dev/null +++ b/gateway/sds_gateway/api_methods/tests/test_object_store_migration.py @@ -0,0 +1,361 @@ +"""Tests for object-store migration adapter and dual Django storage backend.""" + +# ruff: noqa: SLF001 +# pyright: reportPrivateUsage=false + +import logging +from unittest.mock import MagicMock + +import pytest +from django.core.files.base import ContentFile + +from sds_gateway.api_methods.utils.dual_object_store_storage import ( + DualObjectStoreS3Storage, +) +from sds_gateway.api_methods.utils.minio_client import ObjectStoreFacade + +EXPECTED_SIZE = 42 + + +class MissingObjectError(Exception): + """Test-only exception to simulate missing-object failures.""" + + code = "NoSuchKey" + + +def _configure_bucket_settings(settings) -> None: + settings.PRIMARY_STORAGE_BUCKET_NAME = "sfs-bucket" + settings.SECONDARY_STORAGE_BUCKET_NAME = "secondary-bucket" + + +def _build_storage_with_mocks( + *, + monkeypatch: pytest.MonkeyPatch, + settings, + primary_storage: MagicMock, + secondary_storage: MagicMock, + read_fallback_enabled: bool, + write_both_enabled: bool, + dual_write_strict: bool, +) -> DualObjectStoreS3Storage: + settings.OBJECT_STORE_READ_FALLBACK_TO_SECONDARY_ENABLED = read_fallback_enabled + settings.OBJECT_STORE_WRITE_BOTH_ENABLED = write_both_enabled + settings.OBJECT_STORE_DUAL_WRITE_STRICT = dual_write_strict + + backends = [primary_storage, secondary_storage] + + def _create_backend(_self, *, store_prefix: str): + _ = store_prefix + return backends.pop(0) + + monkeypatch.setattr(DualObjectStoreS3Storage, "_create_backend", _create_backend) + return DualObjectStoreS3Storage() + + +def test_adapter_read_falls_back_on_missing(settings) -> None: + _configure_bucket_settings(settings) + + primary_client = MagicMock() + secondary_client = MagicMock() + + expected_response = object() + primary_client.get_object.side_effect = MissingObjectError("missing") + secondary_client.get_object.return_value = expected_response + + facade = ObjectStoreFacade( + primary_client=primary_client, + secondary_client=secondary_client, + read_fallback_to_secondary_enabled=True, + write_both_enabled=False, + dual_write_strict=False, + ) + + result = facade.get_object(bucket_name="bucket", object_name="path/to/object") + + assert result is expected_response + secondary_client.get_object.assert_called_once_with( + bucket_name="secondary-bucket", + 
object_name="path/to/object", + ) + + +def test_adapter_does_not_fallback_on_non_missing_errors(settings) -> None: + """Only missing-object errors should trigger fallback when enabled, other errors + should raise immediately.""" + _configure_bucket_settings(settings) + + primary_client = MagicMock() + secondary_client = MagicMock() + + primary_client.get_object.side_effect = RuntimeError("boom") + + facade = ObjectStoreFacade( + primary_client=primary_client, + secondary_client=secondary_client, + read_fallback_to_secondary_enabled=True, + write_both_enabled=False, + dual_write_strict=False, + ) + + with pytest.raises(RuntimeError, match="boom"): + facade.get_object(bucket_name="bucket", object_name="path/to/object") + + secondary_client.get_object.assert_not_called() + + +def test_adapter_dual_write_non_strict_allows_secondary_failure(settings) -> None: + """In non-strict dual-write mode, secondary write failures should not raise and + should be logged.""" + _configure_bucket_settings(settings) + + primary_client = MagicMock() + secondary_client = MagicMock() + + primary_client.put_object.return_value = "primary-result" + secondary_client.put_object.side_effect = RuntimeError("secondary write failed") + + facade = ObjectStoreFacade( + primary_client=primary_client, + secondary_client=secondary_client, + read_fallback_to_secondary_enabled=False, + write_both_enabled=True, + dual_write_strict=False, + ) + + result = facade.put_object(bucket_name="bucket", object_name="path/to/object") + + assert result == "primary-result" + + +def test_adapter_dual_write_strict_raises_on_secondary_failure(settings) -> None: + _configure_bucket_settings(settings) + + primary_client = MagicMock() + secondary_client = MagicMock() + + primary_client.put_object.return_value = "primary-result" + secondary_client.put_object.side_effect = RuntimeError("secondary write failed") + + facade = ObjectStoreFacade( + primary_client=primary_client, + secondary_client=secondary_client, + read_fallback_to_secondary_enabled=False, + write_both_enabled=True, + dual_write_strict=True, + ) + + with pytest.raises(RuntimeError, match="secondary write failed"): + facade.put_object(bucket_name="bucket", object_name="path/to/object") + + +def test_adapter_maps_bucket_name_kwargs_per_store(settings) -> None: + _configure_bucket_settings(settings) + + primary_client = MagicMock() + secondary_client = MagicMock() + + primary_client.put_object.return_value = "primary-result" + + facade = ObjectStoreFacade( + primary_client=primary_client, + secondary_client=secondary_client, + read_fallback_to_secondary_enabled=False, + write_both_enabled=True, + dual_write_strict=False, + ) + + facade.put_object(bucket_name="caller-bucket", object_name="path/to/object") + + primary_client.put_object.assert_called_once_with( + bucket_name="sfs-bucket", + object_name="path/to/object", + ) + secondary_client.put_object.assert_called_once_with( + bucket_name="secondary-bucket", + object_name="path/to/object", + ) + + +def test_adapter_maps_bucket_name_positionally_per_store(settings) -> None: + _configure_bucket_settings(settings) + + primary_client = MagicMock() + secondary_client = MagicMock() + + facade = ObjectStoreFacade( + primary_client=primary_client, + secondary_client=secondary_client, + read_fallback_to_secondary_enabled=False, + write_both_enabled=True, + dual_write_strict=False, + ) + + facade.remove_object("caller-bucket", "path/to/object") + + primary_client.remove_object.assert_called_once_with( + "sfs-bucket", + "path/to/object", + ) + 
secondary_client.remove_object.assert_called_once_with( + "secondary-bucket", + "path/to/object", + ) + + +def test_adapter_remove_object_is_strict_when_fallback_is_enabled(settings) -> None: + _configure_bucket_settings(settings) + + primary_client = MagicMock() + secondary_client = MagicMock() + + secondary_client.remove_object.side_effect = RuntimeError("secondary delete failed") + + facade = ObjectStoreFacade( + primary_client=primary_client, + secondary_client=secondary_client, + read_fallback_to_secondary_enabled=True, + write_both_enabled=False, + dual_write_strict=False, + ) + + with pytest.raises(RuntimeError, match="secondary delete failed"): + facade.remove_object(bucket_name="bucket", object_name="path/to/object") + + +def test_adapter_fallback_logging_redacts_object_key( + caplog: pytest.LogCaptureFixture, + settings, +) -> None: + _configure_bucket_settings(settings) + + primary_client = MagicMock() + secondary_client = MagicMock() + + full_key = "customers/acme-corp/private/export-2026-04-14.csv" + primary_client.get_object.side_effect = MissingObjectError("missing") + secondary_client.get_object.return_value = object() + + facade = ObjectStoreFacade( + primary_client=primary_client, + secondary_client=secondary_client, + read_fallback_to_secondary_enabled=True, + write_both_enabled=False, + dual_write_strict=False, + ) + + with caplog.at_level( + logging.WARNING, + logger="sds_gateway.api_methods.utils.minio_client", + ): + facade.get_object(bucket_name="bucket", object_name=full_key) + + logged_messages = " ".join(record.getMessage() for record in caplog.records) + assert full_key not in logged_messages + assert "sha256=" in logged_messages + assert "len=" in logged_messages + + +def test_storage_open_falls_back_on_missing( + monkeypatch: pytest.MonkeyPatch, + settings, +) -> None: + primary_storage = MagicMock() + secondary_storage = MagicMock() + + expected_file = MagicMock() + primary_storage._open.side_effect = MissingObjectError("missing") + secondary_storage._open.return_value = expected_file + + storage = _build_storage_with_mocks( + monkeypatch=monkeypatch, + settings=settings, + primary_storage=primary_storage, + secondary_storage=secondary_storage, + read_fallback_enabled=True, + write_both_enabled=False, + dual_write_strict=False, + ) + + result = storage._open("path/to/object", mode="rb") + + assert result is expected_file + secondary_storage._open.assert_called_once_with("path/to/object", mode="rb") + + +def test_storage_save_dual_write_non_strict( + monkeypatch: pytest.MonkeyPatch, + settings, +) -> None: + primary_storage = MagicMock() + secondary_storage = MagicMock() + + primary_storage._save.return_value = "saved/name.bin" + secondary_storage._save.side_effect = RuntimeError("secondary save failed") + + storage = _build_storage_with_mocks( + monkeypatch=monkeypatch, + settings=settings, + primary_storage=primary_storage, + secondary_storage=secondary_storage, + read_fallback_enabled=False, + write_both_enabled=True, + dual_write_strict=False, + ) + + content = ContentFile(b"payload", name="name.bin") + saved_name = storage._save("name.bin", content) + + assert saved_name == "saved/name.bin" + secondary_storage._save.assert_called_once() + + +def test_storage_delete_is_strict_when_fallback_is_enabled( + monkeypatch: pytest.MonkeyPatch, + settings, +) -> None: + primary_storage = MagicMock() + secondary_storage = MagicMock() + + secondary_storage.delete.side_effect = RuntimeError("secondary delete failed") + + storage = _build_storage_with_mocks( + 
monkeypatch=monkeypatch,
+        settings=settings,
+        primary_storage=primary_storage,
+        secondary_storage=secondary_storage,
+        read_fallback_enabled=True,
+        write_both_enabled=False,
+        dual_write_strict=False,
+    )
+
+    with pytest.raises(RuntimeError, match="secondary delete failed"):
+        storage.delete("path/to/object")
+
+
+def test_storage_size_delegates_to_primary(
+    monkeypatch: pytest.MonkeyPatch,
+    settings,
+) -> None:
+    """DualObjectStoreS3Storage.size() must be implemented so Django's
+    FileField run_validation can read file size without raising
+    NotImplementedError."""
+    primary_storage = MagicMock()
+    secondary_storage = MagicMock()
+
+    primary_storage.size.return_value = EXPECTED_SIZE
+
+    storage = _build_storage_with_mocks(
+        monkeypatch=monkeypatch,
+        settings=settings,
+        primary_storage=primary_storage,
+        secondary_storage=secondary_storage,
+        read_fallback_enabled=False,
+        write_both_enabled=False,
+        dual_write_strict=False,
+    )
+
+    result = storage.size("path/to/object")
+
+    assert result == EXPECTED_SIZE
+    primary_storage.size.assert_called_once_with("path/to/object")
+    secondary_storage.size.assert_not_called()
diff --git a/gateway/sds_gateway/api_methods/utils/dual_object_store_storage.py b/gateway/sds_gateway/api_methods/utils/dual_object_store_storage.py
new file mode 100644
index 000000000..f58aba53e
--- /dev/null
+++ b/gateway/sds_gateway/api_methods/utils/dual_object_store_storage.py
@@ -0,0 +1,172 @@
+"""Dual-store Django storage backend for primary + secondary.
+
+Primary and secondary backends may be any S3-compatible object store, usually one of:
+- Primary: RustFS (local/CI), SeaweedFS (production), or MinIO (deprecated)
+- Secondary: RustFS, Garage, or MinIO (deprecated)
+
+The secondary backend is optional unless any of these settings is True:
+  - OBJECT_STORE_READ_FALLBACK_TO_SECONDARY_ENABLED
+  - OBJECT_STORE_WRITE_BOTH_ENABLED
+  - OBJECT_STORE_DUAL_WRITE_STRICT
+"""
+
+import hashlib
+import logging
+from typing import Any
+
+from django.conf import settings
+from django.core.files.base import ContentFile
+from django.core.files.base import File
+from django.core.files.storage import Storage
+from storages.backends.s3boto3 import S3Boto3Storage
+
+log = logging.getLogger(__name__)
+
+_MISSING_OBJECT_ERROR_CODES = {
+    "404",
+    "NoSuchBucket",
+    "NoSuchKey",
+    "NoSuchObject",
+    "NoSuchVersion",
+    "NotFound",
+}
+
+
+def _is_missing_object_error(error: Exception) -> bool:
+    """Return True when the error represents a missing object/bucket condition."""
+    error_code = str(getattr(error, "code", ""))
+    if error_code in _MISSING_OBJECT_ERROR_CODES:
+        return True
+
+    response = getattr(error, "response", None)
+    if isinstance(response, dict):
+        response_error = response.get("Error", {})
+        code = str(response_error.get("Code", ""))
+        if code in _MISSING_OBJECT_ERROR_CODES:
+            return True
+
+    status_code = str(getattr(error, "status", ""))
+    return status_code == "404"
+
+
+def _build_storage_options(store_prefix: str) -> dict[str, Any]:
+    """Build S3Boto3Storage options for a configured object store prefix."""
+    return {
+        "access_key": getattr(settings, f"{store_prefix}_ACCESS_KEY_ID"),
+        "secret_key": getattr(settings, f"{store_prefix}_SECRET_ACCESS_KEY"),
+        "bucket_name": getattr(settings, f"{store_prefix}_STORAGE_BUCKET_NAME"),
+        "endpoint_url": getattr(settings, f"{store_prefix}_S3_ENDPOINT_URL"),
+        "region_name": settings.AWS_S3_REGION_NAME,
+        "signature_version": settings.AWS_S3_SIGNATURE_VERSION,
+        "default_acl": settings.AWS_DEFAULT_ACL,
+        "file_overwrite": settings.AWS_S3_FILE_OVERWRITE,
+
} + + +def _safe_object_reference(name: str) -> str: + """Return a non-reversible identifier suitable for operational logs.""" + object_name_digest = hashlib.sha256(name.encode()).hexdigest()[:12] + return f"sha256={object_name_digest} len={len(name)}" + + +class DualObjectStoreS3Storage(Storage): + """Django storage backend with primary and fallback.""" + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__() + self._primary_storage = self._create_backend(store_prefix="PRIMARY") + self._secondary_storage = self._create_backend(store_prefix="SECONDARY") + + def _create_backend(self, *, store_prefix: str) -> S3Boto3Storage: + """Create storage backend for a given settings prefix.""" + return S3Boto3Storage(**_build_storage_options(store_prefix=store_prefix)) + + def _clone_content(self, content: File[Any]) -> ContentFile[Any]: + """Clone content for secondary writes while preserving the primary stream.""" + if hasattr(content, "seek"): + content.seek(0) + payload = content.read() + if isinstance(payload, str): + payload = payload.encode() + if hasattr(content, "seek"): + content.seek(0) + + return ContentFile(payload, name=getattr(content, "name", None)) + + def _open(self, name: str, mode: str = "rb") -> File[Any]: + try: + return self._primary_storage._open(name, mode=mode) # pyright: ignore[reportPrivateUsage] # noqa: SLF001 + except Exception as error: + if not settings.OBJECT_STORE_READ_FALLBACK_TO_SECONDARY_ENABLED: + raise + if not _is_missing_object_error(error): + raise + + log.warning( + "Object %s not in primary storage, falling back to secondary", + _safe_object_reference(name), + ) + return self._secondary_storage._open(name, mode=mode) # pyright: ignore[reportPrivateUsage] # noqa: SLF001 + + def _save(self, name: str, content: File[Any]) -> str: + if not settings.OBJECT_STORE_WRITE_BOTH_ENABLED: + return self._primary_storage._save(name, content) # pyright: ignore[reportPrivateUsage] # noqa: SLF001 + + secondary_content = self._clone_content(content) + saved_name = self._primary_storage._save(name, content) # pyright: ignore[reportPrivateUsage] # noqa: SLF001 + + try: + self._secondary_storage._save(saved_name, secondary_content) # pyright: ignore[reportPrivateUsage] # noqa: SLF001 + except Exception: + if settings.OBJECT_STORE_DUAL_WRITE_STRICT: + raise + + log.exception( + "Secondary storage write failed in non-strict dual-write mode" + ) + + return saved_name + + def exists(self, name: str) -> bool: + if self._primary_storage.exists(name): + return True + + if settings.OBJECT_STORE_READ_FALLBACK_TO_SECONDARY_ENABLED: + return self._secondary_storage.exists(name) + + return False + + def delete(self, name: str) -> None: + self._primary_storage.delete(name) + if not ( + settings.OBJECT_STORE_WRITE_BOTH_ENABLED + or settings.OBJECT_STORE_READ_FALLBACK_TO_SECONDARY_ENABLED + ): + return + + try: + self._secondary_storage.delete(name) + except Exception: + if ( + settings.OBJECT_STORE_READ_FALLBACK_TO_SECONDARY_ENABLED + or settings.OBJECT_STORE_DUAL_WRITE_STRICT + ): + raise + + log.exception( + "Secondary storage delete failed in non-strict dual-write mode" + ) + + def size(self, name: str) -> int: + """Return the size of the file in the primary storage.""" + return self._primary_storage.size(name) + + def path(self, name: str) -> str: + """Return the absolute path of the file in the primary storage.""" + return self._primary_storage.path(name) # pyright: ignore[reportUnknownMemberType] + + def url(self, name: str) -> str: + return 
self._primary_storage.url(name)
+
+    def __getattr__(self, name: str) -> Any:
+        return getattr(self._primary_storage, name)
diff --git a/gateway/sds_gateway/api_methods/utils/minio_client.py b/gateway/sds_gateway/api_methods/utils/minio_client.py
index 3c1606926..74c0a416d 100644
--- a/gateway/sds_gateway/api_methods/utils/minio_client.py
+++ b/gateway/sds_gateway/api_methods/utils/minio_client.py
@@ -1,12 +1,291 @@
+"""Object storage client facade for SeaweedFS + MinIO migration."""
+
+import hashlib
+import logging
+from typing import Any
+from urllib.parse import urlparse
+
 from django.conf import settings
 from minio import Minio
 
+log = logging.getLogger(__name__)
+
+_MISSING_OBJECT_ERROR_CODES = {
+    "404",
+    "NoSuchBucket",
+    "NoSuchKey",
+    "NoSuchObject",
+    "NoSuchVersion",
+    "NotFound",
+}
+
+_BUCKET_NAME_POSITION = 0
+_OBJECT_NAME_POSITION = 1
+_BUCKET_AND_OBJECT_ARGUMENT_COUNT = 2
+
+
+def _is_missing_object_error(error: Exception) -> bool:
+    """Return True when the error represents a missing object/bucket condition."""
+    error_code = str(getattr(error, "code", ""))
+    if error_code in _MISSING_OBJECT_ERROR_CODES:
+        return True
+
+    status_code = str(getattr(error, "status", ""))
+    return status_code == "404"
+
+
+def _normalize_endpoint(endpoint: str) -> str:
+    """Convert an endpoint URL to the host:port format the MinIO client accepts."""
+    parsed_endpoint = urlparse(endpoint)
+    if parsed_endpoint.netloc:
+        return parsed_endpoint.netloc
+    return endpoint
+
+
+def _safe_object_reference(object_name: Any) -> str:
+    """Return a non-reversible identifier suitable for operational logs."""
+    object_name_text = str(object_name)
+    object_name_digest = hashlib.sha256(object_name_text.encode()).hexdigest()[:12]
+    return f"sha256={object_name_digest} len={len(object_name_text)}"
+
 
-def get_minio_client() -> Minio:
-    # Initialize MinIO client
+def _build_minio_client(
+    *,
+    endpoint: str,
+    access_key: str,
+    secret_key: str,
+    secure: bool,
+) -> Minio:
+    """Build a MinIO API-compatible client."""
     return Minio(
-        settings.MINIO_ENDPOINT_URL,
-        access_key=settings.AWS_ACCESS_KEY_ID,
-        secret_key=settings.AWS_SECRET_ACCESS_KEY,
-        secure=settings.MINIO_STORAGE_USE_HTTPS,
+        _normalize_endpoint(endpoint),
+        access_key=access_key,
+        secret_key=secret_key,
+        secure=secure,
+    )
+
+
+class ObjectStoreFacade:
+    """Facade exposing MinIO-compatible methods with primary/fallback behavior.
+
+    It encapsulates two storage clients (primary and secondary) and provides
+    methods that implement the desired read/write behavior based on
+    configuration flags. The facade also handles argument rewriting to target
+    the correct buckets for each store and provides safe object references
+    for logging.
+    """
+
+    def __init__(
+        self,
+        *,
+        primary_client: Minio,
+        secondary_client: Minio,
+        read_fallback_to_secondary_enabled: bool,
+        write_both_enabled: bool,
+        dual_write_strict: bool,
+    ) -> None:
+        """Initialize the ObjectStoreFacade with the given clients and behavior flags.
+
+        Args:
+            primary_client: MinIO-compatible client for the primary object
+                store (RustFS in local/CI, SeaweedFS in production).
+            secondary_client: MinIO-compatible client for the secondary object
+                store (RustFS, Garage, or MinIO).
+            read_fallback_to_secondary_enabled: Whether to fall back to the
+                secondary store when the primary reports a missing object.
+            write_both_enabled: Whether to perform writes on both stores.
+            dual_write_strict: Require both writes to succeed; raise if the
+                secondary write fails.
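+                When False, secondary write failures are logged, not raised.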
+ """ + self._primary_client = primary_client + self._secondary_client = secondary_client + self._read_fallback_to_secondary_enabled = read_fallback_to_secondary_enabled + self._write_both_enabled = write_both_enabled + self._dual_write_strict = dual_write_strict + + def _rewrite_bucket_name( + self, + bucket_name: str, + *args: Any, + **kwargs: Any, + ) -> tuple[tuple[Any, ...], dict[str, Any]]: + """Return arguments rewritten for the target store bucket.""" + rewritten_args = list(args) + rewritten_kwargs = dict(kwargs) + + if "bucket_name" in rewritten_kwargs or not rewritten_args: + rewritten_kwargs["bucket_name"] = bucket_name + else: + rewritten_args[0] = bucket_name + + return tuple(rewritten_args), rewritten_kwargs + + def _primary_call_arguments( + self, + *args: Any, + **kwargs: Any, + ) -> tuple[tuple[Any, ...], dict[str, Any]]: + """Build call arguments targeting the primary object-store bucket.""" + kwargs.pop("bucket_name", None) + return self._rewrite_bucket_name( + settings.PRIMARY_STORAGE_BUCKET_NAME, + *args, + **kwargs, + ) + + def _secondary_call_arguments( + self, + *args: Any, + **kwargs: Any, + ) -> tuple[tuple[Any, ...], dict[str, Any]]: + """Build call arguments targeting the secondary object-store bucket.""" + kwargs.pop("bucket_name", None) + return self._rewrite_bucket_name( + settings.SECONDARY_STORAGE_BUCKET_NAME, + *args, + **kwargs, + ) + + def _object_reference(self, *args: Any, **kwargs: Any) -> str: + """Return a safe object identifier for logs.""" + object_name = kwargs.get("object_name") + if object_name is None: + if len(args) >= _BUCKET_AND_OBJECT_ARGUMENT_COUNT: + object_name = args[_OBJECT_NAME_POSITION] + elif args and "bucket_name" not in kwargs: + object_name = args[_BUCKET_NAME_POSITION] + else: + object_name = "unknown" + + return _safe_object_reference(object_name) + + def _read_with_optional_fallback( + self, + method_name: str, + *args: Any, + **kwargs: Any, + ) -> Any: + primary_method = getattr(self._primary_client, method_name) + primary_args, primary_kwargs = self._primary_call_arguments(*args, **kwargs) + try: + return primary_method(*primary_args, **primary_kwargs) + except Exception as error: + if not self._read_fallback_to_secondary_enabled: + raise + if not _is_missing_object_error(error): + raise + + log.warning( + "Object %s not found in primary store, falling back to secondary", + self._object_reference(*args, **kwargs), + ) + secondary_method = getattr(self._secondary_client, method_name) + secondary_args, secondary_kwargs = self._secondary_call_arguments( + *args, + **kwargs, + ) + return secondary_method(*secondary_args, **secondary_kwargs) + + def _write_with_optional_dual_write( + self, + method_name: str, + *args: Any, + **kwargs: Any, + ) -> Any: + primary_method = getattr(self._primary_client, method_name) + primary_args, primary_kwargs = self._primary_call_arguments(*args, **kwargs) + primary_result = primary_method(*primary_args, **primary_kwargs) + + if not self._write_both_enabled: + return primary_result + + secondary_method = getattr(self._secondary_client, method_name) + secondary_args, secondary_kwargs = self._secondary_call_arguments( + *args, + **kwargs, + ) + try: + secondary_method(*secondary_args, **secondary_kwargs) + except Exception: + if self._dual_write_strict: + raise + + log.exception( + "Secondary object-store write failed in non-strict dual-write mode" + ) + + return primary_result + + def _delete_from_both_stores(self, *args: Any, **kwargs: Any) -> Any: + """Delete from primary and, when needed, 
from secondary store too.""" + primary_args, primary_kwargs = self._primary_call_arguments(*args, **kwargs) + primary_result = self._primary_client.remove_object( + *primary_args, + **primary_kwargs, + ) + + if not (self._write_both_enabled or self._read_fallback_to_secondary_enabled): + return primary_result + + secondary_args, secondary_kwargs = self._secondary_call_arguments( + *args, + **kwargs, + ) + try: + self._secondary_client.remove_object(*secondary_args, **secondary_kwargs) + except Exception: + if self._read_fallback_to_secondary_enabled or self._dual_write_strict: + raise + + log.exception( + "Secondary object-store delete failed in non-strict dual-write mode" + ) + + return primary_result + + def get_object(self, *args: Any, **kwargs: Any) -> Any: + """Get object stream from primary store with optional fallback.""" + return self._read_with_optional_fallback("get_object", *args, **kwargs) + + def fget_object(self, *args: Any, **kwargs: Any) -> Any: + """Download object to local file from primary store with optional fallback.""" + return self._read_with_optional_fallback("fget_object", *args, **kwargs) + + def put_object(self, *args: Any, **kwargs: Any) -> Any: + """Upload object from stream with optional dual-write behavior.""" + return self._write_with_optional_dual_write("put_object", *args, **kwargs) + + def fput_object(self, *args: Any, **kwargs: Any) -> Any: + """Upload object from local file with optional dual-write behavior.""" + return self._write_with_optional_dual_write("fput_object", *args, **kwargs) + + def remove_object(self, *args: Any, **kwargs: Any) -> Any: + """Remove object from primary store with optional dual-write behavior.""" + return self._delete_from_both_stores(*args, **kwargs) + + def __getattr__(self, name: str) -> Any: + """Delegate unknown methods to the primary client for compatibility.""" + return getattr(self._primary_client, name) + + +def get_minio_client() -> ObjectStoreFacade: + """Return migration-aware object store facade while keeping API name stable.""" + primary_client = _build_minio_client( + endpoint=settings.PRIMARY_ENDPOINT_URL, + access_key=settings.PRIMARY_ACCESS_KEY_ID, + secret_key=settings.PRIMARY_SECRET_ACCESS_KEY, + secure=settings.PRIMARY_STORAGE_USE_HTTPS, + ) + secondary_client = _build_minio_client( + endpoint=settings.SECONDARY_ENDPOINT_URL, + access_key=settings.SECONDARY_ACCESS_KEY_ID, + secret_key=settings.SECONDARY_SECRET_ACCESS_KEY, + secure=settings.SECONDARY_STORAGE_USE_HTTPS, + ) + + return ObjectStoreFacade( + primary_client=primary_client, + secondary_client=secondary_client, + read_fallback_to_secondary_enabled=( + settings.OBJECT_STORE_READ_FALLBACK_TO_SECONDARY_ENABLED + ), + write_both_enabled=settings.OBJECT_STORE_WRITE_BOTH_ENABLED, + dual_write_strict=settings.OBJECT_STORE_DUAL_WRITE_STRICT, ) diff --git a/gateway/sds_gateway/monitoring/services.py b/gateway/sds_gateway/monitoring/services.py index 8600234c6..edce635c9 100644 --- a/gateway/sds_gateway/monitoring/services.py +++ b/gateway/sds_gateway/monitoring/services.py @@ -50,21 +50,25 @@ def _split_host_port(endpoint: str, *, default_port: int) -> tuple[str, int]: def get_default_service_definitions() -> list[ServiceDefinition]: services: list[ServiceDefinition] = [] - sfs_endpoint = getattr(settings, "SFS_ENDPOINT_URL", None) - if sfs_endpoint is not None: - sfs_host, sfs_port = _split_host_port(sfs_endpoint, default_port=8333) + primary_endpoint = getattr(settings, "PRIMARY_ENDPOINT_URL", None) + if primary_endpoint is not None: + primary_host, 
primary_port = _split_host_port( + primary_endpoint, default_port=9000 + ) services.append( ServiceDefinition( - name="seaweedfs", kind="tcp", host=sfs_host, port=sfs_port + name="primary-storage", kind="tcp", host=primary_host, port=primary_port ) ) - minio_endpoint = getattr(settings, "MINIO_ENDPOINT_URL", None) - if minio_endpoint is not None: - minio_host, minio_port = _split_host_port(minio_endpoint, default_port=9000) + secondary_endpoint = getattr(settings, "SECONDARY_ENDPOINT_URL", None) + if secondary_endpoint is not None: + secondary_host, secondary_port = _split_host_port( + secondary_endpoint, default_port=9000 + ) services.append( ServiceDefinition( - name="minio", kind="tcp", host=minio_host, port=minio_port + name="secondary", kind="tcp", host=secondary_host, port=secondary_port ) ) diff --git a/jupyter/docs/agents.md b/jupyter/docs/agents.md new file mode 100644 index 000000000..c8ad22479 --- /dev/null +++ b/jupyter/docs/agents.md @@ -0,0 +1,65 @@ +# JupyterHub Agent Documentation + +## Purpose + +JupyterHub deployment for SDS: spawns per-user notebook containers with spectrumx SDK access via custom Docker spawner. + +## Architecture + +- **Base image**: `quay.io/jupyterhub/jupyterhub:` (JUPYTERHUB_VERSION arg) +- **Spawner**: Custom `MyDockerSpawner` → `dockerspawner.DockerSpawner` subclass +- **Auth**: Auth0OAuthenticator in prod; `DummyAuthenticator(admin=admin)` locally +- **Notebook image**: `quay.io/jupyter/base-notebook:latest` (DOCKER_NOTEBOOK_IMAGE env) +- **Lab interface**: JupyterLab via `jupyter-labhub` command + `JUPYTER_ENABLE_LAB=yes` +- **Idle culling**: `jupyterhub-idle-culler` service +- **DB**: SQLite at `/data/jupyterhub.sqlite` +- **Cookie secret**: Generated on build, stored at `/data/jupyterhub_cookie_secret` (600 perms) + +## Key Configuration (`jupyterhub_config.py`) + +- `hub_connect_ip` → container name (env-driven) +- `hub_ip/port` → bound to container interface +- `notebook_dir` → `/home/jovyan/work` +- All other settings (limits, timeouts, active_server_limit, cpu/mem limits) are environment-specific and vary by deployment + +### MyDockerSpawner overrides + +- Sets `CHOWN_HOME=yes`, `CHOWN_HOME_OPTS=-R`, `NB_GROUP=nb_users` +- Post-start: `pip install ipywidgets spectrumx` +- Network prefix: `sds-jupyter-local_` + `DOCKER_NETWORK_NAME` +- Volume mounts: `{username}` named volume → `/home/jovyan/work`; `sample_scripts/` → `/home/jovyan/work/sample_scripts` (ro) +- Prefix for user containers: `sds-jupyter-user` + +Docker socket `/var/run/docker.sock` bind-mounted ro into hub (but `sudo` granted for chown/chmod). 
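+
+A minimal sketch of the spawner wiring described above (the trait values here
+are illustrative assumptions, not the deployed config; `post_start_cmd` is a
+`dockerspawner` hook that runs a command in the container after start):
+
+```python
+from dockerspawner import DockerSpawner
+
+
+class MyDockerSpawner(DockerSpawner):
+    """Spawner that fixes home-dir ownership and installs the SDK."""
+
+    def get_env(self):
+        env = super().get_env()
+        # Ask the jupyter base image entrypoint to recursively chown /home/jovyan.
+        env.update(CHOWN_HOME="yes", CHOWN_HOME_OPTS="-R", NB_GROUP="nb_users")
+        return env
+
+
+# In jupyterhub_config.py, where `c` is the config object:
+c.JupyterHub.spawner_class = MyDockerSpawner
+c.DockerSpawner.notebook_dir = "/home/jovyan/work"
+# Runs inside the user container right after it starts.
+c.DockerSpawner.post_start_cmd = "pip install ipywidgets spectrumx"
+```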
+ +## Deployment + +- Local compose: `compose.local.yaml` +- Prod compose: `compose.production.yaml` +- Hub service image: `sds-jupyter-local`, port `8888:8000` (Traefik reverse proxy) +- Traefik labels configured for `/notebook` prefix strip on `sds-dev.crc.nd.edu` +- Env file: `.envs/local/jupyterhub.env` +- Networks: `sds-jupyter-local-net-clients` (bridge, alias `jupyterhub`) + +## Directory Structure + +- `compose/local/` → local dev compose files + Dockerfile +- `compose/production/` → prod compose files + Dockerfile + jupyterhub_config override +- `scripts/` → deployment utilities (`env-selection.sh`, `prod-hostnames.env`) +- `.envs/local/` → local env vars +- `.envs/example/` → env var template + +## Key Files + +| Path | Purpose | +|--|-| +| `compose.local.yaml` | Local compose stack definition | +| `compose.production.yaml` | Production compose stack | +| `compose/local/jupyter/Dockerfile` | Hub image build — installs docker.io, sudo, curl; creates users/groups | +| `compose/production/jupyter/Dockerfile` | Prod hub Dockerfile (same base + chown fix) | +| `compose/local/jupyter/jupyterhub_config.py` | Local dev Hub config + spawner override | +| `compose/production/jupyter/jupyterhub_config.py` | Prod-specific Hub config override | +| `scripts/env-selection.sh` | Staging env file selector (local vs prod) | +| `scripts/prod-hostnames.env` | Production hostname overrides | +| `.envs/local/jupyterhub.env` | Local environment variables | +| `.envs/example/jupyterhub.env` | Template for all required env vars | diff --git a/jupyter/scripts/env-selection.sh b/jupyter/scripts/env-selection.sh index 36861829d..86ad64004 100755 --- a/jupyter/scripts/env-selection.sh +++ b/jupyter/scripts/env-selection.sh @@ -6,99 +6,109 @@ script_dir=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) prod_hosts_file="${script_dir}/prod-hostnames.env" is_production_host() { - local host - host=$(hostname) - - if [[ ! -f "${prod_hosts_file}" ]]; then - return 1 - fi - - while read -r line || [[ -n "${line}" ]]; do - line=$(echo "${line}" | xargs) - [[ -z "${line}" || ${line:0:1} == '#' ]] && continue - if [[ "${line}" == "${host}" ]]; then - return 0 - fi - done < "${prod_hosts_file}" - - return 1 + local host + host=$(hostname) + + if [[ ! -f "${prod_hosts_file}" ]]; then + return 1 + fi + + while read -r line || [[ -n "${line}" ]]; do + line=$(echo "${line}" | xargs) + [[ -z "${line}" || ${line:0:1} == '#' ]] && continue + if [[ "${line}" == "${host}" ]]; then + return 0 + fi + done <"${prod_hosts_file}" + + return 1 } get_target_value() { - local target=$1 - local is_prod=$2 - - local local_env_file=".envs/local/jupyterhub.env" - local production_env_file=".envs/production/jupyterhub.env" - - local output - - case "${target}" in - env) - if [[ "${is_prod}" == true ]]; then - output='production' - else - output='local' - fi - ;; - compose_file) - if [[ "${is_prod}" == true ]]; then - output='compose.production.yaml' - else - output='compose.local.yaml' - fi - ;; - env_file) - if [[ "${is_prod}" == true ]]; then - output="${production_env_file}" - else - output="${local_env_file}" - fi - ;; - client_network) - if [[ "${is_prod}" == true ]]; then - output='sds-jupyter-prod-net-clients' - else - output='sds-jupyter-local-net-clients' - fi - ;; - compose_project_name) - if [[ "${is_prod}" == true ]]; then - output='sds-jupyter-prod' - else - output='sds-jupyter-local' - fi - ;; - *) - printf 'unsupported target: %s\n' "${target}" >&2 - exit 1 - ;; - esac - - if [[ "${target}" == "compose_file" && ! 
-f "${output}" ]]; then - printf '\033[31mERROR: selected compose file "%s" does not exist\033[0m\n' "${output}" >&2 - fi - if [[ "${target}" == "env_file" && ! -f "${output}" ]]; then - printf '\033[31mERROR: selected env file "%s" does not exist\033[0m\n' "${output}" >&2 - fi - - printf '%s\n' "${output}" + local target=$1 + local is_prod=$2 + + local local_env_file=".envs/local/jupyterhub.env" + local production_env_file=".envs/production/jupyterhub.env" + + local output + + case "${target}" in + env) + if [[ "${is_prod}" == true ]]; then + output='production' + else + output='local' + fi + ;; + compose_file) + if [[ "${is_prod}" == true ]]; then + output='compose.production.yaml' + else + output='compose.local.yaml' + fi + ;; + env_file) + if [[ "${is_prod}" == true ]]; then + output="${production_env_file}" + else + output="${local_env_file}" + fi + ;; + client_network) + if [[ "${is_prod}" == true ]]; then + output='sds-jupyter-prod-net-clients' + else + output='sds-jupyter-local-net-clients' + fi + ;; + compose_project_name) + if [[ "${is_prod}" == true ]]; then + output='sds-jupyter-prod' + else + output='sds-jupyter-local' + fi + ;; + *) + printf 'unsupported target: %s\n' "${target}" >&2 + exit 1 + ;; + esac + + if [[ "${target}" == "compose_file" && ! -f "${output}" ]]; then + printf '\033[31mERROR: selected compose file "%s" does not exist\033[0m\n' "${output}" >&2 + fi + if [[ "${target}" == "env_file" && ! -f "${output}" ]]; then + printf '\033[31mERROR: selected env file "%s" does not exist\033[0m\n' "${output}" >&2 + fi + + printf '%s\n' "${output}" } main() { - if [[ $# -ne 1 ]]; then - printf 'usage: %s \n' "$0" >&2 - exit 1 - fi - - local target=$1 - local is_prod=false - - if is_production_host; then - is_prod=true - fi - - get_target_value "${target}" "${is_prod}" + if [[ $# -ne 1 ]]; then + printf 'usage: %s \n' "$0" >&2 + exit 1 + fi + + local target=$1 + local is_prod=false + + # allow explicit override via SDS_ENV (e.g., SDS_ENV=prod just env) + if [[ -n "${SDS_ENV:-}" ]]; then + case "${SDS_ENV}" in + local) is_prod=false ;; + prod | production) is_prod=true ;; + *) + printf '\033[33mUnknown SDS_ENV="%s": must be local, prod, or production\033[0m\n' "${SDS_ENV}" >&2 + exit 1 + ;; + esac + elif is_production_host; then + is_prod=true + fi + + get_target_value "${target}" "${is_prod}" } main "$@" diff --git a/sdk/README.md b/sdk/README.md index f1f086902..5070c03d7 100644 --- a/sdk/README.md +++ b/sdk/README.md @@ -121,7 +121,7 @@ components, create a test user, and set up the integration test environment: 2. Follow the Gateway instructions in the [Gateway README](../gateway/README.md); In summary: 1. Deploy the Docker Compose stack; - 2. Create a MinIO user and bucket with same credentials as in `minio.env`; + 1. Create a storage user and bucket with same credentials as in `storage.env`; 3. Create a test user and API key: 1. Create a Gateway superuser and a regular user (they may be the same); 2. 
Enable their `is_approved` flag in the [admin diff --git a/sdk/config/nginx/nginx.conf b/sdk/config/nginx/nginx.conf index d6f74b54d..54dff35d1 100644 --- a/sdk/config/nginx/nginx.conf +++ b/sdk/config/nginx/nginx.conf @@ -47,7 +47,7 @@ http { # Cache configuration for static assets location ~* \.(js|css|png|jpg|jpeg|gif|ico|svg|woff|woff2|ttf|eot)$ { - expires 30d; + expires 1d; add_header Cache-Control "public, immutable"; } } @@ -58,6 +58,13 @@ http { expires -1; add_header Cache-Control "no-cache, no-store, must-revalidate"; } + + # health check endpoint + location = /healthz { + access_log off; + return 200 'OK'; + } + } # end server } # end http diff --git a/sds-code.code-workspace b/sds-code.code-workspace index 04bdb837c..54231c410 100644 --- a/sds-code.code-workspace +++ b/sds-code.code-workspace @@ -36,6 +36,10 @@ "name": "jupyter", "path": "./jupyter" }, + { + "name": "seaweedfs", + "path": "./seaweedfs" + }, ], "settings": { "[python]": { diff --git a/seaweedfs/.envs/example/seaweedfs.env b/seaweedfs/.envs/example/seaweedfs.env new file mode 100644 index 000000000..2d194af88 --- /dev/null +++ b/seaweedfs/.envs/example/seaweedfs.env @@ -0,0 +1,53 @@ +# ───────────────────────────────────────────────────────── +# SeaweedFS Example Environment Variables +# ───────────────────────────────────────────────────────── +# Copy this to .envs//sfs.env and fill in secrets. +# Never commit .env files to git. +# +# Generate secrets: +# JWT_SIGNING_KEY=$(openssl rand -hex 32) +# JWT_FILER_SIGNING_KEY=$(openssl rand -hex 32) +# S3_SSE_KEK=$(openssl rand -hex 32) +# GRAFANA_PASSWORD= + +# User / Group for file ownership inside containers +UID=1000 +GID=1000 + +# ── Ports ────────────────────────────────────────────── +SFS_MASTER_PORT=9333 +SFS_MASTER_GRPC_PORT=19333 +SFS_MASTER_METRICS_PORT=9324 + +SFS_VOLUME_PORT=8080 +SFS_VOLUME_GRPC_PORT=18080 +SFS_VOLUME_METRICS_PORT=9325 + +SFS_FILER_PORT=8888 +SFS_FILER_GRPC_PORT=18888 +SFS_FILER_METRICS_PORT=9326 + +SFS_S3_PORT=8333 +SFS_S3_METRICS_PORT=9327 + +SFS_WEBDAV_PORT=7333 + +SFS_PROMETHEUS_HOST_PORT=9000 +SFS_PROMETHEUS_CONTAINER_PORT=9090 + +# ── Secrets (set real values, never commit this file) ── +# JWT signing key for volume write authorization. +JWT_SIGNING_KEY= + +# JWT signing key for filer HTTP write/read authorization. +JWT_FILER_SIGNING_KEY= + +# SSE-S3 Key Encryption Key (KEK). +S3_SSE_KEK= + +# Grafana admin password. +GRAFANA_PASSWORD= + +# MinIO backup credentials (for filer.backup S3 sink). +MINIO_BACKUP_ACCESS_KEY= +MINIO_BACKUP_SECRET_KEY= diff --git a/seaweedfs/.gitignore b/seaweedfs/.gitignore new file mode 100644 index 000000000..acae57224 --- /dev/null +++ b/seaweedfs/.gitignore @@ -0,0 +1,4 @@ +.env +data/ +.envs/* +!.envs/example/ diff --git a/seaweedfs/compose.ci.yaml b/seaweedfs/compose.ci.yaml new file mode 100644 index 000000000..0aa24d06f --- /dev/null +++ b/seaweedfs/compose.ci.yaml @@ -0,0 +1,157 @@ +# CI COMPOSE — SeaweedFS stack (minimal subset for CI/testing) +# 4 services only: master, single volume, filer, s3 gateway. +# Uses bind mounts under ./data/ (ephemeral). No JWT, no metrics infra. 
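+#
+# Example bring-up (a sketch; the external network must exist before `up`):
+#   docker network create sds-network-ci
+#   docker compose -f compose.ci.yaml up -d --wait
+#   curl -fsS http://localhost:8333/healthz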
+ +x-logging: &default-logging + driver: "json-file" + options: + max-size: "100m" + max-file: "3" + +networks: + sds-gateway-ci-seaweed-net: + driver: bridge + sds-network-ci: + external: true + +services: + # ───────────────────────────────────────────────────────── + # MASTER + # ───────────────────────────────────────────────────────── + sds-gateway-ci-sfs-master: + image: docker.io/chrislusf/seaweedfs:4.23_large_disk_full + container_name: sds-gateway-ci-sfs-master + user: "${UID:-1000}:${GID:-1000}" + restart: unless-stopped + networks: + - sds-gateway-ci-seaweed-net + ports: + - "${SFS_MASTER_PORT:-9333}:${SFS_MASTER_PORT:-9333}" + - "${SFS_MASTER_GRPC_PORT:-19333}:${SFS_MASTER_GRPC_PORT:-19333}" + volumes: + - ./data/master:/data + - ./config/master.toml:/etc/seaweedfs/master.toml:ro + - ./config/security.toml:/etc/seaweedfs/security.toml:ro + logging: *default-logging + healthcheck: + test: ["CMD-SHELL", "curl -fsS http://localhost:${SFS_MASTER_PORT:-9333}/cluster/status >/dev/null"] + interval: 15s + retries: 5 + start_interval: 5s + start_period: 30s + timeout: 5s + command: | + master + -mdir=/data + -ip=sds-gateway-ci-sfs-master + -ip.bind=0.0.0.0 + -port=${SFS_MASTER_PORT:-9333} + -metricsPort=${SFS_MASTER_METRICS_PORT:-9324} + + # ───────────────────────────────────────────────────────── + # VOLUME + # ───────────────────────────────────────────────────────── + sds-gateway-ci-sfs-volume: + image: docker.io/chrislusf/seaweedfs:4.23_large_disk_full + container_name: sds-gateway-ci-sfs-volume + user: "${UID:-1000}:${GID:-1000}" + restart: unless-stopped + depends_on: + - sds-gateway-ci-sfs-master + networks: + - sds-gateway-ci-seaweed-net + ports: + - "${SFS_VOLUME_PORT:-8080}:${SFS_VOLUME_PORT:-8080}" + - "${SFS_VOLUME_GRPC_PORT:-18080}:${SFS_VOLUME_GRPC_PORT:-18080}" + volumes: + - ./data/volumes:/data + - ./config/security.toml:/etc/seaweedfs/security.toml:ro + logging: *default-logging + healthcheck: + test: ["CMD-SHELL", "curl -fsS http://localhost:${SFS_VOLUME_PORT:-8080}/healthz >/dev/null"] + interval: 15s + retries: 5 + start_interval: 5s + start_period: 30s + timeout: 5s + command: | + volume + -master=sds-gateway-ci-sfs-master:${SFS_MASTER_PORT:-9333} + -ip=sds-gateway-ci-sfs-volume + -ip.bind=0.0.0.0 + -port=${SFS_VOLUME_PORT:-8080} + -max=0 + -dir=/data + -index=leveldb + -metricsPort=${SFS_VOLUME_METRICS_PORT:-9325} + + # ───────────────────────────────────────────────────────── + # FILER + # ───────────────────────────────────────────────────────── + sds-gateway-ci-sfs-filer: + image: docker.io/chrislusf/seaweedfs:4.23_large_disk_full + container_name: sds-gateway-ci-sfs-filer + user: "${UID:-1000}:${GID:-1000}" + restart: unless-stopped + depends_on: + - sds-gateway-ci-sfs-master + - sds-gateway-ci-sfs-volume + networks: + - sds-gateway-ci-seaweed-net + ports: + - "${SFS_FILER_PORT:-8888}:${SFS_FILER_PORT:-8888}" + - "${SFS_FILER_GRPC_PORT:-18888}:${SFS_FILER_GRPC_PORT:-18888}" + volumes: + - ./data/filer:/data + - ./config/filer.toml:/etc/seaweedfs/filer.toml:ro + - ./config/security.toml:/etc/seaweedfs/security.toml:ro + logging: *default-logging + healthcheck: + test: ["CMD-SHELL", "curl -fsS http://localhost:${SFS_FILER_PORT:-8888}/ >/dev/null"] + interval: 15s + retries: 5 + start_interval: 5s + start_period: 30s + timeout: 5s + command: | + filer + -master=sds-gateway-ci-sfs-master:${SFS_MASTER_PORT:-9333} + -ip=sds-gateway-ci-sfs-filer + -ip.bind=0.0.0.0 + -port=${SFS_FILER_PORT:-8888} + -metricsPort=${SFS_FILER_METRICS_PORT:-9326} + + # 
───────────────────────────────────────────────────────── + # S3 GATEWAY + # ───────────────────────────────────────────────────────── + sds-gateway-ci-sfs-s3: + image: docker.io/chrislusf/seaweedfs:4.23_large_disk_full + container_name: sds-gateway-ci-sfs-s3 + user: "${UID:-1000}:${GID:-1000}" + restart: unless-stopped + depends_on: + - sds-gateway-ci-sfs-master + - sds-gateway-ci-sfs-volume + - sds-gateway-ci-sfs-filer + networks: + - sds-gateway-ci-seaweed-net + - sds-network-ci + ports: + - "${SFS_S3_PORT:-8333}:${SFS_S3_PORT:-8333}" + volumes: + - ./config/s3-config.json:/etc/seaweedfs/s3.json:ro + logging: *default-logging + healthcheck: + test: ["CMD-SHELL", "curl -fsS http://localhost:${SFS_S3_PORT:-8333}/healthz >/dev/null"] + interval: 15s + retries: 5 + start_interval: 5s + start_period: 30s + timeout: 5s + command: | + s3 + -filer=sds-gateway-ci-sfs-filer:${SFS_FILER_PORT:-8888} + -ip.bind=0.0.0.0 + -port=${SFS_S3_PORT:-8333} + -config=/etc/seaweedfs/s3.json + -metricsPort=${SFS_S3_METRICS_PORT:-9327} diff --git a/seaweedfs/compose.local.yaml b/seaweedfs/compose.local.yaml new file mode 100644 index 000000000..67ce323e4 --- /dev/null +++ b/seaweedfs/compose.local.yaml @@ -0,0 +1,212 @@ +x-logging: &default-logging + driver: "json-file" + options: + max-size: "100m" + max-file: "3" + +networks: + sds-gateway-local-seaweed-net: + driver: bridge + sds-network-local: + external: true + +volumes: + prometheus-data: + +services: + # ───────────────────────────────────────────────────────── + # MASTER + # ───────────────────────────────────────────────────────── + sds-gateway-local-sfs-master: + image: docker.io/chrislusf/seaweedfs:4.23_large_disk_full + container_name: sds-gateway-local-sfs-master + user: "${UID:-1000}:${GID:-1000}" + restart: unless-stopped + networks: + - sds-gateway-local-seaweed-net + ports: + - "${SFS_MASTER_PORT:-9333}:${SFS_MASTER_PORT:-9333}" + - "${SFS_MASTER_GRPC_PORT:-19333}:${SFS_MASTER_GRPC_PORT:-19333}" + volumes: + - ./data/master:/data + - ./config/master.toml:/etc/seaweedfs/master.toml:ro + - ./config/security.toml:/etc/seaweedfs/security.toml:ro + logging: *default-logging + healthcheck: + test: ["CMD-SHELL", "curl -fsS http://localhost:${SFS_MASTER_PORT:-9333}/cluster/status >/dev/null"] + interval: 15s + retries: 5 + start_interval: 5s + start_period: 30s + timeout: 5s + command: | + master + -mdir=/data + -ip=sds-gateway-local-sfs-master + -ip.bind=0.0.0.0 + -port=${SFS_MASTER_PORT:-9333} + -metricsPort=${SFS_MASTER_METRICS_PORT:-9324} + + # ───────────────────────────────────────────────────────── + # VOLUME — single volume server + # ───────────────────────────────────────────────────────── + sds-gateway-local-sfs-volume: + image: docker.io/chrislusf/seaweedfs:4.23_large_disk_full + container_name: sds-gateway-local-sfs-volume + user: "${UID:-1000}:${GID:-1000}" + restart: unless-stopped + depends_on: + - sds-gateway-local-sfs-master + networks: + - sds-gateway-local-seaweed-net + ports: + - "${SFS_VOLUME_PORT:-8080}:${SFS_VOLUME_PORT:-8080}" + - "${SFS_VOLUME_GRPC_PORT:-18080}:${SFS_VOLUME_GRPC_PORT:-18080}" + volumes: + - ./data/volumes:/data + - ./config/security.toml:/etc/seaweedfs/security.toml:ro + logging: *default-logging + healthcheck: + test: ["CMD-SHELL", "curl -fsS http://localhost:${SFS_VOLUME_PORT:-8080}/healthz >/dev/null"] + interval: 15s + retries: 5 + start_interval: 5s + start_period: 30s + timeout: 5s + command: | + volume + -master=sds-gateway-local-sfs-master:${SFS_MASTER_PORT:-9333} + -ip=sds-gateway-local-sfs-volume + 
-ip.bind=0.0.0.0 + -port=${SFS_VOLUME_PORT:-8080} + -max=0 + -dir=/data + -index=leveldb + -metricsPort=${SFS_VOLUME_METRICS_PORT:-9325} + + # ───────────────────────────────────────────────────────── + # FILER + # ───────────────────────────────────────────────────────── + sds-gateway-local-sfs-filer: + image: docker.io/chrislusf/seaweedfs:4.23_large_disk_full + container_name: sds-gateway-local-sfs-filer + user: "${UID:-1000}:${GID:-1000}" + restart: unless-stopped + depends_on: + - sds-gateway-local-sfs-master + - sds-gateway-local-sfs-volume + networks: + - sds-gateway-local-seaweed-net + ports: + - "${SFS_FILER_PORT:-8888}:${SFS_FILER_PORT:-8888}" + - "${SFS_FILER_GRPC_PORT:-18888}:${SFS_FILER_GRPC_PORT:-18888}" + volumes: + - ./data/filer:/data + - ./config/filer.toml:/etc/seaweedfs/filer.toml:ro + - ./config/security.toml:/etc/seaweedfs/security.toml:ro + logging: *default-logging + healthcheck: + test: ["CMD-SHELL", "curl -fsS http://localhost:${SFS_FILER_PORT:-8888}/ >/dev/null"] + interval: 15s + retries: 5 + start_interval: 5s + start_period: 30s + timeout: 5s + command: | + filer + -master=sds-gateway-local-sfs-master:${SFS_MASTER_PORT:-9333} + -ip=sds-gateway-local-sfs-filer + -ip.bind=0.0.0.0 + -port=${SFS_FILER_PORT:-8888} + -metricsPort=${SFS_FILER_METRICS_PORT:-9326} + + # ───────────────────────────────────────────────────────── + # S3 GATEWAY + # ───────────────────────────────────────────────────────── + sds-gateway-local-sfs-s3: + image: docker.io/chrislusf/seaweedfs:4.23_large_disk_full + container_name: sds-gateway-local-sfs-s3 + user: "${UID:-1000}:${GID:-1000}" + restart: unless-stopped + depends_on: + - sds-gateway-local-sfs-master + - sds-gateway-local-sfs-volume + - sds-gateway-local-sfs-filer + networks: + - sds-gateway-local-seaweed-net + - sds-network-local + ports: + - "${SFS_S3_PORT:-8333}:${SFS_S3_PORT:-8333}" + volumes: + - ./config/s3-config.json:/etc/seaweedfs/s3.json:ro + logging: *default-logging + healthcheck: + test: ["CMD-SHELL", "curl -fsS http://localhost:${SFS_S3_PORT:-8333}/healthz >/dev/null"] + interval: 15s + retries: 5 + start_interval: 5s + start_period: 30s + timeout: 5s + command: | + s3 + -filer=sds-gateway-local-sfs-filer:${SFS_FILER_PORT:-8888} + -ip.bind=0.0.0.0 + -port=${SFS_S3_PORT:-8333} + -config=/etc/seaweedfs/s3.json + -metricsPort=${SFS_S3_METRICS_PORT:-9327} + + # ───────────────────────────────────────────────────────── + # WEBDAV + # ───────────────────────────────────────────────────────── + sds-gateway-local-sfs-webdav: + image: docker.io/chrislusf/seaweedfs:4.23_large_disk_full + container_name: sds-gateway-local-sfs-webdav + user: "${UID:-1000}:${GID:-1000}" + restart: unless-stopped + depends_on: + - sds-gateway-local-sfs-master + - sds-gateway-local-sfs-volume + - sds-gateway-local-sfs-filer + networks: + - sds-gateway-local-seaweed-net + ports: + - "${SFS_WEBDAV_PORT:-7333}:${SFS_WEBDAV_PORT:-7333}" + logging: *default-logging + healthcheck: + test: ["CMD-SHELL", "curl -sS -o /dev/null http://localhost:${SFS_WEBDAV_PORT:-7333}/"] + interval: 15s + retries: 5 + start_interval: 5s + start_period: 30s + timeout: 5s + command: | + webdav + -filer=sds-gateway-local-sfs-filer:${SFS_FILER_PORT:-8888} + + # ───────────────────────────────────────────────────────── + # PROMETHEUS — pull-based metrics + # ───────────────────────────────────────────────────────── + sds-gateway-local-sfs-prometheus: + image: docker.io/prom/prometheus:v2.53.0 + container_name: sds-gateway-local-sfs-prometheus + restart: unless-stopped + 
depends_on: + - sds-gateway-local-sfs-s3 + healthcheck: + test: ["CMD-SHELL", "wget --spider -q http://localhost:${SFS_PROMETHEUS_CONTAINER_PORT:-9090}/-/healthy || exit 1"] + interval: 15s + retries: 5 + start_interval: 5s + start_period: 30s + timeout: 5s + networks: + - sds-gateway-local-seaweed-net + ports: + - "${SFS_PROMETHEUS_HOST_PORT:-9000}:${SFS_PROMETHEUS_CONTAINER_PORT:-9090}" + volumes: + - prometheus-data:/prometheus + - ./prometheus/prometheus.yaml:/etc/prometheus/prometheus.yaml:ro + command: + - "--config.file=/etc/prometheus/prometheus.yaml" + - "--storage.tsdb.path=/prometheus" + - "--web.enable-lifecycle" diff --git a/seaweedfs/compose.production.yaml b/seaweedfs/compose.production.yaml new file mode 100644 index 000000000..35a73a58f --- /dev/null +++ b/seaweedfs/compose.production.yaml @@ -0,0 +1,501 @@ +# ⚠️ PRODUCTION COMPOSE — SeaweedFS 5×22TB + EC RS(10+4) ⚠️ +# Following sfs-deployment-checklist.md for safe production deployment. +# +# Architecture: +# - Single master (restartable, light load) +# - 5 volume servers (1 per 22TB XFS drive, ports 8081-8085) +# - Filer with leveldb2 (embedded metadata store) +# - S3 gateway for S3-compatible access +# - WebDAV access +# - Admin + Worker for Erasure Coding (RS 10+4) + cluster maintenance +# - Pushgateway + Prometheus (push metrics mode) + Grafana +# - Async filer backup to MinIO (S3 sink) +# +# PRE-DEPLOYMENT (run once): +# docker network create sds-gateway-prod-seaweed-net +# mkdir -p /disk{1,2,3,4,5}/{data,idx} +# mkdir -p /data/seaweedfs/{master,filer} +# +# SECURITY: Set these in your .env file (never commit to git): +# JWT_SIGNING_KEY — master signs, volumes validate on write +# JWT_FILER_SIGNING_KEY— S3 gateway signs, filer validates +# S3_SSE_KEK — SSE-S3 encryption key +# GRAFANA_PASSWORD — Grafana admin password +# +# IMAGE: 4.23_large_disk_full — supports large volumes, full backend suite. 
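+#
+# POST-DEPLOYMENT smoke test (a sketch; assumes the aws CLI and the S3
+# credentials configured in config/s3-config.json):
+#   curl -fsS http://localhost:9333/cluster/status
+#   aws --endpoint-url http://localhost:8333 s3 ls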
+ +x-logging: &default-logging + driver: "json-file" + options: + max-size: "100m" + max-file: "3" + +networks: + # Internal SeaweedFS network (created before deploy) + sds-gateway-prod-seaweed-net: + external: true + # Shared network with gateway services + sds-network-prod: + external: true + +volumes: + prometheus-data: + grafana-data: + +services: + # ───────────────────────────────────────────────────────── + # MASTER — cluster coordinator, assigns volumes, signs JWTs + # ───────────────────────────────────────────────────────── + sds-gateway-prod-sfs-master: + image: docker.io/chrislusf/seaweedfs:4.23_large_disk_full + container_name: sds-gateway-prod-sfs-master + restart: unless-stopped + networks: + - sds-gateway-prod-seaweed-net + ports: + - "9333:9333" # HTTP + - "19333:19333" # gRPC + environment: + # JWT signing key for volume write auth + WEED_JWT_SIGNING_KEY: "${JWT_SIGNING_KEY}" + healthcheck: + test: ["CMD-SHELL", "curl -fsS http://localhost:9333/cluster/status >/dev/null"] + interval: 15s + retries: 5 + start_interval: 5s + start_period: 30s + timeout: 5s + volumes: + # Persistent metadata (filer store, master state) + - /data/seaweedfs/master:/data + # Config files + - ./config/master.toml:/etc/seaweedfs/master.toml:ro + - ./config/security.toml:/etc/seaweedfs/security.toml:ro + logging: *default-logging + command: | + master + -mdir=/data + -ip=sds-gateway-prod-sfs-master + -ip.bind=0.0.0.0 + -port=9333 + -volumePreallocate + -volumeSizeLimitMB=30000 + -master.metrics.address=http://sds-gateway-prod-sfs-pushgateway:9091 + + # ───────────────────────────────────────────────────────── + # 5 VOLUME SERVERS — one per 22TB XFS drive + # Each has dedicated data + idx paths, leveldb index, + # and per-drive healthcheck. + # ───────────────────────────────────────────────────────── + sds-gateway-prod-sfs-volume1: + image: docker.io/chrislusf/seaweedfs:4.23_large_disk_full + container_name: sds-gateway-prod-sfs-volume1 + restart: unless-stopped + networks: + - sds-gateway-prod-seaweed-net + ports: + - "8081:8081" # HTTP + - "18081:18081" # gRPC + environment: + WEED_JWT_SIGNING_KEY: "${JWT_SIGNING_KEY}" + volumes: + - /disk1/data:/data + - /disk1/idx:/idx + logging: *default-logging + healthcheck: + test: ["CMD-SHELL", "curl -fsS http://localhost:8081/healthz >/dev/null"] + interval: 15s + retries: 5 + start_interval: 5s + start_period: 30s + timeout: 5s + command: | + volume + -master=sds-gateway-prod-sfs-master:9333 + -ip=sds-gateway-prod-sfs-volume1 + -ip.bind=0.0.0.0 + -port=8081 + -max=0 + -dir=/data + -dir.idx=/idx + -index=leveldb + -dataCenter=dc1 + -rack=rack1 + -compactionMBps=40 + -minFreeSpacePercent=7 + + sds-gateway-prod-sfs-volume2: + image: docker.io/chrislusf/seaweedfs:4.23_large_disk_full + container_name: sds-gateway-prod-sfs-volume2 + restart: unless-stopped + networks: + - sds-gateway-prod-seaweed-net + ports: + - "8082:8082" + - "18082:18082" + environment: + WEED_JWT_SIGNING_KEY: "${JWT_SIGNING_KEY}" + volumes: + - /disk2/data:/data + - /disk2/idx:/idx + logging: *default-logging + healthcheck: + test: ["CMD-SHELL", "curl -fsS http://localhost:8082/healthz >/dev/null"] + interval: 15s + retries: 5 + start_interval: 5s + start_period: 30s + timeout: 5s + command: | + volume + -master=sds-gateway-prod-sfs-master:9333 + -ip=sds-gateway-prod-sfs-volume2 + -ip.bind=0.0.0.0 + -port=8082 + -max=0 + -dir=/data + -dir.idx=/idx + -index=leveldb + -dataCenter=dc1 + -rack=rack1 + -compactionMBps=40 + -minFreeSpacePercent=7 + + sds-gateway-prod-sfs-volume3: + image: 
docker.io/chrislusf/seaweedfs:4.23_large_disk_full + container_name: sds-gateway-prod-sfs-volume3 + restart: unless-stopped + networks: + - sds-gateway-prod-seaweed-net + ports: + - "8083:8083" + - "18083:18083" + environment: + WEED_JWT_SIGNING_KEY: "${JWT_SIGNING_KEY}" + volumes: + - /disk3/data:/data + - /disk3/idx:/idx + logging: *default-logging + healthcheck: + test: ["CMD-SHELL", "curl -fsS http://localhost:8083/healthz >/dev/null"] + interval: 15s + retries: 5 + start_interval: 5s + start_period: 30s + timeout: 5s + command: | + volume + -master=sds-gateway-prod-sfs-master:9333 + -ip=sds-gateway-prod-sfs-volume3 + -ip.bind=0.0.0.0 + -port=8083 + -max=0 + -dir=/data + -dir.idx=/idx + -index=leveldb + -dataCenter=dc1 + -rack=rack1 + -compactionMBps=40 + -minFreeSpacePercent=7 + + sds-gateway-prod-sfs-volume4: + image: docker.io/chrislusf/seaweedfs:4.23_large_disk_full + container_name: sds-gateway-prod-sfs-volume4 + restart: unless-stopped + networks: + - sds-gateway-prod-seaweed-net + ports: + - "8084:8084" + - "18084:18084" + environment: + WEED_JWT_SIGNING_KEY: "${JWT_SIGNING_KEY}" + volumes: + - /disk4/data:/data + - /disk4/idx:/idx + logging: *default-logging + healthcheck: + test: ["CMD-SHELL", "curl -fsS http://localhost:8084/healthz >/dev/null"] + interval: 15s + retries: 5 + start_interval: 5s + start_period: 30s + timeout: 5s + command: | + volume + -master=sds-gateway-prod-sfs-master:9333 + -ip=sds-gateway-prod-sfs-volume4 + -ip.bind=0.0.0.0 + -port=8084 + -max=0 + -dir=/data + -dir.idx=/idx + -index=leveldb + -dataCenter=dc1 + -rack=rack1 + -compactionMBps=40 + -minFreeSpacePercent=7 + + sds-gateway-prod-sfs-volume5: + image: docker.io/chrislusf/seaweedfs:4.23_large_disk_full + container_name: sds-gateway-prod-sfs-volume5 + restart: unless-stopped + networks: + - sds-gateway-prod-seaweed-net + ports: + - "8085:8085" + - "18085:18085" + environment: + WEED_JWT_SIGNING_KEY: "${JWT_SIGNING_KEY}" + volumes: + - /disk5/data:/data + - /disk5/idx:/idx + logging: *default-logging + healthcheck: + test: ["CMD-SHELL", "curl -fsS http://localhost:8085/healthz >/dev/null"] + interval: 15s + retries: 5 + start_interval: 5s + start_period: 30s + timeout: 5s + command: | + volume + -master=sds-gateway-prod-sfs-master:9333 + -ip=sds-gateway-prod-sfs-volume5 + -ip.bind=0.0.0.0 + -port=8085 + -max=0 + -dir=/data + -dir.idx=/idx + -index=leveldb + -dataCenter=dc1 + -rack=rack1 + -compactionMBps=40 + -minFreeSpacePercent=7 + + # ───────────────────────────────────────────────────────── + # FILER — metadata store, file namespace, HTTP file browser + # ───────────────────────────────────────────────────────── + sds-gateway-prod-sfs-filer: + image: docker.io/chrislusf/seaweedfs:4.23_large_disk_full + container_name: sds-gateway-prod-sfs-filer + restart: unless-stopped + depends_on: + - sds-gateway-prod-sfs-master + networks: + - sds-gateway-prod-seaweed-net + ports: + - "8888:8888" # HTTP + - "18888:18888" # gRPC + environment: + # JWT key for volume write auth + WEED_JWT_SIGNING_KEY: "${JWT_SIGNING_KEY}" + # JWT key for filer HTTP write auth — S3 gateway signs, filer validates + WEED_JWT_FILER_SIGNING_KEY: "${JWT_FILER_SIGNING_KEY}" + volumes: + # Persistent filer metadata (leveldb2 store) + - /data/seaweedfs/filer:/data + # Config files + - ./config/filer.toml:/etc/seaweedfs/filer.toml:ro + - ./config/security.toml:/etc/seaweedfs/security.toml:ro + logging: *default-logging + healthcheck: + test: ["CMD-SHELL", "curl -fsS http://localhost:8888/ >/dev/null"] + interval: 15s + retries: 5 + 
start_interval: 5s + start_period: 30s + timeout: 5s + command: | + filer + -master=sds-gateway-prod-sfs-master:9333 + -ip=sds-gateway-prod-sfs-filer + -ip.bind=0.0.0.0 + -port=8888 + -encryptVolumeData=false + -maxMB=32 + + # ───────────────────────────────────────────────────────── + # S3 GATEWAY — S3-compatible API, connects to filer + # ───────────────────────────────────────────────────────── + sds-gateway-prod-sfs-s3: + image: docker.io/chrislusf/seaweedfs:4.23_large_disk_full + container_name: sds-gateway-prod-sfs-s3 + restart: unless-stopped + depends_on: + - sds-gateway-prod-sfs-filer + networks: + # Internal: connects to filer/volume + - sds-gateway-prod-seaweed-net + # External: gateway services connect here + - sds-network-prod + ports: + - "8333:8333" + environment: + # Must match filer's WEED_JWT_FILER_SIGNING_KEY + WEED_JWT_FILER_SIGNING_KEY: "${JWT_FILER_SIGNING_KEY}" + # SSE-S3 Key Encryption Key + WEED_S3_SSE_KEK: "${S3_SSE_KEK}" + volumes: + - ./config/s3-config.json:/etc/seaweedfs/s3.json:ro + logging: *default-logging + healthcheck: + test: ["CMD-SHELL", "curl -fsS http://localhost:8333/healthz >/dev/null"] + interval: 15s + retries: 5 + start_interval: 5s + start_period: 30s + timeout: 5s + command: | + s3 + -filer=sds-gateway-prod-sfs-filer:8888 + -port=8333 + -config=/etc/seaweedfs/s3.json + -domain=.s3.example.com + + # ───────────────────────────────────────────────────────── + # WEBDAV — WebDAV access to filer namespace + # ───────────────────────────────────────────────────────── + sds-gateway-prod-sfs-webdav: + image: docker.io/chrislusf/seaweedfs:4.23_large_disk_full + container_name: sds-gateway-prod-sfs-webdav + restart: unless-stopped + depends_on: + - sds-gateway-prod-sfs-master + - sds-gateway-prod-sfs-filer + networks: + - sds-gateway-prod-seaweed-net + logging: *default-logging + command: | + webdav + -filer=sds-gateway-prod-sfs-filer:8888 + + # ───────────────────────────────────────────────────────── + # ADMIN — cluster admin server (EC management, maintenance) + # ───────────────────────────────────────────────────────── + sds-gateway-prod-sfs-admin: + image: docker.io/chrislusf/seaweedfs:4.23_large_disk_full + container_name: sds-gateway-prod-sfs-admin + restart: unless-stopped + depends_on: + - sds-gateway-prod-sfs-master + networks: + - sds-gateway-prod-seaweed-net + ports: + - "23646:23646" # Admin HTTP + logging: *default-logging + healthcheck: + test: ["CMD-SHELL", "curl -fsS http://localhost:23646/ >/dev/null"] + interval: 15s + retries: 5 + start_interval: 5s + start_period: 30s + timeout: 5s + command: | + admin + -master=sds-gateway-prod-sfs-master:9333 + + # ───────────────────────────────────────────────────────── + # WORKER — runs erasure_coding plugin and maintenance scripts + # Continuously converts full/quiet volumes to EC shards. + # ───────────────────────────────────────────────────────── + sds-gateway-prod-sfs-worker: + image: docker.io/chrislusf/seaweedfs:4.23_large_disk_full + container_name: sds-gateway-prod-sfs-worker + restart: unless-stopped + depends_on: + - sds-gateway-prod-sfs-admin + networks: + - sds-gateway-prod-seaweed-net + logging: *default-logging + command: | + worker + -admin=sds-gateway-prod-sfs-admin:23646 + + # ───────────────────────────────────────────────────────── + # PROMETHEUS + PUSHGATEWAY — push-based metrics collection + # SeaweedFS components push metrics to pushgateway; + # Prometheus scrapes from pushgateway (simpler than + # dynamic target discovery for volume servers). 
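+  #
+  # A minimal scrape job for this layout might look like the following
+  # (a sketch; the real config lives in ./prometheus/prometheus.yaml):
+  #   scrape_configs:
+  #     - job_name: "seaweedfs"
+  #       honor_labels: true
+  #       static_configs:
+  #         - targets: ["sds-gateway-prod-sfs-pushgateway:9091"]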
+ # ───────────────────────────────────────────────────────── + sds-gateway-prod-sfs-prometheus: + image: docker.io/prom/prometheus:v2.53.0 + container_name: sds-gateway-prod-sfs-prometheus + restart: unless-stopped + networks: + - sds-gateway-prod-seaweed-net + ports: + - "9090:9090" + healthcheck: + test: ["CMD-SHELL", "wget --spider -q http://localhost:9090/-/healthy || exit 1"] + interval: 15s + retries: 5 + start_interval: 5s + start_period: 30s + timeout: 5s + volumes: + - prometheus-data:/prometheus + - ./prometheus/prometheus.yaml:/etc/prometheus/prometheus.yaml:ro + command: + - "--config.file=/etc/prometheus/prometheus.yaml" + - "--storage.tsdb.path=/prometheus" + - "--web.enable-lifecycle" + + sds-gateway-prod-sfs-pushgateway: + image: docker.io/prom/pushgateway:v1.9.0 + container_name: sds-gateway-prod-sfs-pushgateway + restart: unless-stopped + networks: + - sds-gateway-prod-seaweed-net + ports: + - "9091:9091" + healthcheck: + test: ["CMD-SHELL", "wget --spider -q http://localhost:9091/-/healthy || exit 1"] + interval: 15s + retries: 5 + start_interval: 5s + start_period: 30s + timeout: 5s + + # ───────────────────────────────────────────────────────── + # GRAFANA — dashboards + alerting + # ───────────────────────────────────────────────────────── + sds-gateway-prod-sfs-grafana: + image: docker.io/grafana/grafana:11.1.0 + container_name: sds-gateway-prod-sfs-grafana + restart: unless-stopped + networks: + - sds-gateway-prod-seaweed-net + ports: + - "3000:3000" + healthcheck: + test: ["CMD-SHELL", "curl -fsS http://localhost:3000/api/health >/dev/null"] + interval: 15s + retries: 5 + start_interval: 5s + start_period: 30s + timeout: 5s + environment: + GF_SECURITY_ADMIN_PASSWORD: "${GRAFANA_PASSWORD}" + volumes: + - grafana-data:/var/lib/grafana + + # ───────────────────────────────────────────────────────── + # FILER BACKUP — async replication to MinIO (S3 sink) + # Subscribes to filer metadata change log (CDC) and + # replicates file content to the configured S3-compatible + # storage (MinIO). Checkpointed for safe restarts. 
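+  #
+  # The S3 sink in config/replication.toml follows the template from
+  # `weed scaffold -config replication`; a sketch with assumed values:
+  #   [sink.s3]
+  #   enabled = true
+  #   endpoint = "http://minio:9000"
+  #   bucket = "spectrumx-backup"
+  #   directory = "/"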
+ # ───────────────────────────────────────────────────────── + sds-gateway-prod-sfs-filer-backup: + image: docker.io/chrislusf/seaweedfs:4.23_large_disk_full + container_name: sds-gateway-prod-sfs-filer-backup + restart: unless-stopped + depends_on: + - sds-gateway-prod-sfs-filer + networks: + - sds-gateway-prod-seaweed-net + volumes: + - ./config/replication.toml:/etc/seaweedfs/replication.toml:ro + command: | + filer.backup + -filer=sds-gateway-prod-sfs-filer:8888 + -config=/etc/seaweedfs/replication.toml diff --git a/seaweedfs/config/credential.toml b/seaweedfs/config/credential.toml new file mode 100644 index 000000000..7e3bde779 --- /dev/null +++ b/seaweedfs/config/credential.toml @@ -0,0 +1,47 @@ +# Put this file to one of the location, with descending priority +# Based on 'docker compose -p seaweedfs exec -it sds-gateway-local-sfs-master weed scaffold -config credential' +# ./credential.toml +# $HOME/.seaweedfs/credential.toml +# /etc/seaweedfs/credential.toml +# this file is read by S3 API and IAM API servers + +# Choose one of the credential stores below +# Only one store can be enabled at a time + +# Filer-based credential store (default, uses existing filer storage) +[credential.filer_etc] + enabled = true + # filer address and grpc_dial_option will be automatically configured by the server + + # PostgreSQL credential store (recommended for multi-node deployments) + # [credential.postgres] + # database = "seaweedfs" + # enabled = false + # hostname = "localhost" + # password = "your_password" + # port = 5432 + # schema = "public" + # sslmode = "disable" + # username = "seaweedfs" + # # Optional: table name prefix (default: "sw_") + # table_prefix = "sw_" + # # Connection pool settings + # connection_max_idle = 10 + # connection_max_lifetime_seconds = 3600 + # connection_max_open = 100 + + # Memory credential store (for testing only, data is lost on restart) + # [credential.memory] + # enabled = false + + # # Environment variable overrides: + # # Any configuration value can be overridden by environment variables + # # Rules: + # # * Prefix with "WEED_CREDENTIAL_" + # # * Convert to uppercase + # # * Replace '.' with '_' + # # + # # Examples: + # # export WEED_CREDENTIAL_POSTGRES_PASSWORD=secret + # # export WEED_CREDENTIAL_POSTGRES_HOSTNAME=db.example.com + # # export WEED_CREDENTIAL_FILER_ETC_ENABLED=true diff --git a/seaweedfs/config/filer.toml b/seaweedfs/config/filer.toml new file mode 100644 index 000000000..e57ca931b --- /dev/null +++ b/seaweedfs/config/filer.toml @@ -0,0 +1,436 @@ +# A sample TOML config file for SeaweedFS filer store +# Based on 'docker compose -p seaweedfs exec -it sds-gateway-local-sfs-volume weed scaffold -config filer' +# Used with "weed filer" or "weed server -filer" +# Put this file to one of the location, with descending priority +# ./filer.toml +# $HOME/.seaweedfs/filer.toml +# /etc/seaweedfs/filer.toml + +#################################################### +# Customizable filer server options +#################################################### +[filer.options] + # with http DELETE, by default the filer would check whether a folder is empty. 
+ # recursive_delete will delete all sub folders and files, similar to "rm -Rf" + recursive_delete = false + #max_file_name_length = 255 + + #################################################### + # The following are filer store options + #################################################### + +[leveldb2] + # local on disk, mostly for simple single-machine setup, fairly scalable + # faster than previous leveldb, recommended. + dir = "/data/filer/filerldb2" # directory to store level db files + enabled = true + +[leveldb3] + # similar to leveldb2. + # each bucket has its own meta store. + dir = "./filerldb3" # directory to store level db files + enabled = false + +[rocksdb] + # local on disk, similar to leveldb + # since it is using a C wrapper, you need to install rocksdb and build it by yourself + dir = "./filerrdb" # directory to store rocksdb files + enabled = false + +[sqlite] + # local on disk, similar to leveldb + dbFile = "./filer.db" # sqlite db file + enabled = false + +[mysql] # or memsql, tidb + # CREATE TABLE IF NOT EXISTS `filemeta` ( + # `dirhash` BIGINT NOT NULL COMMENT 'first 64 bits of MD5 hash value of directory field', + # `name` VARCHAR(766) NOT NULL COMMENT 'directory or file name', + # `directory` TEXT NOT NULL COMMENT 'full path to parent directory', + # `meta` LONGBLOB, + # PRIMARY KEY (`dirhash`, `name`) + # ) DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin; + + enabled = false + # dsn will take priority over "hostname, port, username, password, database". + # [username[:password]@][protocol[(address)]]/dbname[?param1=value1&...¶mN=valueN] + ca_crt = "" # ca.crt dir when enable_tls set true + client_crt = "" # mysql client.crt dir when enable_tls set true + client_key = "" # mysql client.key dir when enable_tls set true + connection_max_idle = 10 + connection_max_lifetime_seconds = 300 + connection_max_open = 50 + database = "" # create or use an existing database + dsn = "root@tcp(localhost:3306)/seaweedfs?collation=utf8mb4_bin" + enable_tls = false + hostname = "localhost" + interpolateParams = false + password = "" + port = 3306 + username = "root" + # if insert/upsert failing, you can disable upsert or update query syntax to match your RDBMS syntax: + enableUpsert = true + upsertQuery = """INSERT INTO `%s` (`dirhash`,`name`,`directory`,`meta`) VALUES (?,?,?,?) AS `new` ON DUPLICATE KEY UPDATE `meta` = `new`.`meta`""" + +[mysql2] # or memsql, tidb + connection_max_idle = 10 + connection_max_lifetime_seconds = 300 + connection_max_open = 50 + createTable = """ + CREATE TABLE IF NOT EXISTS `%s` ( + `dirhash` BIGINT NOT NULL, + `name` VARCHAR(766) NOT NULL, + `directory` TEXT NOT NULL, + `meta` LONGBLOB, + PRIMARY KEY (`dirhash`, `name`) + ) DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin; +""" + database = "" # create or use an existing database + enabled = false + hostname = "localhost" + interpolateParams = false + password = "" + port = 3306 + username = "root" + # if insert/upsert failing, you can disable upsert or update query syntax to match your RDBMS syntax: + enableUpsert = true + upsertQuery = """INSERT INTO `%s` (`dirhash`,`name`,`directory`,`meta`) VALUES (?,?,?,?) 
AS `new` ON DUPLICATE KEY UPDATE `meta` = `new`.`meta`""" + +[postgres] # or cockroachdb, YugabyteDB + # CREATE TABLE IF NOT EXISTS filemeta ( + # dirhash BIGINT, + # name VARCHAR(65535), + # directory VARCHAR(65535), + # meta bytea, + # PRIMARY KEY (dirhash, name) + # ); + database = "postgres" # create or use an existing database + enabled = false + hostname = "localhost" + password = "" + port = 5432 + schema = "" + sslmode = "disable" + username = "postgres" + # SSL certificate options for secure connections + # For sslmode=verify-full, uncomment and configure the following: + # sslcert = "/path/to/client.crt" # client certificate file + # sslkey = "/path/to/client.key" # client private key file + # sslrootcert = "/path/to/ca.crt" # CA certificate file + # sslcrl = "/path/to/client.crl" # Certificate Revocation List (CRL) (optional) + connection_max_idle = 10 + connection_max_lifetime_seconds = 300 + connection_max_open = 50 + # Set to true when using PgBouncer connection pooler + pgbouncer_compatible = false + # if insert/upsert failing, you can disable upsert or update query syntax to match your RDBMS syntax: + enableUpsert = true + upsertQuery = """ + INSERT INTO "%[1]s" (dirhash, name, directory, meta) + VALUES($1, $2, $3, $4) + ON CONFLICT (dirhash, name) DO UPDATE SET + directory=EXCLUDED.directory, + meta=EXCLUDED.meta +""" + +[postgres2] + createTable = """ + CREATE TABLE IF NOT EXISTS "%s" ( + dirhash BIGINT, + name VARCHAR(65535), + directory VARCHAR(65535), + meta bytea, + PRIMARY KEY (dirhash, name) + ); +""" + database = "postgres" # create or use an existing database + enabled = false + hostname = "localhost" + password = "" + port = 5432 + schema = "" + sslmode = "disable" + username = "postgres" + # SSL certificate options for secure connections + # For sslmode=verify-full, uncomment and configure the following: + # sslcert = "/path/to/client.crt" # client certificate file + # sslkey = "/path/to/client.key" # client private key file + # sslrootcert = "/path/to/ca.crt" # CA certificate file + # sslcrl = "/path/to/client.crl" # Certificate Revocation List (CRL) (optional) + connection_max_idle = 10 + connection_max_lifetime_seconds = 300 + connection_max_open = 50 + # Set to true when using PgBouncer connection pooler + pgbouncer_compatible = false + # if insert/upsert failing, you can disable upsert or update query syntax to match your RDBMS syntax: + enableUpsert = true + upsertQuery = """ + INSERT INTO "%[1]s" (dirhash, name, directory, meta) + VALUES($1, $2, $3, $4) + ON CONFLICT (dirhash, name) DO UPDATE SET + directory=EXCLUDED.directory, + meta=EXCLUDED.meta +""" + +[cassandra2] + # CREATE TABLE filemeta ( + # dirhash bigint, + # directory varchar, + # name varchar, + # meta blob, + # PRIMARY KEY ((dirhash, directory), name) + # ) WITH CLUSTERING ORDER BY (name ASC); + enabled = false + hosts = ["localhost:9042"] + keyspace = "seaweedfs" + password = "" + username = "" + # Set the CA certificate path + ssl_ca_path = "" + # Set the client certificate path + ssl_cert_path = "" + # Set the client private key path + ssl_key_path = "" + # Check host name in the certificate + ssl_enable_host_verification = true + # This changes the data layout. Only add new directories. Removing/Updating will cause data loss. + superLargeDirectories = [] + # Name of the datacenter local to this filer, used as host selection fallback. 
+ localDC = "" + # Gocql connection timeout, default: 600ms + connection_timeout_millisecond = 600 + +[hbase] + enabled = false + table = "seaweedfs" + zkquorum = "" + +[redis2] + address = "localhost:6379" + database = 0 + enabled = false + password = "" + username = "" + # prefix for filer redis keys + ca_cert_path = "" + client_cert_path = "" + client_key_path = "" + enable_tls = false + keyPrefix = "" + # This changes the data layout. Only add new directories. Removing/Updating will cause data loss. + superLargeDirectories = [] + +[redis2_sentinel] + addresses = ["172.22.12.7:26379", "172.22.12.8:26379", "172.22.12.9:26379"] + database = 0 + enabled = false + masterName = "master" + password = "" + username = "" + # prefix for filer redis keys + ca_cert_path = "" + client_cert_path = "" + client_key_path = "" + enable_tls = false + keyPrefix = "" + +[redis_cluster2] + addresses = [ + "localhost:30001", + "localhost:30002", + "localhost:30003", + "localhost:30004", + "localhost:30005", + "localhost:30006", + ] + enabled = false + password = "" + username = "" + # prefix for filer redis keys + ca_cert_path = "" + client_cert_path = "" + client_key_path = "" + enable_tls = false + keyPrefix = "" + # allows reads from slave servers or the master, but all writes still go to the master + readOnly = false + # automatically use the closest Redis server for reads + routeByLatency = false + # This changes the data layout. Only add new directories. Removing/Updating will cause data loss. + superLargeDirectories = [] + +# The following lua redis stores uses lua to ensure atomicity +[redis_lua] + address = "localhost:6379" + database = 0 + enabled = false + password = "" + username = "" + # prefix for filer redis keys + ca_cert_path = "" + client_cert_path = "" + client_key_path = "" + enable_tls = false + keyPrefix = "" + # This changes the data layout. Only add new directories. Removing/Updating will cause data loss. + superLargeDirectories = [] + +[redis_lua_sentinel] + addresses = ["172.22.12.7:26379", "172.22.12.8:26379", "172.22.12.9:26379"] + database = 0 + enabled = false + masterName = "master" + password = "" + username = "" + # prefix for filer redis keys + ca_cert_path = "" + client_cert_path = "" + client_key_path = "" + enable_tls = false + keyPrefix = "" + +[redis_lua_cluster] + addresses = [ + "localhost:30001", + "localhost:30002", + "localhost:30003", + "localhost:30004", + "localhost:30005", + "localhost:30006", + ] + enabled = false + password = "" + username = "" + # prefix for filer redis keys + ca_cert_path = "" + client_cert_path = "" + client_key_path = "" + enable_tls = false + keyPrefix = "" + # allows reads from slave servers or the master, but all writes still go to the master + readOnly = false + # automatically use the closest Redis server for reads + routeByLatency = false + # This changes the data layout. Only add new directories. Removing/Updating will cause data loss. + superLargeDirectories = [] + +[etcd] + enabled = false + key_prefix = "seaweedfs." 
+ password = "" + servers = "localhost:2379" + timeout = "3s" + username = "" + # Set the CA certificate path + tls_ca_file = "" + # Set the client certificate path + tls_client_crt_file = "" + # Set the client private key path + tls_client_key_file = "" + +[mongodb] + database = "seaweedfs" + enabled = false + insecure_skip_verify = false + option_pool_size = 0 + password = "" + ssl = false + ssl_ca_file = "" + ssl_cert_file = "" + ssl_key_file = "" + uri = "mongodb://localhost:27017" + username = "" + +[elastic7] + enabled = false + healthcheck_enabled = false + password = "" + servers = ["http://localhost1:9200", "http://localhost2:9200", "http://localhost3:9200"] + sniff_enabled = false + username = "" + # increase the value is recommend, be sure the value in Elastic is greater or equal here + index.max_result_window = 10000 + + +[arangodb] # in development dont use it + db_name = "seaweedfs" + enabled = false + servers = ["http://localhost:8529"] # list of servers to connect to + # only basic auth supported for now + password = "" + username = "" + # skip tls cert validation + insecure_skip_verify = true + +[ydb] # https://ydb.tech/ + dialTimeOut = 10 + dsn = "grpc://localhost:2136?database=/local" + enabled = false + poolSizeLimit = 50 + prefix = "seaweedfs" + useBucketPrefix = true # Fast Bucket Deletion + + # Authenticate produced with one of next environment variables: + # YDB_SERVICE_ACCOUNT_KEY_FILE_CREDENTIALS= — used service account key file by path + # YDB_ANONYMOUS_CREDENTIALS="1" — used for authenticate with anonymous access. Anonymous access needs for connect to testing YDB installation + # YDB_METADATA_CREDENTIALS="1" — used metadata service for authenticate to YDB from yandex cloud virtual machine or from yandex function + # YDB_ACCESS_TOKEN_CREDENTIALS= — used for authenticate to YDB with short-life access token. For example, access token may be IAM token + + ########################## + ########################## + # To add path-specific filer store: + # + # 1. Add a name following the store type separated by a dot ".". E.g., cassandra2.tmp + # 2. Add a location configuration. E.g., location = "/tmp/" + # 3. Copy and customize all other configurations. + # Make sure they are not the same if using the same store type! + # 4. Set enabled to true + # + # The following is just using redis as an example + ########################## + [redis2.tmp] + address = "localhost:6379" + database = 1 + enabled = false + keyPrefix = "" + location = "/tmp/" + password = "" + username = "" + +[tikv] + enabled = false + # If you have many pd address, use ',' split then: + # pdaddrs = "pdhost1:2379, pdhost2:2379, pdhost3:2379" + pdaddrs = "localhost:2379" + # prefix for filer TiKV keys, useful for sharing a TiKV cluster with multiple seaweedfs clusters + keyPrefix = "" + # Enable 1PC + enable_1pc = false + # batch delete count, default 10000 in code + #batchdelete_count = 20000 + + # Set the CA certificate path + ca_path = "" + # Set the certificate path + cert_path = "" + # Set the private key path + key_path = "" + # The name list used to verify the cn name + verify_cn = "" + +[foundationdb] + # FoundationDB provides ACID transactions and horizontal scalability. 
+ # Requires: go build -tags foundationdb + cluster_file = "/etc/foundationdb/fdb.cluster" + enabled = false + # api_version = 740 + # timeout = "5s" + # directory_prefix = "seaweedfs" + # For bulk ingestion, enable batching: batch_enabled = true + +[tarantool] + address = "localhost:3301" + maxReconnects = 1000 + password = "" + timeout = "5s" + user = "guest" diff --git a/seaweedfs/config/master.toml b/seaweedfs/config/master.toml new file mode 100644 index 000000000..4e24ccc80 --- /dev/null +++ b/seaweedfs/config/master.toml @@ -0,0 +1,64 @@ +# Put this file to one of the location, with descending priority +# Based on 'docker compose -p seaweedfs exec -it sds-gateway-local-sfs-master weed scaffold -config master' +# ./master.toml +# $HOME/.seaweedfs/master.toml +# /etc/seaweedfs/master.toml +# this file is read by master + +[master.maintenance] + # periodically run these scripts are the same as running them from 'weed shell' + # Scripts are skipped while an admin server is connected. + scripts = """ + lock + ec.encode -fullPercent=95 -quietFor=1h + ec.rebuild -apply + ec.balance -apply + fs.log.purge -daysAgo=7 + volume.deleteEmpty -quietFor=24h -apply + volume.balance -apply + volume.fix.replication -apply + s3.clean.uploads -timeAgo=24h + unlock +""" + sleep_minutes = 17 # sleep minutes between each script execution + + +[master.sequencer] + type = "raft" # Choose [raft|snowflake] type for storing the file id sequence + # when sequencer.type = snowflake, the snowflake id must be different from other masters + sequencer_snowflake_id = 0 # any number between 1~1023 + + + # configurations for tiered cloud storage + # old volumes are transparently moved to cloud for cost efficiency + # [storage.backend] + # [storage.backend.s3.default] + # aws_access_key_id = "" # if empty, loads from the shared credentials file (~/.aws/credentials). + # aws_secret_access_key = "" # if empty, loads from the shared credentials file (~/.aws/credentials). + # bucket = "your_bucket_name" # an existing bucket + # enabled = false + # endpoint = "" + # region = "us-east-2" + # storage_class = "STANDARD_IA" + +# create this number of logical volumes if no more writable volumes +# count_x means how many copies of data. +# e.g.: +# 000 has only one copy, copy_1 +# 010 and 001 has two copies, copy_2 +# 011 has only 3 copies, copy_3 +[master.volume_growth] + copy_1 = 7 # create 1 x 7 = 7 actual volumes + copy_2 = 6 # create 2 x 6 = 12 actual volumes + copy_3 = 3 # create 3 x 3 = 9 actual volumes + copy_other = 1 # create n x 1 = n actual volumes + disable = false # disables volume growth if true + threshold = 0.9 # create threshold + +# configuration flags for replication +[master.replication] + # any replication counts should be considered minimums. If you specify 010 and + # have 3 different racks, that's still considered writable. Writes will still + # try to replicate to all available volumes. You should only use this option + # if you are doing your own replication or periodic sync of volumes. 
+ treat_replication_as_minimums = false diff --git a/seaweedfs/config/notification.toml b/seaweedfs/config/notification.toml new file mode 100644 index 000000000..af869abaa --- /dev/null +++ b/seaweedfs/config/notification.toml @@ -0,0 +1,70 @@ +# A sample TOML config file for SeaweedFS filer store +# Based on 'docker compose -p seaweedfs exec -it sds-gateway-local-sfs-master weed scaffold -config notification' +# Used by both "weed filer" or "weed server -filer" and "weed filer.replicate" +# Put this file to one of the location, with descending priority +# ./notification.toml +# $HOME/.seaweedfs/notification.toml +# /etc/seaweedfs/notification.toml + +#################################################### +# notification +# send and receive filer updates for each file to an external message queue +#################################################### +[notification.log] + # this is only for debugging purpose and does not work with "weed filer.replicate" + enabled = false + + +[notification.kafka] + enabled = false + hosts = ["localhost:9092"] + offsetFile = "./last.offset" + offsetSaveIntervalSeconds = 10 + topic = "seaweedfs_filer" + + +[notification.aws_sqs] + # experimental, let me know if it works + aws_access_key_id = "" # if empty, loads from the shared credentials file (~/.aws/credentials). + aws_secret_access_key = "" # if empty, loads from the shared credentials file (~/.aws/credentials). + enabled = false + region = "us-east-2" + sqs_queue_name = "my_filer_queue" # an existing queue name + + +[notification.google_pub_sub] + # read credentials doc at https://cloud.google.com/docs/authentication/getting-started + enabled = false + google_application_credentials = "/path/to/x.json" # path to json credential file + project_id = "" # an existing project id + topic = "seaweedfs_filer_topic" # a topic, auto created if does not exists + +[notification.gocdk_pub_sub] + # The Go Cloud Development Kit (https://gocloud.dev). + # PubSub API (https://godoc.org/gocloud.dev/pubsub). + # Supports AWS SNS/SQS, Azure Service Bus, Google PubSub, NATS and RabbitMQ. + enabled = false + # This URL will Dial the RabbitMQ server at the URL in the environment + # variable RABBIT_SERVER_URL and open the exchange "myexchange". + # The exchange must have already been created by some other means, like + # the RabbitMQ management plugin. 
Create myexchange of type fanout and myqueue then
+  # create binding myexchange => myqueue
+  sub_url = "rabbit://myqueue"
+  topic_url = "rabbit://myexchange"
+
+[notification.webhook]
+  # Send file system events to HTTP webhook endpoints (push model)
+  # BEST FOR: Low to moderate traffic (< 100 events/second sustained)
+  # FOR HIGH TRAFFIC: Consider using Kafka, SQS, or pull-based event logs instead
+  # Documentation: https://github.com/seaweedfs/seaweedfs/wiki/Filer-Notification-Webhook
+  backoff_seconds = 3 # optional: initial backoff delay (default: 3, range: 1-60)
+  bearer_token = "" # optional: bearer token for authentication
+  buffer_size = 10000 # optional: event buffer size (default: 10000, range: 100-1000000)
+  enabled = false
+  endpoint = "https://your-server.com/webhook" # required: HTTP endpoint URL
+  max_backoff_seconds = 30 # optional: max backoff delay (default: 30, range: backoff_seconds-300)
+  max_retries = 3 # optional: retry attempts (default: 3, range: 0-10)
+  timeout_seconds = 10 # optional: HTTP timeout (default: 10, range: 1-300)
+  workers = 5 # optional: concurrent workers (default: 5, range: 1-100)
+  # event_types = ["create", "update", "delete", "rename"] # optional: filter by event types (default: all)
+  # path_prefixes = ["/important", "/data"] # optional: filter by path prefixes (default: all)
diff --git a/seaweedfs/config/replication.toml b/seaweedfs/config/replication.toml
new file mode 100644
index 000000000..fb827636a
--- /dev/null
+++ b/seaweedfs/config/replication.toml
@@ -0,0 +1,75 @@
+# A sample TOML config file for replicating SeaweedFS filer
+# Based on 'docker compose -p seaweedfs exec -it sds-gateway-local-sfs-master weed scaffold -config replication'
+# Used with "weed filer.backup"
+# Using with "weed filer.replicate" is deprecated.
+# Put this file to one of the location, with descending priority
+# ./replication.toml
+# $HOME/.seaweedfs/replication.toml
+# /etc/seaweedfs/replication.toml
+
+# [source.filer] # deprecated. Only useful with "weed filer.replicate"
+# enabled = true
+# grpcAddress = "localhost:18888"
+# # all files under this directory tree are replicated.
+# # this is not a directory on your hard drive, but on your filer.
+# # i.e., all files with this "prefix" are sent to notification message queue.
+# directory = "/buckets"
+# # files from the directory separated by space are excluded from sending notifications
+# excludeDirectories = "/buckets/tmp"
+
+[sink.local]
+  directory = "/data"
+  enabled = false
+  # all replicated files are under modified time as yyyy-mm-dd directories
+  # so each date directory contains all new and updated files.
+  is_incremental = false
+
+[sink.filer]
+  enabled = false
+  grpcAddress = "localhost:18888"
+  # all replicated files are under this directory tree
+  # this is not a directory on your hard drive, but on your filer.
+  # i.e., all received files will be "prefixed" to this directory.
+  collection = ""
+  directory = "/backup"
+  is_incremental = false
+  replication = ""
+  ttlSec = 0
+
+  [sink.s3]
+  # read credentials doc at https://docs.aws.amazon.com/sdk-for-go/v1/developer-guide/sessions.html
+  # default loads credentials from the shared credentials file (~/.aws/credentials).
+  aws_access_key_id = "${MINIO_BACKUP_ACCESS_KEY}" # if empty, loads from the shared credentials file (~/.aws/credentials).
+  aws_secret_access_key = "${MINIO_BACKUP_SECRET_KEY}" # if empty, loads from the shared credentials file (~/.aws/credentials).
+ bucket = "spectrumx" # an existing bucket in MinIO + directory = "/spectrumx" # prefix inside the bucket + enabled = true + endpoint = "https://minio.example.com" # your MinIO endpoint URL + is_incremental = false + region = "us-east-1" # can be anything for MinIO + + # [sink.google_cloud_storage] + # # read credentials doc at https://cloud.google.com/docs/authentication/getting-started + # bucket = "spectrumx" # an existing bucket + # directory = "/" # destination directory + # enabled = false + # google_application_credentials = "/path/to/x.json" # path to json credential file + # is_incremental = false + + # [sink.azure] + # # experimental, let me know if it works + # account_key = "" + # account_name = "" + # container = "mycontainer" # an existing container + # directory = "/" # destination directory + # enabled = false + # is_incremental = false + + # [sink.backblaze] + # b2_account_id = "" + # b2_master_application_key = "" + # b2_region = "" + # bucket = "mybucket" # an existing bucket + # directory = "/" # destination directory + # enabled = false + # is_incremental = false diff --git a/seaweedfs/config/s3-config.json b/seaweedfs/config/s3-config.json new file mode 100644 index 000000000..5de1f4fae --- /dev/null +++ b/seaweedfs/config/s3-config.json @@ -0,0 +1,24 @@ +{ + "identities": [ + { + "name": "admin", + "credentials": [ + { + "accessKey": "admin-access-key", + "secretKey": "admin-secret-key" + } + ], + "actions": ["Admin", "Read", "Write", "List", "Tagging"] + }, + { + "name": "backup-user", + "credentials": [ + { + "accessKey": "backup-access-key", + "secretKey": "backup-secret-key" + } + ], + "actions": ["Read", "List"] + } + ] +} diff --git a/seaweedfs/config/security.toml b/seaweedfs/config/security.toml new file mode 100644 index 000000000..8f2f8ab67 --- /dev/null +++ b/seaweedfs/config/security.toml @@ -0,0 +1,174 @@ +# Put this file to one of the location, with descending priority +# Based on 'docker compose -p seaweedfs exec -it sds-gateway-local-sfs-master weed scaffold -config security' +# ./security.toml +# $HOME/.seaweedfs/security.toml +# /etc/seaweedfs/security.toml +# this file is read by master, volume server, filer, and worker + +# comma separated origins allowed to make requests to the filer and s3 gateway. +# enter in this format: https://domain.com, or http://localhost:port +[cors.allowed_origins] + values = "*" + +# this jwt signing key is read by master and volume server, and it is used for write operations: +# - the Master server generates the JWT, which can be used to write a certain file on a volume server +# - the Volume server validates the JWT on writing +# the jwt defaults to expire after 10 seconds. +# PRODUCTION: Set via WEED_JWT_SIGNING_KEY env var in compose (overrides this empty value). +[jwt.signing] + expires_after_seconds = 10 # seconds + key = "" + +# by default, if the signing key above is set, the Volume UI over HTTP is disabled. +# by setting ui.access to true, you can re-enable the Volume UI. Despite +# some information leakage (as the UI is not authenticated), this should not +# pose a security risk. +[access] + ui = false + +# by default the filer UI is enabled. This can be a security risk if the filer is exposed to the public +# and the JWT for reads is not set. If you don't want the public to have access to the objects in your +# storage, and you haven't set the JWT for reads it is wise to disable access to directory metadata. 
+# This disables access to the Filer UI, and will no longer return directory metadata in GET requests. +[filer.expose_directory_metadata] + enabled = true + + # this jwt signing key is read by master and volume server, and it is used for read operations: + # - the Master server generates the JWT, which can be used to read a certain file on a volume server + # - the Volume server validates the JWT on reading + # NOTE: jwt for read is only supported with master+volume setup. Filer does not support this mode. + # Not set for production read auth — gRPC traffic stays within Docker network. + [jwt.signing.read] + expires_after_seconds = 10 # seconds + key = "" + + +# If this JWT key is configured, Filer only accepts writes over HTTP if they are signed with this JWT: +# - f.e. the S3 API Shim generates the JWT +# - the Filer server validates the JWT on writing +# NOTE: This key is ALSO used as a fallback signing key for S3 STS if s3.iam.config does not specify a signingKey. +# the jwt defaults to expire after 10 seconds. +# PRODUCTION: Set via WEED_JWT_FILER_SIGNING_KEY env var in compose (overrides this empty value). +[jwt.filer_signing] + expires_after_seconds = 10 # seconds + key = "" + + # If this JWT key is configured, Filer only accepts reads over HTTP if they are signed with this JWT: + # - f.e. the S3 API Shim generates the JWT + # - the Filer server validates the JWT on reading + # the jwt defaults to expire after 10 seconds. + [jwt.filer_signing.read] + expires_after_seconds = 10 # seconds + key = "" + +# gRPC mTLS configuration +# All gRPC TLS authentications are mutual (mTLS) +# The values for ca, cert, and key are paths to the certificate/key files +# The host name is not checked, so the certificate files can be shared +[grpc] + ca = "" + # Set wildcard domain for enable TLS authentication by common names + allowed_wildcard_domain = "" # .mycompany.com + + # Volume server gRPC options (server-side) + # Enables mTLS for incoming gRPC connections to volume server + [grpc.volume] + allowed_commonNames = "" # comma-separated SSL certificate common names + cert = "" + key = "" + + # Master server gRPC options (server-side) + # Enables mTLS for incoming gRPC connections to master server + [grpc.master] + allowed_commonNames = "" # comma-separated SSL certificate common names + cert = "" + key = "" + + # Filer server gRPC options (server-side) + # Enables mTLS for incoming gRPC connections to filer server + [grpc.filer] + allowed_commonNames = "" # comma-separated SSL certificate common names + cert = "" + key = "" + + # S3 server gRPC options (server-side) + # Enables mTLS for incoming gRPC connections to S3 server + [grpc.s3] + allowed_commonNames = "" # comma-separated SSL certificate common names + cert = "" + key = "" + + [grpc.msg_broker] + allowed_commonNames = "" # comma-separated SSL certificate common names + cert = "" + key = "" + + [grpc.msg_agent] + allowed_commonNames = "" # comma-separated SSL certificate common names + cert = "" + key = "" + + [grpc.admin] + allowed_commonNames = "" # comma-separated SSL certificate common names + cert = "" + key = "" + + [grpc.worker] + allowed_commonNames = "" # comma-separated SSL certificate common names + cert = "" + key = "" + + [grpc.mq] + allowed_commonNames = "" # comma-separated SSL certificate common names + cert = "" + key = "" + + # gRPC client configuration for outgoing gRPC connections + # Used by clients (S3, mount, backup, benchmark, filer.copy, filer.replicate, upload, etc.) 
+ # when connecting to any gRPC server (master, volume, filer) + [grpc.client] + cert = "" + key = "" + +# HTTPS client configuration for outgoing HTTP connections +# Used by S3, mount, filer.copy, backup, and other clients when communicating with master/volume/filer +# Set enabled=true to use HTTPS instead of HTTP for data operations (separate from gRPC) +# If [https.filer] or [https.volume] are enabled on servers, clients must have [https.client] enabled=true +[https.client] + ca = "" # CA certificate to verify server certificates (required when enabled=true) + cert = "" # Client certificate for mTLS (optional if server doesn't require client cert) + enabled = false # Set to true to enable HTTPS for all outgoing HTTP client connections + key = "" # Client key for mTLS (optional if server doesn't require client cert) + +# Volume server HTTPS options (server-side) +# Enables HTTPS for incoming HTTP connections to volume server +[https.volume] + ca = "" + cert = "" + key = "" + +# Master server HTTPS options (server-side) +# Enables HTTPS for incoming HTTP connections to master server (web UI, HTTP API) +[https.master] + ca = "" + cert = "" + key = "" + +# Filer server HTTPS options (server-side) +# Enables HTTPS for incoming HTTP connections to filer server (web UI, HTTP API) +[https.filer] + ca = "" + cert = "" + key = "" + # disable_tls_verify_client_cert = true|false (default: false) + +# Admin server HTTPS options (server-side) +# Enables HTTPS for incoming HTTP connections to admin server +[https.admin] + ca = "" + cert = "" + key = "" + +# white list. It's checking request ip address. +[guard] + white_list = "" diff --git a/seaweedfs/config/shell.toml b/seaweedfs/config/shell.toml new file mode 100644 index 000000000..701519c95 --- /dev/null +++ b/seaweedfs/config/shell.toml @@ -0,0 +1,11 @@ +# A sample TOML config file for SeaweedFS cluster +# Based on 'docker compose -p seaweedfs exec -it sds-gateway-local-sfs-master weed scaffold -config shell' + +[cluster] + default = "c1" + + [cluster.c1] + master = "localhost:9333" # comma-separated master servers + + [cluster.c2] + master = "" diff --git a/seaweedfs/data/filer/.gitkeep b/seaweedfs/data/filer/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/seaweedfs/data/volumes/.gitkeep b/seaweedfs/data/volumes/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/seaweedfs/docs/.gitignore b/seaweedfs/docs/.gitignore new file mode 100644 index 000000000..15261f1e3 --- /dev/null +++ b/seaweedfs/docs/.gitignore @@ -0,0 +1 @@ +sfs-wiki diff --git a/seaweedfs/docs/operations.md b/seaweedfs/docs/operations.md new file mode 100644 index 000000000..7f2cd5be4 --- /dev/null +++ b/seaweedfs/docs/operations.md @@ -0,0 +1,496 @@ +# SeaweedFS Operations Guide + +Reference guide for managing this deployment. All commands target the Docker Compose +stack defined in `compose.yaml`. 
+ ++ [SeaweedFS Operations Guide](#seaweedfs-operations-guide) + + [Architecture](#architecture) + + [Data flow](#data-flow) + + [Deployment](#deployment) + + [Data directory ownership](#data-directory-ownership) + + [Standard compose commands](#standard-compose-commands) + + [Full teardown (destroy all data)](#full-teardown-destroy-all-data) + + [View logs](#view-logs) + + [Web UIs](#web-uis) + + [S3 API](#s3-api) + + [Create or find S3 credentials (required)](#create-or-find-s3-credentials-required) + + [AWS CLI setup](#aws-cli-setup) + + [Common operations with AWS CLI](#common-operations-with-aws-cli) + + [MinIO client setup](#minio-client-setup) + + [Common operations with MinIO client](#common-operations-with-minio-client) + + [Filer HTTP API](#filer-http-api) + + [Maintenance](#maintenance) + + [Open the admin shell](#open-the-admin-shell) + + [Garbage collection (reclaim space from deleted files)](#garbage-collection-reclaim-space-from-deleted-files) + + [Delete empty / orphaned volumes](#delete-empty--orphaned-volumes) + + [Check volume filesystem integrity](#check-volume-filesystem-integrity) + + [Fix replication](#fix-replication) + + [Balance volume distribution across servers](#balance-volume-distribution-across-servers) + + [Backup and Restore](#backup-and-restore) + + [Save filer metadata to a file](#save-filer-metadata-to-a-file) + + [Restore filer metadata from a file](#restore-filer-metadata-from-a-file) + + [Backup volume data incrementally](#backup-volume-data-incrementally) + + [Troubleshooting](#troubleshooting) + + [Filer metadata not persisting after restart](#filer-metadata-not-persisting-after-restart) + + [Disk space used but files not visible](#disk-space-used-but-files-not-visible) + + [Volume server not registering with master](#volume-server-not-registering-with-master) + + [No free volumes error](#no-free-volumes-error) + +## Architecture + +> For production, replace `local` with `prod`, matching the Gateway's compose file. + +| Component | Container | Default Port | Purpose | +| ---------- | ---------------------------------- | ------------ | ------------------------------------ | +| Master | `sds-gateway-local-sfs-master` | 9333 | Cluster coordination, volume routing | +| Volume | `sds-gateway-local-sfs-volume` | 8080 | Raw file chunk storage | +| Filer | `sds-gateway-local-sfs-filer` | 8888 | Metadata + path-based file access | +| S3 Gateway | `sds-gateway-local-sfs-s3` | 8333 | AWS S3-compatible API | +| WebDAV | `sds-gateway-local-sfs-webdav` | 7333 | WebDAV mount access | +| Prometheus | `sds-gateway-local-sfs-prometheus` | 9000 | Metrics scraping | + +### Data flow + +```text +Client → S3/WebDAV/Filer HTTP → Filer (metadata in /data/filer/filerldb2) + ↓ + Volume Server (chunks in ./data/volumes) +``` + +The **Filer** stores only metadata (file paths, sizes, chunk IDs). The **Volume Server** +stores the actual bytes. Both must persist across restarts — see the `volumes` section +in `compose.yaml`. + +--- + +## Deployment + +> [!TIP] Assign `alias dc='docker compose'` for convenience; then run e.g. `dc logs -f` +> instead of `docker compose logs -f`. 
+
+### Data directory ownership
+
+```bash
+sudo chown -R 1000:1000 data/
+# otherwise, match the UID and GID used in compose.yaml
+```
+
+### Standard compose commands
+
+```bash
+cd seaweedfs/
+docker compose build
+docker compose up -d
+docker compose down
+docker compose restart sds-gateway-local-sfs-filer
+docker compose ps
+```
+
+If the alias is set, you can run a one-liner:
+
+```bash
+cd seaweedfs/
+dc pull --ignore-buildable; dc build && dc up -d && dc ps && dc logs -f
+```
+
+### Full teardown (destroy all data)
+
+```bash
+docker compose down -v
+rm -rf data/volumes/* data/filer/*
+```
+
+### View logs
+
+```bash
+# all services
+docker compose logs -f

+# single service
+docker compose logs -f sds-gateway-local-sfs-filer
+```
+
+---
+
+## Web UIs
+
+| UI                    | URL                             |
+| --------------------- | ------------------------------- |
+| Master cluster status | <http://localhost:9333>         |
+| Volume server status  | <http://localhost:8080>         |
+| Filer browser         | <http://localhost:8888>         |
+| Prometheus targets    | <http://localhost:9000/targets> |
+
+---
+
+## S3 API
+
+The S3 gateway is compatible with the AWS CLI and any S3 SDK. The MinIO client also
+works if you are migrating from MinIO.
+
+### Create or find S3 credentials (required)
+
+This deployment stores S3 identities in SeaweedFS (not in `compose.yaml`).
+
++ Credential backend is configured in `config/credential.toml`.
++ In this repo, `[credential.filer_etc] enabled = true`, so identities are persisted in the filer store.
+
+Create a known admin key pair (recommended if you are unsure which keys exist):
+
+```bash
+export S3_ENDPOINT=http://localhost:8333
+export S3_USER=admin
+export S3_ACCESS_KEY=seaweed-sds-main
+export S3_SECRET_KEY=$(LC_ALL=C tr -dc 'A-Za-z0-9' </dev/urandom | head -c 40)
+echo "s3.configure -apply -user ${S3_USER} -access_key ${S3_ACCESS_KEY} -secret_key ${S3_SECRET_KEY} -actions Admin" \
+  | docker exec -i sds-gateway-local-sfs-master weed shell -master=localhost:9333
+```
+
+> [!IMPORTANT]
+> Access key IDs can be listed later, but secret keys cannot be recovered in plain text.
+> If a secret is unknown, create/rotate credentials with `s3.configure` or IAM APIs.
+
+### AWS CLI setup
+
+```bash
+aws configure set aws_access_key_id "${S3_ACCESS_KEY}"
+aws configure set aws_secret_access_key "${S3_SECRET_KEY}"
+aws configure set default.region us-east-1
+aws configure set default.s3.signature_version s3v4
+
+export S3="${S3_ENDPOINT}"
+```
+
+#### Common operations with AWS CLI
+
+```bash
+# list buckets
+aws --endpoint-url "${S3}" s3 ls
+
+# create a bucket
+aws --endpoint-url "${S3}" s3 mb s3://my-bucket
+
+# upload a file
+aws --endpoint-url "${S3}" s3 cp local-file.txt s3://my-bucket/
+
+# list bucket contents
+aws --endpoint-url "${S3}" s3 ls s3://my-bucket
+
+# download a file
+aws --endpoint-url "${S3}" s3 cp s3://my-bucket/file.txt .
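+
+# generate a time-limited presigned download URL (a sketch; assumes the
+# gateway honors SigV4 presigned requests, as most S3-compatible stores do)
+aws --endpoint-url "${S3}" s3 presign s3://my-bucket/file.txt --expires-in 3600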
+
+# delete a file
+aws --endpoint-url "${S3}" s3 rm s3://my-bucket/file.txt
+
+# delete a bucket (must be empty)
+aws --endpoint-url "${S3}" s3 rb s3://my-bucket
+
+# sync a local directory to a bucket
+aws --endpoint-url "${S3}" s3 sync ./local-dir s3://my-bucket/prefix/
+```
+
+### MinIO client setup
+
+Installing the `mc` CLI:
+
+```bash
+MINIO_INSTALL_DIR="${XDG_DATA_HOME:-${HOME}/.local/share}/mc"
+mkdir -p "${MINIO_INSTALL_DIR}"
+ls -alh "${MINIO_INSTALL_DIR}"
+curl --progress-bar -L https://dl.min.io/aistor/mc/release/linux-amd64/mc \
+  -o "${MINIO_INSTALL_DIR}/mc" \
+  && chmod +x "${MINIO_INSTALL_DIR}/mc"
+ln -s "${MINIO_INSTALL_DIR}/mc" "${HOME}/.local/bin/mc"
+```
+
+Bootstrap credentials for `mc` (run once if you do not already have a working key):
+
+```bash
+echo "s3.configure -apply -user ${S3_USER} -access_key ${S3_ACCESS_KEY} -secret_key ${S3_SECRET_KEY} -actions Admin" \
+  | docker exec -i sds-gateway-local-sfs-master weed shell -master=localhost:9333
+```
+
+Usage:
+
+```bash
+# install (choose one)
+# macOS: brew install minio/stable/mc
+# linux: https://min.io/docs/minio/linux/reference/minio-mc.html
+
+# configure an alias pointing to SeaweedFS S3 gateway
+mc alias set sfs "${S3_ENDPOINT}" "${S3_ACCESS_KEY}" "${S3_SECRET_KEY}" --api S3v4
+# Added `sfs` successfully.
+
+# verify alias
+mc alias ls
+# ...
+# sfs
+#   URL       : http://localhost:8333
+#   AccessKey :
+#   SecretKey :
+#   API       : S3v4
+#   Path      : auto
+#   Src       : /home/user/.mc/config.json
+```
+
+Optional: temporary shell-only setup (no local alias file written):
+
+```bash
+export MC_HOST_sfs="http://${S3_ACCESS_KEY}:${S3_SECRET_KEY}@${S3_ENDPOINT#*://}"
+mc ls sfs
+```
+
+#### Common operations with MinIO client
+
+```bash
+# list buckets
+mc ls sfs
+
+# create a bucket
+mc mb sfs/main
+
+# upload a file
+mc cp docs/readme.md sfs/main/
+
+# list bucket contents
+mc ls sfs/main
+
+# download a file
+mc cp sfs/main/readme.md .
+
+# delete a file
+mc rm sfs/main/readme.md
+
+# delete a bucket (must be empty)
+mc rb sfs/main
+
+# sync a local directory to a bucket prefix
+mc mirror ./docs sfs/main/docs && mc ls sfs/main/docs
+# or, more dangerously, include --overwrite:
+# mc mirror --overwrite ./docs sfs/main/docs
+
+# access it via the file browser (opens a browser)
+xdg-open http://localhost:8888/buckets/main/docs/
+```
+
+---
+
+## Filer HTTP API
+
+```bash
+# upload a file
+curl -F file=@report.pdf "http://localhost:8888/path/to/dir/"
+
+# upload with a specific name
+curl -F file=@report.pdf "http://localhost:8888/path/to/dir/renamed.pdf"
+
+# download
+curl "http://localhost:8888/path/to/dir/renamed.pdf" -o renamed.pdf
+
+# list directory (JSON)
+curl -H "Accept: application/json" "http://localhost:8888/path/to/dir/?pretty=y"
+
+# delete a file
+curl -X DELETE "http://localhost:8888/path/to/dir/renamed.pdf"
+
+# server-side copy (no client data transfer)
+curl -X POST "http://localhost:8888/dest/dir/?cp.from=/source/path/file.pdf"
+```
+
+---
+
+## Maintenance
+
+### Open the admin shell
+
+All maintenance operations go through `weed shell`.
+
+> [!IMPORTANT] Always `unlock` before exiting.
+
+```bash
+docker exec -it sds-gateway-local-sfs-master weed shell -master=localhost:9333
+```
+
+### Garbage collection (reclaim space from deleted files)
+
+Deleted file chunks are not immediately removed. Run vacuum to compact volumes and free
+disk space. The master also runs this automatically every 15 minutes, vacuuming volumes
+whose garbage (deleted-data) ratio exceeds 30%.
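+
+A vacuum can also be requested from inside `weed shell`. A minimal sketch, assuming the
+`volume.vacuum` command and its threshold flag are available in this SeaweedFS version:
+
+```bash
+# inside weed shell: vacuum volumes whose garbage ratio exceeds 30%
+lock
+volume.vacuum -garbageThreshold=0.3
+unlock
+```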
+
+```bash
+# trigger immediately via HTTP (no shell needed)
+curl "http://localhost:9333/vol/vacuum"
+
+# or with a custom threshold (only volumes with >=40% garbage are vacuumed)
+curl "http://localhost:9333/vol/vacuum?garbageThreshold=0.4"
+```
+
+### Delete empty / orphaned volumes
+
+Volumes that contain no live data (e.g. left over from previous runs with missing
+metadata) can be removed. Run inside `weed shell`:
+
+```bash
+lock
+volume.deleteEmpty -quietFor=24h -apply
+unlock
+```
+
+`-quietFor=24h` skips volumes that have been written to within the last 24 hours, to
+avoid racing with active writes.
+
+### Check volume filesystem integrity
+
+```bash
+lock
+volume.fsck -findMissingChunks
+unlock
+```
+
+### Fix replication
+
+```bash
+lock
+volume.fix.replication -apply
+unlock
+```
+
+### Balance volume distribution across servers
+
+```bash
+lock
+volume.balance -apply
+unlock
+```
+
+---
+
+## Backup and Restore
+
+### Save filer metadata to a file
+
+Run inside `weed shell` on the source cluster:
+
+```bash
+lock
+fs.cd /
+fs.meta.save -o /tmp/filer-backup.meta
+unlock
+```
+
+Then copy it out of the master container (the shell process runs there, so that is
+where the file is written):
+
+```bash
+docker cp sds-gateway-local-sfs-master:/tmp/filer-backup.meta ./filer-backup.meta
+```
+
+### Restore filer metadata from a file
+
+```bash
+docker cp ./filer-backup.meta sds-gateway-local-sfs-master:/tmp/filer-backup.meta
+```
+
+Then inside `weed shell`:
+
+```bash
+fs.meta.load /tmp/filer-backup.meta
+```
+
+### Backup volume data incrementally
+
+Run on any machine with enough disk space. SeaweedFS fetches only the delta since the
+last backup.
+
+```bash
+weed backup -server=localhost:9333 -dir=/backup/volumes -volumeId=1
+```
+
+Loop over all known volume IDs in a script — non-existent IDs are a no-op, so iterating
+`1..N` is safe.
+
+---
+
+## Troubleshooting
+
+### Filer metadata not persisting after restart
+
+Verify the filer process is writing to the bind-mounted path:
+
+```bash
+docker exec sds-gateway-local-sfs-filer find / -maxdepth 4 -name "filerldb2" -type d 2>/dev/null
+# Expected: /data/filer/filerldb2
+
+docker exec sds-gateway-local-sfs-filer ls /data/filer/
+# Expected: filerldb2/
+```
+
+If `filerldb2` appears outside `/data/filer/`, the `dir` setting in `config/filer.toml`
+is wrong. It must use an absolute path that falls inside the volume mount:
+
+```toml
+[leveldb2]
+  dir = "/data/filer/filerldb2"
+  enabled = true
+```
+
+### Disk space used but files not visible
+
+This means orphaned volume chunks exist without filer metadata (e.g. the filer metadata
+was lost in a previous session). The data is unrecoverable. Reclaim the space with:
+
+```bash
+# inside weed shell
+lock
+volume.deleteEmpty -quietFor=24h -apply
+unlock
+```
+
+Or wipe `data/volumes/` entirely if you have no data to preserve.
+
+### Volume server not registering with master
+
+Check that the master address in `compose.yaml` matches the master container name and
+port. The filer and volume services must be able to reach the master by its container
+name on the internal Docker network.
+
+```bash
+docker exec sds-gateway-local-sfs-volume ping sds-gateway-local-sfs-master
+```
+
+### No free volumes error
+
+The default setup creates 8 volumes of 30 GB each. If you need more (e.g. 
many S3 +buckets each use their own collection): + +```bash +# pre-allocate 4 more volumes +curl "http://localhost:9333/vol/grow?count=4" +``` + +Or reduce the volume size limit in the master command to allow more volumes from the +same disk budget (requires restart): + +```bash +# in compose.yaml master command, add: +-volumeSizeLimitMB=1024 +``` diff --git a/seaweedfs/docs/readme.md b/seaweedfs/docs/readme.md new file mode 100644 index 000000000..222951f6c --- /dev/null +++ b/seaweedfs/docs/readme.md @@ -0,0 +1,17 @@ +# SeaweedFS integration docs + +SeaweedFS is a distributed file system that can be used as a storage backend for SPX. +This document provides instructions on how to set up and integrate SeaweedFS with the +SpectrumX Data System. + +## Documentation pages + ++ [Operations Guide](./operations.md) + +## Additional docs + +Pull the latest SeaweedFS documentation locally: + +```bash +git clone https://github.com/seaweedfs/seaweedfs.wiki.git sfs-wiki +``` diff --git a/seaweedfs/docs/sfs-deployment-checklist.md b/seaweedfs/docs/sfs-deployment-checklist.md new file mode 100644 index 000000000..ace061132 --- /dev/null +++ b/seaweedfs/docs/sfs-deployment-checklist.md @@ -0,0 +1,1261 @@ +# SeaweedFS Production Deployment Checklist + +- [SeaweedFS Production Deployment Checklist](#seaweedfs-production-deployment-checklist) + - [Infrastructure \& Pre-Deployment](#infrastructure--pre-deployment) + - [Single-Server, All-in-One with 5 XFS Drives](#single-server-all-in-one-with-5-xfs-drives) + - [0. Pre-Deployment Decisions](#0-pre-deployment-decisions) + - [EC Design Note](#ec-design-note) + - [1. OS \& Filesystem Preparation](#1-os--filesystem-preparation) + - [1a. Identify Drives (Both Tracks)](#1a-identify-drives-both-tracks) + - [1b. Track A — Fresh Drives (Empty, Can Be Formatted)](#1b-track-a--fresh-drives-empty-can-be-formatted) + - [1c. Track B — Existing Drives (Already Have Data, Cannot Reformat)](#1c-track-b--existing-drives-already-have-data-cannot-reformat) + - [1d. Set Mount Options Persistently (Both Tracks)](#1d-set-mount-options-persistently-both-tracks) + - [Why XFS Settings Matter](#why-xfs-settings-matter) + - [Core Service Configuration](#core-service-configuration) + - [2. Security Configuration](#2-security-configuration) + - [Why JWT Security Matters](#why-jwt-security-matters) + - [gRPC mTLS Note](#grpc-mtls-note) + - [3. Docker Compose Configuration](#3-docker-compose-configuration) + - [Why 5 Separate Volume Servers Instead of One With 5 Dirs](#why-5-separate-volume-servers-instead-of-one-with-5-dirs) + - [Why `-index=leveldb`](#why--indexleveldb) + - [4. S3 API Setup](#4-s3-api-setup) + - [S3 Encryption Note](#s3-encryption-note) + - [Operations \& Maintenance](#operations--maintenance) + - [5. Monitoring — Prometheus + Grafana](#5-monitoring--prometheus--grafana) + - [Push vs Pull Metrics](#push-vs-pull-metrics) + - [6. Backup to MinIO via Async Filer Backup](#6-backup-to-minio-via-async-filer-backup) + - [How Async Backup Works](#how-async-backup-works) + - [Alternative: Volume-Level Backup](#alternative-volume-level-backup) + - [7. Startup \& Verification](#7-startup--verification) + - [Smoke Test: Drive Failure Scenario](#smoke-test-drive-failure-scenario) + - [8. Volume Growth Tuning](#8-volume-growth-tuning) + - [9. 
Maintenance Plan](#9-maintenance-plan) + - [Daily / Automated](#daily--automated) + - [Weekly](#weekly) + - [Monthly](#monthly) + - [Erasure Coding (Always Active)](#erasure-coding-always-active) + - [Drive Replacement Procedure](#drive-replacement-procedure) + - [Appendices](#appendices) + - [Appendix A: Volume Size Calculation](#appendix-a-volume-size-calculation) + - [Appendix B: Port Reference](#appendix-b-port-reference) + - [Appendix C: Recommended Environment `.env` File](#appendix-c-recommended-environment-env-file) + +## Infrastructure & Pre-Deployment + +### Single-Server, All-in-One with 5 XFS Drives + +--- + +### 0. Pre-Deployment Decisions + +Answers to scoping questions gathered before writing this checklist: + +| Question | Decision | Rationale | +| ---------------------- | ----------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------- | +| Topology | Single server, all-in-one | One machine runs master, volume servers, filer, S3, monitoring | +| Master HA | Single master | Acceptable for single-node; master load is light; restartable | +| Filer store | leveldb2 (embedded) | Simplest for single filer; no external dependency | +| Data durability | Erasure Coding (RS 10+4) via admin worker | Writes go to `000` volumes; EC worker auto-converts full/quiet volumes to EC shards; survives up to 4 shard losses with ~1.4x storage overhead | +| Drive size | 5 × 22TB | ~110TB raw, ~74.5TB usable after EC overhead (RS 10+4 = 1.4x) | +| Drive failure target | Up to 4 drives (theoretical max) | RS(10,4) can lose any 4 of 14 shards; with 5 drives, EC shards are spread across all drives — losing 1-2 drives is fully survivable | +| Monitoring | Prometheus + Grafana (push mode) | Full observability with the upstream Grafana dashboard | +| S3 gateway | Yes | Required for S3-compatible access; separate service on port 8333 | +| Backup | Async to existing MinIO (S3 interface) | `weed filer.backup` with S3 sink; user has mc alias ready | +| Volume server approach | 5 separate volume servers (1 per drive) | Cleaner drive isolation; easier replacement on failure | + +#### EC Design Note + +This deployment uses **Erasure Coding (RS 10+4)** as the primary data durability +mechanism instead of replication. Here is how it works: + +**Write path:** New data is written to normal volumes with **`000` replication** (no +copies). This is the initial landing zone. Data is temporarily at single-copy risk +during the brief window before EC conversion. + +**EC conversion (automatic):** The `erasure_coding` plugin worker (running via `weed +admin` + `weed worker`) continuously scans for volumes that are: + +- ≥80% full (fullness ratio threshold, configurable) +- Unmodified for ≥300 seconds (quiet period, configurable) +- Larger than 30MB + +When a volume qualifies, the worker encodes it into **14 EC shards** (10 data + 4 +parity) using Reed-Solomon coding. The 14 shards are spread across available volume +servers (drives). After successful encoding, the original volume file is deleted, +freeing space. + +**Failure tolerance:** RS(10,4) can reconstruct data from any **10 of 14 shards**. 
With
+5 drives and shards spread evenly, this means:
+
+- **1-2 drive failures:** Fully survivable — at most ~3 shards lost per volume
+- **3-4 drive failures:** Potentially survivable depending on shard distribution
+- Each volume's 14 shards are spread across all 5 drives, so losing any single drive
+  never takes down more than ~3 shards per volume (well within the 4-shard recovery
+  limit)
+
+**Storage efficiency:** RS(10,4) requires only **1.4×** raw storage (vs 2× for 001
+replication, 3× for 002). For 5 × 22TB = 110TB raw, this yields ~74.5TB usable.
+
+**Trade-offs:**
+
+- Write amplification: EC reads the entire volume to encode it (one-time cost)
+- Read penalty: EC reads may require an extra network hop to reconstruct data from
+  multiple shards (~50% throughput vs normal volumes in benchmarks)
+- Deletes only: EC shards are append-only, so deletes are supported but in-place
+  updates require re-compaction
+- Temporary risk window: Before EC conversion, data lives on a single volume with 000
+  replication — conversion happens within minutes of volume filling up
+
+---
+
+### 1. OS & Filesystem Preparation
+
+This section splits into two tracks depending on whether the XFS drives are **fresh** or
+**already formatted with data**. Mount options can be fixed on either track; mkfs-level
+geometry cannot be changed without reformatting.
+
+#### 1a. Identify Drives (Both Tracks)
+
+- [ ] **Identify 5 drives** — confirm device paths:
+
+  ```bash
+  lsblk -o NAME,SIZE,TYPE,MOUNTPOINT,FSTYPE
+  ```
+
+- [ ] **Note mount points** — decide on a consistent scheme, e.g. `/disk1` … `/disk5`.
+  Create them:
+
+  ```bash
+  mkdir -p /disk{1,2,3,4,5}
+  ```
+
+---
+
+#### 1b. Track A — Fresh Drives (Empty, Can Be Formatted)
+
+> Use this if the drives are new or contain nothing you need to keep.
+
+- [ ] **XFS mkfs on each drive** with optimal settings:
+
+  ```bash
+  mkfs.xfs -f -d agcount=4 -l size=128m -n size=8192 /dev/vdb1  # repeat for vdc1, vdd1, vde1, vdf1
+  ```
+
+  | Flag      | Value | Why                                                                     |
+  | --------- | ----- | ----------------------------------------------------------------------- |
+  | `agcount` | 4     | More allocation groups → parallel allocation under concurrent writes    |
+  | `l size`  | 128m  | Larger journal → smoother write bursts                                  |
+  | `n size`  | 8192  | Larger dir blocks → better perf for directories with many volume files  |
+
+  > **On 22TB drives** the defaults are often already close to these values (XFS
+  > auto-tunes based on device size). Run `xfs_info /dev/vdb1` after mkfs to confirm.
+
+---
+
+#### 1c. Track B — Existing Drives (Already Have Data, Cannot Reformat)
+
+> Use this when the drives are already in use or carry data you need to preserve.
+
+- [ ] **Check current XFS geometry** — some mkfs-time settings affect performance but
+  **cannot be changed without reformatting**. Run on each drive:
+
+  ```bash
+  xfs_info /dev/vdb1  # repeat for vdc1, vdd1, vde1, vdf1
+
+  # e.g.
+  # meta-data=/dev/vdb1            isize=512    agcount=22, agsize=268435455 blks
+  #          =                     sectsz=4096  attr=2, projid32bit=1
+  #          =                     crc=1        finobt=1, sparse=1, rmapbt=0
+  #          =                     reflink=1    bigtime=1 inobtcount=1 nrext64=0
+  # data     =                     bsize=4096   blocks=5859442176, imaxpct=5
+  #          =                     sunit=0      swidth=0 blks
+  # naming   =version 2            bsize=4096   ascii-ci=0, ftype=1
+  # log      =internal log         bsize=4096   blocks=521728, version=2
+  #          =                     sectsz=4096  sunit=1 blks, lazy-count=1
+  # realtime =none                 extsz=4096   blocks=0, rtextents=0
+  ```
+
+  In the example above:
+
+  - **agcount** = `22` → well above 4, excellent for parallel allocation.
+  - **naming bsize** = `4096` → below the ideal `8192`. This means directory metadata
+    blocks are 4KB instead of 8KB. For SeaweedFS this is a minor factor because volume
+    files are written sequentially and directories hold at most a few thousand entries.
+    The `-n size=8192` mkfs flag is a "nice to have" optimization, not a requirement.
+  - **logsize** = `521728 blocks × 4096 bsize = ~2 GB` → well above the `128m` minimum.
+    The log holds metadata journal entries; a tiny log forces flushes more often under
+    concurrent writes. On 22TB drives XFS auto-sizes the log generously.
+
+  Pay attention to:
+
+  | Parameter | Ideal  | Impact if suboptimal                                              | Can fix?               |
+  | --------- | ------ | ----------------------------------------------------------------- | ---------------------- |
+  | `agcount` | ≥ 4    | Fewer AGs → less parallel allocation; minor perf hit               | **No** — requires mkfs |
+  | `logsize` | ≥ 64m  | Small log → more frequent log rotation under write load            | **No** — requires mkfs |
+  | `naming`  | ≥ 8192 | Small dir blocks → slower directory scans with many volume files   | **No** — requires mkfs |
+
+- [ ] **Check current mount options**:
+
+  ```bash
+  mount | grep /disk
+  # or
+  findmnt /disk1
+  ```
+
+  If `noatime,allocsize=1m` are missing (`noatime` implies `nodiratime`), fix them in
+  the next step.
+
+---
+
+#### 1d. Set Mount Options Persistently (Both Tracks)
+
+Mount options — `noatime`, `nodiratime`, `allocsize` — can be changed at any time by
+updating `/etc/fstab` and remounting. These are the most impactful tuning parameters and
+the main reason to touch the filesystem config.
+
+| Option         | Effect                                                                          |
+| -------------- | ------------------------------------------------------------------------------- |
+| `noatime`      | Skip access-time writes on reads — critical for storage servers                  |
+| `allocsize=1m` | XFS prealloc hint — matches SeaweedFS volume chunk patterns (1MB chunk writes)   |
+
+Sources:
+
+- [`allocsize`](https://oneuptime.com/blog/post/2026-03-04-tune-xfs-file-system-performance-mount-options-rhel-9/view#allocsize)
+
+Other options
+
+| Option         | Effect                                                                    |
+| -------------- | -------------------------------------------------------------------------- |
+| `rw`           | Read-write mode (default)                                                   |
+| `attr2`        | Enable version 2 on-disk inode format (immutable default on modern XFS)     |
+| `nodiratime`   | Skip directory access time updates (`noatime` implies `nodiratime`)         |
+| `inode64`      | Support >16TB files (default on modern XFS)                                 |
+| `logbufs=8`    | More log buffers can improve performance under heavy metadata load          |
+| `logbsize=64k` | Larger log buffer size can help with large transactions                     |
+| `noquota`      | Disable quota checks (not needed if not using XFS quotas)                   |
+
+- [ ] **Add or update fstab entries** for each drive:
+
+  ```text
+  /dev/vdb1  /disk1  xfs  noatime,allocsize=1m  0 0
+  /dev/vdc1  /disk2  xfs  noatime,allocsize=1m  0 0
+  /dev/vdd1  /disk3  xfs  noatime,allocsize=1m  0 0
+  /dev/vde1  /disk4  xfs  noatime,allocsize=1m  0 0
+  /dev/vdf1  /disk5  xfs  noatime,allocsize=1m  0 0
+  ```
+
+  The trailing `0 0` set the dump flag and the fsck order (`fs_passno`):
+
+  `fs_passno`:
+  - 0 means "do not fsck". XFS with journaling rarely needs boot-time fsck, and checking
+    22TB drives at boot would add significant startup delay. This setting also avoids
+    potential hangs if fsck cannot resolve an issue without human intervention.
+  - 1 means "check first" and is reserved for the root filesystem.
+  - 2 means "check after root" and is standard for data drives. Use this instead of 0 if
+    you want periodic fsck checks at boot (e.g. 
+  > These options are **safe for existing data**. They only change how the kernel
+  > interacts with the filesystem going forward; no data rewrite occurs.
+
+- [ ] **Create SeaweedFS data directories** on each drive:
+
+  ```bash
+  mkdir -p /disk{1,2,3,4,5}/{data,idx}
+  ```
+
+- [ ] **Remount all drives** (non-disruptive — active processes continue; the new mount
+  options take effect):
+
+  ```bash
+  mount -o remount /disk1
+  mount -o remount /disk2
+  mount -o remount /disk3
+  mount -o remount /disk4
+  mount -o remount /disk5
+  ```
+
+  If any drive is not yet mounted, pick up all fstab entries at once (note that
+  `mount -a` does not re-apply options to already-mounted filesystems; a reboot remains
+  the cleanest end-to-end check that fstab is correct):
+
+  ```bash
+  mount -a
+  ```
+
+- [ ] **Verify mount options are applied**:
+
+  ```bash
+  mount | grep /disk
+  # Confirm noatime,nodiratime,allocsize=1m appear in the options column
+  ```
+
+- [ ] **Verify disk space**:
+
+  ```bash
+  df -h | grep /disk
+  ```
+
+- [ ] **Set ulimit** (open file limit):
+
+  ```bash
+  echo "* soft nofile 102400" >> /etc/security/limits.conf
+  echo "* hard nofile 102400" >> /etc/security/limits.conf
+  ulimit -n 102400
+  ```
+
+  SeaweedFS can open many network connections under load; the default of 1024 is
+  insufficient. Note that `limits.conf` applies to login sessions, not containers; for
+  the dockerized stack you may also need per-service `ulimits` in the compose file or a
+  higher default on the Docker daemon. See the [Optimization wiki
+  page](https://github.com/seaweedfs/seaweedfs/wiki/Optimization#increase-user-open-file-limit)
+  for details.
+- [ ] **Disable swap** or set `vm.swappiness=1` in `/etc/sysctl.conf` — prevents the
+  kernel from swapping out SeaweedFS processes under memory pressure:
+
+  ```bash
+  echo "vm.swappiness=1" >> /etc/sysctl.conf
+  echo "vm.vfs_cache_pressure=50" >> /etc/sysctl.conf
+  sysctl -p
+  ```
+
+  See the [Linux kernel VM
+  documentation](https://www.kernel.org/doc/html/latest/admin-guide/sysctl/vm.html) for
+  the rationale behind swappiness tuning. SeaweedFS benefits from keeping the page cache
+  hot for frequently accessed volume indexes.
+- [ ] **Optimize network** (if applicable): e.g. raise `net.core.somaxconn` and enable
+  `net.ipv4.tcp_tw_reuse` in `/etc/sysctl.conf` to cope with many short-lived S3
+  connections.
+- [ ] **Install Docker Engine** — follow the [official Docker install
+  guide](https://docs.docker.com/engine/install/) for your distribution.
+- [ ] **Install Docker Compose** (v2 plugin or standalone binary) — see [Docker Compose
+  install docs](https://docs.docker.com/compose/install/).
+- [ ] **Create Docker network** for SeaweedFS:
+
+  ```bash
+  docker network create sds-gateway-prod-seaweedfs-net
+  ```
+
+##### Why XFS Settings Matter
+
+The XFS mount options and mkfs parameters above are tuned for the large sequential I/O
+patterns typical of SeaweedFS volume files. In particular:
+
+| Setting              | Effect                                                                                                       |
+| -------------------- | ------------------------------------------------------------------------------------------------------------ |
+| `noatime`            | Eliminates metadata writes on reads, including directory atime (`nodiratime` is implied on kernels ≥2.6.30)  |
+| `allocsize=1m`       | Hints XFS to allocate 1MB extents — matches SeaweedFS volume chunk patterns                                  |
+| `agcount=4`          | (mkfs option, not mount) More allocation groups = better parallel allocation under concurrent writes         |
+| Volume Preallocation | Master flag `-volumePreallocate` on XFS gives contiguous block allocation, reduces fragmentation             |
+
+See the [Optimization wiki
+page](https://github.com/seaweedfs/seaweedfs/wiki/Optimization#preallocate-volume-file-disk-spaces)
+for details on `-volumePreallocate` and XFS support.
+
+---
+
+## Core Service Configuration
+
+### 2. 
Security Configuration + +- [ ] **Generate `security.toml` scaffold**: + + ```bash + docker run --rm docker.io/chrislusf/seaweedfs:4.23-large_disk_full weed scaffold -config=security > security.toml + ``` + +- [ ] **Set JWT signing key for volume writes** — prevents unauthorized writes to volume + servers: + + ```bash + WEED_JWT_SIGNING_KEY=$(openssl rand -hex 32) + ``` + +- [ ] **Set JWT signing key for filer writes** — secures filer HTTP write endpoints: + + ```bash + WEED_JWT_FILER_SIGNING_KEY=$(openssl rand -hex 32) + ``` + +- [ ] **Set SSE-S3 KEK** — required if S3 clients send `x-amz-server-side-encryption: + AES256`: + + ```bash + WEED_S3_SSE_KEK=$(openssl rand -hex 32) + ``` + + All S3 API servers must use the same KEK value. +- [ ] **Create `.env` file** — Docker Compose [reads variables from a `.env` + file](https://docs.docker.com/compose/environment-variables/env-file/) in the same + directory as `compose.yaml`. Variable names in `.env` are plain (e.g. + `JWT_SIGNING_KEY`), referenced in the compose file as `${JWT_SIGNING_KEY}`. Add these + secrets (do NOT commit `.env` to Git): + + ```ini + # JWT signing key for volume write authorization. + # Master signs JWTs during /dir/assign; volume servers validate them on write. + # Generate: openssl rand -hex 32 + JWT_SIGNING_KEY= + + # JWT signing key for filer HTTP write/read authorization. + # S3 gateway generates these JWTs; filer validates them. + # Generate: openssl rand -hex 32 + JWT_FILER_SIGNING_KEY= + + # SSE-S3 Key Encryption Key (KEK). + # Required if S3 clients send x-amz-server-side-encryption: AES256. + # All S3 API servers in the cluster must use the same value. + # Generate: openssl rand -hex 32 + S3_SSE_KEK= + + # Grafana admin password. + GRAFANA_PASSWORD= + ``` + +- [ ] **Store secrets in a vault/password manager** (Bitwarden, 1Password, pass, etc.) + +#### Why JWT Security Matters + +Without JWT signing keys, any client that can reach the volume servers can write data. +The JWT is generated by the master during `/dir/assign`, so only clients that first +authenticate with the master (or go through the filer/S3 gateway) can write. This +prevents direct unauthorized writes to volume server HTTP endpoints. + +#### gRPC mTLS Note + +For a single-server deployment, gRPC mTLS is **optional**. The gRPC traffic stays within +the Docker network and does not leave the host. Skip unless you need FIPS compliance or +defense-in-depth. + +--- + +### 3. Docker Compose Configuration + +Create `compose.yaml`: + +> **Port allocation**: 5 volume servers on ports 8081-8085 (leaving 8080 free if +> needed). +> +> **Image tag choice**: `4.23-large_disk_full` is used for SeaweedFS because: +> +> - `large_disk` variant supports larger volume indexes without memory issues — critical +> for 22TB drives where default 30GB volumes are not performance-optimal and you may +> want fewer, larger volumes (e.g. 100GB+). +> - `full` variant includes all optional backends (rclone, MySQL, Postgres, etc.), +> avoiding surprises if you later need cloud tiering or migrate the filer store. +> - `4.23` (minimal) omits these — it would work but limits future options. +> - Pinning to a specific version instead of `latest` ensures reproducibility: `latest` +> can change on rebuild and break your deployment. 
+ +```yaml +x-logging: &default-logging + driver: "json-file" + options: + max-size: "100m" + max-file: "3" + +networks: + sds-gateway-prod-seaweedfs-net: + external: true + +volumes: + prometheus-data: + grafana-data: + +services: + master: + image: docker.io/chrislusf/seaweedfs:4.23-large_disk_full + container_name: seaweedfs-master + restart: unless-stopped + networks: + - sds-gateway-prod-seaweedfs-net + ports: + - "9333:9333" + - "19333:19333" + environment: + # JWT key for volume write auth — master signs, volume servers validate + WEED_JWT_SIGNING_KEY: "${JWT_SIGNING_KEY}" + volumes: + - /data/seaweedfs/master:/data + logging: *default-logging + command: | + master + -mdir=/data + -ip=master + -port=9333 + -volumePreallocate + -volumeSizeLimitMB=30000 + -master.metrics.address=http://pushgateway:9091 + + # 5 volume servers — one per XFS drive + volume1: + image: docker.io/chrislusf/seaweedfs:4.23-large_disk_full + container_name: seaweedfs-volume1 + restart: unless-stopped + networks: + - sds-gateway-prod-seaweedfs-net + ports: + - "8081:8081" + - "18081:18081" + environment: + # JWT key to validate volume write tokens issued by master + WEED_JWT_SIGNING_KEY: "${JWT_SIGNING_KEY}" + volumes: + - /disk1/data:/data + - /disk1/idx:/idx + logging: *default-logging + command: | + volume + -master=master:9333 + -ip=volume1 + -port=8081 + -max=0 + -dir=/data + -dir.idx=/idx + -index=leveldb + -dataCenter=dc1 + -rack=rack1 + -compactionMBps=40 + -minFreeSpacePercent=7 + + volume2: + image: docker.io/chrislusf/seaweedfs:4.23-large_disk_full + container_name: seaweedfs-volume2 + restart: unless-stopped + networks: + - sds-gateway-prod-seaweedfs-net + ports: + - "8082:8082" + - "18082:18082" + environment: + # JWT key to validate volume write tokens issued by master + WEED_JWT_SIGNING_KEY: "${JWT_SIGNING_KEY}" + volumes: + - /disk2/data:/data + - /disk2/idx:/idx + logging: *default-logging + command: | + volume + -master=master:9333 + -ip=volume2 + -port=8082 + -max=0 + -dir=/data + -dir.idx=/idx + -index=leveldb + -dataCenter=dc1 + -rack=rack1 + -compactionMBps=40 + -minFreeSpacePercent=7 + + volume3: + image: docker.io/chrislusf/seaweedfs:4.23-large_disk_full + container_name: seaweedfs-volume3 + restart: unless-stopped + networks: + - sds-gateway-prod-seaweedfs-net + ports: + - "8083:8083" + - "18083:18083" + environment: + # JWT key to validate volume write tokens issued by master + WEED_JWT_SIGNING_KEY: "${JWT_SIGNING_KEY}" + volumes: + - /disk3/data:/data + - /disk3/idx:/idx + logging: *default-logging + command: | + volume + -master=master:9333 + -ip=volume3 + -port=8083 + -max=0 + -dir=/data + -dir.idx=/idx + -index=leveldb + -dataCenter=dc1 + -rack=rack1 + -compactionMBps=40 + -minFreeSpacePercent=7 + + volume4: + image: docker.io/chrislusf/seaweedfs:4.23-large_disk_full + container_name: seaweedfs-volume4 + restart: unless-stopped + networks: + - sds-gateway-prod-seaweedfs-net + ports: + - "8084:8084" + - "18084:18084" + environment: + # JWT key to validate volume write tokens issued by master + WEED_JWT_SIGNING_KEY: "${JWT_SIGNING_KEY}" + volumes: + - /disk4/data:/data + - /disk4/idx:/idx + logging: *default-logging + command: | + volume + -master=master:9333 + -ip=volume4 + -port=8084 + -max=0 + -dir=/data + -dir.idx=/idx + -index=leveldb + -dataCenter=dc1 + -rack=rack1 + -compactionMBps=40 + -minFreeSpacePercent=7 + + volume5: + image: docker.io/chrislusf/seaweedfs:4.23-large_disk_full + container_name: seaweedfs-volume5 + restart: unless-stopped + networks: + - 
sds-gateway-prod-seaweedfs-net + ports: + - "8085:8085" + - "18085:18085" + environment: + # JWT key to validate volume write tokens issued by master + WEED_JWT_SIGNING_KEY: "${JWT_SIGNING_KEY}" + volumes: + - /disk5/data:/data + - /disk5/idx:/idx + logging: *default-logging + command: | + volume + -master=master:9333 + -ip=volume5 + -port=8085 + -max=0 + -dir=/data + -dir.idx=/idx + -index=leveldb + -dataCenter=dc1 + -rack=rack1 + -compactionMBps=40 + -minFreeSpacePercent=7 + + filer: + image: docker.io/chrislusf/seaweedfs:4.23-large_disk_full + container_name: seaweedfs-filer + restart: unless-stopped + depends_on: + - master + networks: + - sds-gateway-prod-seaweedfs-net + ports: + - "8888:8888" + - "18888:18888" + environment: + # JWT key for volume write auth — passed through from master + WEED_JWT_SIGNING_KEY: "${JWT_SIGNING_KEY}" + # JWT key for filer HTTP write auth — S3 gateway signs, filer validates + WEED_JWT_FILER_SIGNING_KEY: "${JWT_FILER_SIGNING_KEY}" + volumes: + - /data/seaweedfs/filer:/data + - ./filer.toml:/etc/seaweedfs/filer.toml:ro + logging: *default-logging + command: | + filer + -master=master:9333 + -ip=filer + -port=8888 + -encryptVolumeData=false + -maxMB=32 + + s3: + image: docker.io/chrislusf/seaweedfs:4.23-large_disk_full + container_name: seaweedfs-s3 + restart: unless-stopped + depends_on: + - filer + networks: + - sds-gateway-prod-seaweedfs-net + ports: + - "8333:8333" + environment: + # JWT key for signing filer HTTP requests — must match filer's WEED_JWT_FILER_SIGNING_KEY + WEED_JWT_FILER_SIGNING_KEY: "${JWT_FILER_SIGNING_KEY}" + # SSE-S3 Key Encryption Key — required when clients send x-amz-server-side-encryption: AES256 + WEED_S3_SSE_KEK: "${S3_SSE_KEK}" + volumes: + - ./s3-config.json:/etc/seaweedfs/s3.json:ro + logging: *default-logging + command: | + s3 + -filer=filer:8888 + -port=8333 + -config=/etc/seaweedfs/s3.json + -domain=.s3.example.com + + # Admin server + worker for Erasure Coding and cluster maintenance + admin: + image: docker.io/chrislusf/seaweedfs:4.23-large_disk_full + container_name: seaweedfs-admin + restart: unless-stopped + depends_on: + - master + networks: + - sds-gateway-prod-seaweedfs-net + ports: + - "23646:23646" + logging: *default-logging + command: | + admin + -master=master:9333 + + worker: + image: docker.io/chrislusf/seaweedfs:4.23-large_disk_full + container_name: seaweedfs-worker + restart: unless-stopped + depends_on: + - admin + networks: + - sds-gateway-prod-seaweedfs-net + logging: *default-logging + command: | + worker + -admin=admin:23646 + + prometheus: + image: docker.io/prom/prometheus:v2.53.0 + container_name: seaweedfs-prometheus + restart: unless-stopped + networks: + - sds-gateway-prod-seaweedfs-net + ports: + - "9090:9090" + volumes: + - prometheus-data:/prometheus + - ./prometheus.yaml:/etc/prometheus/prometheus.yaml:ro + command: + - "--config.file=/etc/prometheus/prometheus.yaml" + - "--storage.tsdb.path=/prometheus" + + pushgateway: + image: docker.io/prom/pushgateway:v1.9.0 + container_name: seaweedfs-pushgateway + restart: unless-stopped + networks: + - sds-gateway-prod-seaweedfs-net + ports: + - "9091:9091" + + grafana: + image: docker.io/grafana/grafana:11.1.0 + container_name: seaweedfs-grafana + restart: unless-stopped + networks: + - sds-gateway-prod-seaweedfs-net + ports: + - "3000:3000" + environment: + GF_SECURITY_ADMIN_PASSWORD: "${GRAFANA_PASSWORD}" + volumes: + - grafana-data:/var/lib/grafana +``` + +- [ ] **Create `filer.toml`** for leveldb2 store (default — file may be empty or + 
scaffolded):
+
+  ```bash
+  docker run --rm docker.io/chrislusf/seaweedfs:4.23-large_disk_full weed scaffold -config=filer > filer.toml
+  ```
+
+- [ ] **Create `prometheus.yaml`** with the pushgateway as a target (see section 5 for
+  contents)
+- [ ] **Set `${GRAFANA_PASSWORD}`** in the same `.env` file (Compose substitutes it into
+  the `grafana` service)
+- [ ] **Create directories**:
+
+  ```bash
+  mkdir -p /data/seaweedfs/{master,filer}
+  ```
+
+#### Why 5 Separate Volume Servers Instead of One With 5 Dirs
+
+| Approach                             | Pros                                                                                                                        | Cons                                                       |
+| ------------------------------------ | --------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------- |
+| 5 separate volume servers            | Each drive independent; replacing a failed drive = stop one container; cleaner metrics per drive; easier to move/rebalance | More containers; more ports                                |
+| 1 server with 5 comma-separated dirs | Simpler; fewer ports                                                                                                        | Opaque per-drive health; harder to replace a single drive  |
+
+For EC, separate volume servers are equally important. The EC shard placement algorithm
+spreads the 14 shards (10 data + 4 parity) across available volume servers. With 5
+separate servers (drives), shards are naturally distributed across all drives,
+maximizing failure tolerance. A single volume server with 5 dirs is seen as one node by
+the EC placement algorithm — losing that one node means losing the volume entirely,
+defeating the purpose of EC.
+
+| EC shard distribution (5 drives)       | Max survivable failures                             |
+| -------------------------------------- | ---------------------------------------------------- |
+| 14 shards spread across 5 servers      | 1 drive (≤3 shards lost, within the 4-parity limit) |
+| 14 shards on 1 server (5 dirs, 1 node) | 0 drives (server loss = total loss)                 |
+
+#### Why `-index=leveldb`
+
+- **Memory mode** (default): Fast but loads the full index into RAM on startup — slow
+  restarts with large volumes.
+- **LevelDB mode**: ~4MB fixed memory footprint per volume server, faster startup,
+  minimal performance impact since index lookups are dwarfed by network latency.
+- For 5 volume servers with large volumes, leveldb saves significant RAM.
+
+---
+
+### 4. S3 API Setup
+
+- [ ] **Create `s3-config.json`** with identities:
+
+  ```json
+  {
+    "identities": [
+      {
+        "name": "admin",
+        "credentials": [
+          {
+            "accessKey": "admin-access-key",
+            "secretKey": "admin-secret-key"
+          }
+        ],
+        "actions": ["Admin", "Read", "Write", "List", "Tagging"]
+      },
+      {
+        "name": "backup-user",
+        "credentials": [
+          {
+            "accessKey": "backup-access-key",
+            "secretKey": "backup-secret-key"
+          }
+        ],
+        "actions": ["Read", "List"]
+      }
+    ]
+  }
+  ```
+
+- [ ] **Admin actions** allow bucket creation/deletion. Avoid giving `Admin` to everyday
+  users.
+- [ ] **Test S3 access**:
+
+  ```bash
+  aws s3 --endpoint http://localhost:8333 ls
+  aws s3 --endpoint http://localhost:8333 mb s3://test-bucket
+  aws s3 --endpoint http://localhost:8333 cp /etc/hostname s3://test-bucket/
+  ```
+
+#### S3 Encryption Note
+
+If your S3 clients send `x-amz-server-side-encryption: AES256`, the SSE-S3 KEK must be
+configured (already done in step 2). Without it, these requests fail with `400 Bad
+Request`.
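+
+To confirm the KEK is wired up end to end, round-trip one object with SSE-S3 requested
+(a sketch reusing the `test-bucket` and admin credentials from the examples above):
+
+```bash
+# Upload with SSE-S3 requested; fails with 400 Bad Request if WEED_S3_SSE_KEK is unset
+aws s3api put-object \
+  --endpoint http://localhost:8333 \
+  --bucket test-bucket --key sse-check \
+  --body /etc/hostname \
+  --server-side-encryption AES256
+
+# Read it back; the output should match the plaintext that was uploaded
+aws s3 --endpoint http://localhost:8333 cp s3://test-bucket/sse-check -
+```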
+
+---
+
+## Operations & Maintenance
+
+### 5. Monitoring — Prometheus + Grafana
+
+- [ ] **Start the Prometheus pushgateway** (included in compose as the `pushgateway`
+  service)
+- [ ] **Master** is configured with `-master.metrics.address=http://pushgateway:9091` —
+  the other components (volume, filer) pick this up from the master's heartbeat and
+  push their own metrics.
+- [ ] **Configure Prometheus** to scrape the pushgateway:
+
+  ```yaml
+  # prometheus.yaml
+  global:
+    scrape_interval: 15s
+
+  scrape_configs:
+    - job_name: "seaweedfs-pushgateway"
+      honor_labels: true
+      static_configs:
+        - targets: ["pushgateway:9091"]
+  ```
+
+- [ ] **Import the Grafana dashboard** from upstream:
+
+  ```bash
+  # Download the dashboard JSON from the SeaweedFS repo
+  curl -o grafana-seaweedfs.json \
+    https://raw.githubusercontent.com/seaweedfs/seaweedfs/master/other/metrics/grafana_seaweedfs.json
+  ```
+
+  - Log in to Grafana at `http://<host>:3000` (user `admin`, password from
+    `GRAFANA_PASSWORD` in `.env`)
+  - Create a Prometheus datasource pointing to `http://prometheus:9090`
+  - Import `grafana-seaweedfs.json`
+- [ ] **Set up alerting** in Grafana for:
+  - Volume server down (heartbeat missing)
+  - Free volume count = 0 (cluster full)
+  - High compaction backlog
+  - Disk space < 10% on any volume drive
+
+#### Push vs Pull Metrics
+
+SeaweedFS components push metrics to the pushgateway. This is simpler than configuring
+Prometheus to discover dynamic volume server targets. The pushgateway is a lightweight
+bridge.
+
+---
+
+### 6. Backup to MinIO via Async Filer Backup
+
+- [ ] **Create a backup access key** in your MinIO deployment (via mc or the MinIO
+  console) with write permissions to a dedicated backup bucket.
+- [ ] **Generate `replication.toml`**:
+
+  ```bash
+  docker run --rm docker.io/chrislusf/seaweedfs:4.23-large_disk_full weed scaffold -config=replication > replication.toml
+  ```
+
+- [ ] **Edit `replication.toml`** to configure the S3 sink targeting your MinIO:
+
+  ```toml
+  [sink.s3]
+  enabled = true
+  aws_access_key_id = "minio-backup-access-key"
+  aws_secret_access_key = "minio-backup-secret-key"
+  region = "us-east-1" # can be anything for MinIO
+  bucket = "spectrumx" # existing bucket in MinIO
+  directory = "/spectrumx" # prefix inside the bucket
+  endpoint = "https://minio.example.com" # your MinIO endpoint URL
+  is_incremental = false # false = continuous mirroring
+  ```
+
+- [ ] **Create the backup bucket** in MinIO:
+
+  ```bash
+  mc mb --ignore-existing "sds-backup-minio/spectrumx"
+  ```
+
+- [ ] **Start the backup** as an additional Docker service or standalone process:
+
+  ```yaml
+  # Add to compose.yaml
+  filer-backup:
+    image: docker.io/chrislusf/seaweedfs:4.23-large_disk_full
+    container_name: seaweedfs-filer-backup
+    restart: unless-stopped
+    depends_on:
+      - filer
+    networks:
+      - sds-gateway-prod-seaweedfs-net
+    volumes:
+      - ./replication.toml:/etc/seaweedfs/replication.toml:ro
+    command: |
+      filer.backup
+      -filer=filer:8888
+      -config=/etc/seaweedfs/replication.toml
+  ```
+
+#### How Async Backup Works
+
+- `weed filer.backup` subscribes to the filer's metadata change log (CDC).
+- When files are created/updated/deleted, it reads the content from SeaweedFS and
+  replicates it to the configured sink.
+- Progress is checkpointed on the filer — safe to restart.
+- In `is_incremental = false` mode, the remote mirror keeps the same directory structure
+  as the source.
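+
+A quick way to confirm the mirror is keeping up is to write a marker object and look for
+it on the MinIO side (a sketch: the `sds-backup-minio` alias matches the `mc mb` example
+above; the exact key layout under the sink prefix depends on the filer paths, so a
+recursive search is used):
+
+```bash
+# Write a probe object through the SeaweedFS S3 gateway
+aws s3 --endpoint http://localhost:8333 cp /etc/hostname s3://test-bucket/backup-probe
+
+# Give filer.backup a moment to replicate, then search the MinIO mirror for it
+sleep 10
+mc find "sds-backup-minio/spectrumx" --name "backup-probe"
+```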
+
+#### Alternative: Volume-Level Backup
+
+For a full-clone backup (not just file-level), use `weed backup` per volume:
+
+```bash
+weed backup -server=master:9333 -dir=/backup -volumeId=<volume-id>
+```
+
+This is useful for bootstrapping a second cluster but is not continuous.
+
+---
+
+### 7. Startup & Verification
+
+- [ ] **Start all services**:
+
+  ```bash
+  docker compose up -d
+  ```
+
+- [ ] **Verify cluster status** via the master UI:
+
+  ```bash
+  curl http://localhost:9333/ # or open in browser
+  ```
+
+  - Check that all 5 volume servers appear
+  - Check that the free volume count > 0
+- [ ] **Verify volume servers**:
+
+  ```bash
+  curl http://localhost:8081/ # repeat for 8082-8085
+  ```
+
+- [ ] **Verify filer**:
+
+  ```bash
+  curl http://localhost:8888/
+  ```
+
+- [ ] **Verify S3 gateway**:
+
+  ```bash
+  aws s3 --endpoint http://localhost:8333 ls
+  ```
+
+- [ ] **Trigger volume allocation** to test the write path:
+
+  ```bash
+  curl "http://localhost:9333/dir/assign"
+  ```
+
+- [ ] **Run the SeaweedFS benchmark** from within the Docker network:
+
+  ```bash
+  docker run --rm --network sds-gateway-prod-seaweedfs-net docker.io/chrislusf/seaweedfs:4.23-large_disk_full \
+    weed benchmark -master=master:9333 -n 10000
+  ```
+
+- [ ] **Verify Prometheus targets** — check the pushgateway at `http://localhost:9091`
+- [ ] **Verify the Grafana dashboard** — open `http://localhost:3000` and check for data
+
+#### Smoke Test: Drive Failure Scenario
+
+Simulate a drive failure to verify EC durability:
+
+```bash
+# Stop one volume server (simulate drive failure)
+docker stop seaweedfs-volume1
+
+# Verify data is still accessible via S3/filer
+aws s3 --endpoint http://localhost:8333 ls s3://test-bucket/ --recursive
+# Read a file to confirm EC reconstruction works
+aws s3 --endpoint http://localhost:8333 cp s3://test-bucket/test-file /tmp/test-file
+
+# Check EC shard status via weed shell
+docker exec seaweedfs-master weed shell -c "ec.balance"
+
+# Restart the volume server (simulate drive replacement)
+docker start seaweedfs-volume1
+
+# After restart, rebalance EC shards to restore optimal distribution
+docker exec seaweedfs-master weed shell -c "ec.balance -apply"
+```
+
+---
+
+### 8. Volume Growth Tuning
+
+With EC and no replication (`copy_1`), the default growth strategy creates **7 writable
+volumes** initially. As these fill up and get EC-encoded, new volumes are automatically
+created. Given 22TB drives, this is more than sufficient.
+
+If you need more write concurrency (more simultaneous write streams), raise the growth
+count in `master.toml`. Generate the scaffold:
+
+```bash
+docker run --rm docker.io/chrislusf/seaweedfs:4.23-large_disk_full weed scaffold -config=master > master.toml
+```
+
+Edit it and mount it into the master container (see the compose sketch at the end of
+this section):
+
+```toml
+[master.volume_growth]
+copy_1 = 16 # 16 writable volumes for no-replication (more write concurrency)
+threshold = 0.9
+```
+
+**Volume size tuning**: With 22TB drives, the default 30GB volume size means ~733
+volumes per drive. With LevelDB mode (`-index=leveldb`), each volume's index occupies
+roughly 20-40MB of **disk space** in the `idx` directory (~15-30GB total per drive on
+disk). The LevelDB block cache RAM footprint remains fixed at ~4MB per volume server
+regardless of volume count — this is the key advantage of LevelDB over memory mode. See
+the [Optimization wiki
+page](https://github.com/seaweedfs/seaweedfs/wiki/Optimization#use-leveldb) for details
+on index types and memory usage.
+
+To switch to larger volumes (e.g. 100GB), change the master flag in `compose.yaml`:
+
+```text
+- volumeSizeLimitMB=100000 # 100GB volumes → ~220 per drive
+```
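+
+The `master` service in section 3 does not yet mount `master.toml`; a sketch of the
+additions to `compose.yaml` (the container path is where `weed` looks for its TOML
+configs; host paths assumed to match the layout above):
+
+```yaml
+  master:
+    # ...existing master service config from section 3...
+    volumes:
+      - /data/seaweedfs/master:/data
+      - ./master.toml:/etc/seaweedfs/master.toml:ro # volume growth settings from §8
+```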
+
+---
+
+### 9. Maintenance Plan
+
+#### Daily / Automated
+
+- [ ] **Admin script plugin** — the `admin` and `worker` Docker services (already in
+  `compose.yaml`) automatically run these maintenance tasks. Verify they are running:
+
+  ```bash
+  docker ps | grep seaweedfs-admin
+  docker ps | grep seaweedfs-worker
+  ```
+
+  The default script covers:
+
+  - `ec.balance -apply` — balance EC shards
+  - `fs.log.purge -daysAgo=7` — purge old filer logs
+  - `volume.deleteEmpty -quietFor=24h -apply` — delete empty volumes
+  - `volume.fix.replication -apply` — fix missing replicas
+  - `s3.clean.uploads -timeAgo=24h` — clean aborted S3 multipart uploads
+
+- [ ] **Monitor disk usage** on all 5 drives. Alert when any drive exceeds 85% usage.
+
+#### Weekly
+
+- [ ] **Check `weed shell` status**:
+
+  ```bash
+  docker exec seaweedfs-master weed shell -c "volume.status"
+  docker exec seaweedfs-master weed shell -c "volume.list"
+  ```
+
+#### Monthly
+
+- [ ] **Run a full cluster health check**:
+
+  ```bash
+  weed shell -c "volume.fsck"
+  weed shell -c "volume.check.disk"
+  ```
+
+- [ ] **Review Grafana dashboards** for trends: compaction rates, write amplification,
+  disk growth
+- [ ] **Verify the backup is running** — check that the MinIO bucket has recent files
+
+#### Erasure Coding (Always Active)
+
+EC is the **primary durability mechanism** for this deployment, not an afterthought. The
+`erasure_coding` plugin worker runs automatically inside the `worker` container and
+continuously converts full/quiet volumes to RS(10,4) EC shards.
+
+**Detection defaults** (configurable from the admin UI at `/plugin`):
+
+- Fullness ratio threshold: 80%
+- Quiet period: 300 seconds (5 minutes)
+- Minimum volume size: 30 MB
+- Scan interval: 5 minutes
+
+**What to watch for:**
+
+- Ensure the `worker` container is always running — if it stops, volumes will sit at
+  `000` replication (single copy) indefinitely.
+- If the cluster runs low on free volume IDs, pre-create volumes manually with `curl
+  http://localhost:9333/vol/grow?count=10`.
+- Monitor `ec.balance` shard distribution in Grafana after drive replacements.
+
+#### Drive Replacement Procedure
+
+When a drive fails with EC, the procedure differs from a replication-based setup. There
+are no volume replicas to "fix" — instead, the surviving EC shards on other drives can
+reconstruct the missing data once the replacement drive is online.
+
+1. **Do NOT stop the volume container yet** — the volume server may still serve reads
+   from its surviving shards (depending on the failure mode). Only stop it if the drive
+   is fully dead/unresponsive.
+
+2. If the drive is still partially readable, turn on maintenance mode:
+
+   ```bash
+   docker exec seaweedfs-master weed shell -c "volumeServer.state --nodes volume1:8081 --maintenanceOn"
+   ```
+
+3. Replace the physical drive, mkfs.xfs, mount, and recreate the directory structure:
+
+   ```bash
+   # if the drive is new/empty, format with XFS and the recommended options for SeaweedFS:
+   mkfs.xfs -f -d agcount=4 -l size=128m -n size=8192 /dev/vdb1 # replace with actual new drive
+
+   # if the filesystem already exists (e.g. replaced drive with pre-formatted data):
+   # - check the geometry is adequate:
+   #     xfs_info /dev/vdb1 (see Track B in §1 for what to look for)
+   # - verify/add the fstab entry (same options as §1d), then mount:
+   #     echo '/dev/vdb1 /disk1 xfs noatime,allocsize=1m 0 0' >> /etc/fstab
+   #     mount /disk1
+
+   mkdir -p /disk1/{data,idx}
+   ```
+
+4. Start the container on the new drive:
+
+   ```bash
+   docker start seaweedfs-volume1
+   ```
+
+5. **Rebalance EC shards** — the `ec.balance` command detects that some shards are
+   missing from the replacement server and moves/reconstructs shards to restore optimal
+   distribution:
+
+   ```bash
+   docker exec seaweedfs-master weed shell -c "ec.balance -apply"
+   ```
+
+   This may take time depending on how many EC volumes need shard reconstruction.
+   Monitor progress via the admin UI or Grafana.
+
+6. Re-run the volume server state check:
+
+   ```bash
+   docker exec seaweedfs-master weed shell -c "volumeServer.state"
+   ```
+
+7. Turn off maintenance mode if it was enabled:
+
+   ```bash
+   docker exec seaweedfs-master weed shell -c "volumeServer.state --nodes volume1:8081 --maintenanceOff"
+   ```
+
+**Note:** Unlike replication (`volume.fix.replication`), EC shard reconstruction
+rebuilds only the missing shards from the parity data on surviving drives. This is
+network-efficient but computationally intensive (Reed-Solomon encoding). Monitor CPU on
+the worker/admin containers during reconstruction.
+
+---
+
+## Appendices
+
+### Appendix A: Volume Size Calculation
+
+| Drive count | Data durability | Volume size | Volumes per drive | Raw storage | Usable capacity |
+| ----------- | --------------- | ----------- | ----------------- | ----------- | --------------- |
+| 5 × 22TB    | RS(10,4) EC     | 30GB        | ~733 per drive    | 110TB       | ~74.5TB         |
+| 5 × 22TB    | RS(10,4) EC     | 100GB       | ~220 per drive    | 110TB       | ~74.5TB         |
+
+**Formula**: `usable = (total_raw / 1.4) × 0.95` (RS 10+4 = 1.4× raw overhead; ~5% for
+XFS filesystem overhead, index files, and compaction temp space)
+
+RS(10,4) Erasure Coding: for every 10 data shards, 4 parity shards are created — 14
+total. This means 1.4× raw storage consumption vs 2× for `001` replication or 3× for
+`002` replication.
+
+| Method          | Raw:Usable ratio | Usable from 110TB raw | # disk failures w/o data loss |
+| --------------- | ---------------- | --------------------- | ----------------------------- |
+| No redundancy   | 1:1              | ~104.5TB              | 0 / 5                         |
+| EC RS(10,4)     | 1.4:1            | ~74.5TB               | 1 / 5                         |
+| Replication 001 | 2:1              | ~52.3TB               | 1 / 5                         |
+| Replication 002 | 3:1              | ~34.8TB               | 2 / 5                         |
+
+With only 5 drives, an even 14-shard spread puts ~3 shards of every EC volume on each
+drive, so a second simultaneous drive failure exceeds the 4-parity limit. EC's advantage
+over `001` replication here is its far lower storage overhead and shard-level repair,
+not a higher tolerated failure count; more volume servers would raise it.
+
+### Appendix B: Port Reference
+
+| Service         | HTTP Port | gRPC Port |
+| --------------- | --------- | --------- |
+| Master          | 9333      | 19333     |
+| Volume 1        | 8081      | 18081     |
+| Volume 2        | 8082      | 18082     |
+| Volume 3        | 8083      | 18083     |
+| Volume 4        | 8084      | 18084     |
+| Volume 5        | 8085      | 18085     |
+| Filer           | 8888      | 18888     |
+| S3              | 8333      | —         |
+| Prometheus      | 9090      | —         |
+| Pushgateway     | 9091      | —         |
+| Grafana         | 3000      | —         |
+| Admin (if used) | 23646     | —         |
+
+### Appendix C: Recommended Environment `.env` File
+
+This file lives **in the same directory as `compose.yaml`**. Docker Compose reads it
+automatically when you run `docker compose up`. Variable names are plain — Compose
+substitutes them when referenced as `${VAR_NAME}` in the YAML.
+
+```text
+JWT_SIGNING_KEY=
+JWT_FILER_SIGNING_KEY=
+S3_SSE_KEK=
+GRAFANA_PASSWORD=
+```
+
+**Do not commit `.env` to version control.** Remember to add it to `.gitignore`.
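+
+A minimal way to do both at once, populating the secrets and keeping the file out of Git
+(a sketch, assuming `openssl` as in §2 and that you run it from the compose directory):
+
+```bash
+# Generate the three 32-byte keys plus a Grafana password, then ignore the file
+for var in JWT_SIGNING_KEY JWT_FILER_SIGNING_KEY S3_SSE_KEK; do
+    echo "${var}=$(openssl rand -hex 32)" >> .env
+done
+echo "GRAFANA_PASSWORD=$(openssl rand -hex 16)" >> .env
+grep -qxF '.env' .gitignore 2>/dev/null || echo '.env' >> .gitignore
+```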
diff --git a/seaweedfs/justfile b/seaweedfs/justfile new file mode 100644 index 000000000..b164225f4 --- /dev/null +++ b/seaweedfs/justfile @@ -0,0 +1,203 @@ +set shell := ["bash", "-eu", "-o", "pipefail", "-c"] + +# constants + +env_selection_script := "./scripts/env-selection.sh" + +# variables | run `just env` to see current values + +compose_file := shell(env_selection_script + ' $1', "compose_file") +env := shell(env_selection_script + ' $1', "env") +env_file := shell(env_selection_script + ' $1', "env_file") +filer_container := shell(env_selection_script + ' $1', "filer_container") +master_container := shell(env_selection_script + ' $1', "master_container") +docker_compose := "COMPOSE_FILE=" + compose_file + " docker compose --env-file " + env_file + +alias hooks := pre-commit +alias run := up +alias upgrade := update-hooks + +# show available recipes +default: + @just --list + +# pulls and rebuilds the compose services with optional args +[group('setup')] +build *args: + @echo "Pulling and building sds-seaweedfs" + {{ docker_compose }} pull --ignore-buildable + {{ docker_compose }} build {{ args }} + +# runs a generic docker compose command e.g. `just dc ps` +[group('utilities')] +dc +args: + @echo "Running docker compose command: {{ args }}" + {{ docker_compose }} {{ args }} + +# sets up the data directories with correct ownership (local only) +[group('setup')] +data-setup: + #!/usr/bin/env bash + set -euo pipefail + if [[ "{{ env }}" != "local" ]]; then + echo "data-setup only needed for local; CI and production use volumes or bind mounts" + exit 0 + fi + echo "Creating data directories..." + mkdir -p data/master data/volumes data/filer/filerldb2 + echo "Setting ownership to ${UID:-1000}:${GID:-1000}..." + sudo chown --changes -R "${UID:-1000}:${GID:-1000}" data/ + echo "Done" + +# runs a full deploy (start services, configure credentials, create bucket) +[group('setup')] +deploy *args: + @echo "Deploying SeaweedFS stack for '{{ env }}' environment" + ./scripts/deploy.sh {{ args }} + +# stops and removes compose services +[group('service')] +down *args: + @echo "Stopping SeaweedFS" + {{ docker_compose }} down --remove-orphans {{ args }} + +[group('setup')] +load_credentials path="": + #!/usr/bin/env bash + set -Eeuo pipefail + + env="{{ env }}" + path_override="{{ path }}" + primary_env_file="${path_override:-../gateway/.envs/${env}/storage.env}" + if [[ ! -f "${primary_env_file}" ]]; then + echo "Error: Primary storage credentials file not found at ${primary_env_file}" >&2 + echo "Please run 'just generate-secrets' to create it." >&2 + exit 1 + fi + env_file_gateway=$(realpath ${primary_env_file}) + echo "Loading credentials from ${env_file_gateway}..." >&2 + + if [[ ! -f "${env_file_gateway}" ]]; then + echo "Credentials file not found: ${env_file_gateway}" >&2 + exit 1 + fi + + access_key=$(grep -E '^PRIMARY_ACCESS_KEY_ID=' "${env_file_gateway}" | cut -d'=' -f2- || true) + secret_key=$(grep -E '^PRIMARY_SECRET_ACCESS_KEY=' "${env_file_gateway}" | cut -d'=' -f2- || true) + bucket_name=$(grep -E '^PRIMARY_STORAGE_BUCKET_NAME=' "${env_file_gateway}" | cut -d'=' -f2- || true) + + if [[ -z "${access_key}" || -z "${secret_key}" || -z "${bucket_name}" ]]; then + echo "Missing required credentials in ${env_file_gateway}. 
Expected:" >&2 + echo -e "\tPRIMARY_ACCESS_KEY_ID, PRIMARY_SECRET_ACCESS_KEY, PRIMARY_STORAGE_BUCKET_NAME" >&2 + exit 1 + fi + + printf '%s\n%s\n%s' "${access_key}" "${secret_key}" "${bucket_name}" + +# prints currently selected environment +[group('utilities')] +env: + #!/usr/bin/env bash + echo -e "\nSelected env:\n" + echo -e "\tEnvironment: \e[34m '{{ env }}'\e[0m" + echo -e "\tEnv file: \e[34m '{{ env_file }}'\e[0m" + echo -e "\tCompose file: \e[34m '{{ compose_file }}'\e[0m" + echo -e "\tDocker compose command: \e[34m '{{ docker_compose }}'\e[0m" + echo -e "\tFiler container: \e[34m '{{ filer_container }}'\e[0m" + echo -e "\tMaster container: \e[34m '{{ master_container }}'\e[0m" + + if ! [ -f "{{ compose_file }}" ]; then + echo -e "\n\e[31mError:\e[0m Compose file '{{ compose_file }}' does not exist." + exit 1 + fi + if ! [ -f "{{ env_file }}" ]; then + echo -e "\n\e[31mError:\e[0m Env file '{{ env_file }}' does not exist." \ + "Generate secrets for this environment to create it." + exit 1 + fi + +# streams logs until interrupted +[group('monitoring')] +logs *args: + @echo "Showing SeaweedFS logs..." + {{ docker_compose }} logs --tail 10000 -f {{ args }} || true + +# prints all recent logs once +[group('monitoring')] +logs-once *args: + @echo "Showing SeaweedFS logs once..." + {{ docker_compose }} logs {{ args }} + +# rebuilds then restarts services and shows logs +[group('service')] +redeploy services='': + just build {{ services }} + just down {{ services }} + just up {{ services }} + just logs {{ services }} + +# restarts running compose services +[group('service')] +restart *args: + @echo "Restarting SeaweedFS" + {{ docker_compose }} restart {{ args }} + +# opens an interactive weed shell session +[group('utilities')] +shell: + @echo "Opening weed shell on '{{ filer_container }}' (master: {{ master_container }})" + docker exec -it {{ filer_container }} \ + weed shell -master="{{ master_container }}:9333" + +# starts services in detached mode +[group('service')] +up *args: + #!/usr/bin/env bash + echo "Starting SeaweedFS in detached mode" + echo "Environment: '{{ env }}'" + echo "Compose file: '{{ compose_file }}'" + {{ docker_compose }} up --detach --remove-orphans {{ args }} + +# runs the pre-commit hooks +[group('qa')] +pre-commit: + @uvx prek install -f + @uvx prek run --all-files + +# upgrades pre-commit hooks to their latest compatible versions +[group('development')] +update-hooks: + @uvx prek autoupdate + +# performs full teardown (removes data) — irreversible +[confirm("This will destroy ALL SeaweedFS data. Are you sure? [y/N]")] +[group('service')] +wipe: + #!/usr/bin/env bash + set -euo pipefail + host=$(hostname) + echo -e "This will wipe ALL SeaweedFS data in env=\e[31m{{ env }}\e[0m and hostname=\e[31m${host}\e[0m" + echo "This includes Docker-managed volumes in this SeaweedFS stack, " + echo -e "\tand if env=local it will also delete local data directories.\n" + echo -e "\e[31mThis action is IRREVERSIBLE. Type this machine's hostname to confirm:\e[0m" + read -r confirmation + if [[ "${confirmation}" != "${host}" ]]; then + echo "Aborting." 
+ exit 1 + fi + just down --volumes + if [[ "{{ env }}" == "local" ]]; then + rm -rf data/volumes/* data/filer/* + echo "Local data directories cleared" + fi + echo "SeaweedFS data wiped" + +# health *args # comprehensive cluster diagnostic (human-readable) +[group('monitoring')] +health *args: + @./scripts/health-check.sh {{ args }} + +# health-json # machine-readable JSON output for agentic consumption +[group('monitoring')] +health-json: + @./scripts/health-check.sh --json diff --git a/seaweedfs/progress.md b/seaweedfs/progress.md new file mode 100644 index 000000000..6dcfe3bbc --- /dev/null +++ b/seaweedfs/progress.md @@ -0,0 +1,118 @@ +# SeaweedFS Production Deployment Progress + +## Mission: Checklist-Compliant Production Deployment + +**Target:** 5 × 22TB drives, Erasure Coding RS(10+4), push-based monitoring, JWT security. + +## Audit Results + +### Current State vs Checklist Requirements + +| Area | Before | After | +| ------------------- | --------------------------------------------- | ---------------------------------------------- | +| Image tag | `4.17_large_disk` | `4.23-large_disk_full` | +| Volume servers | 1 (named Docker volume) | 5 (bind mount to /disk{1-5}/{data,idx}) | +| Index | memory (default) | leveldb on all 5 volumes | +| EC (admin+worker) | Not present | admin + worker containers added | +| Monitoring | Prometheus (direct scrape) | Pushgateway + Prometheus (push mode) + Grafana | +| S3 config | No s3-config.json | s3-config.json with identities | +| Security (JWT) | security.toml keys empty | Env var JWT keys in compose + .env | +| Backup | Not present | filer-backup service + replication.toml S3 sink| +| Logging config | Not defined | x-logging with json-file driver | +| Network | `sds-gateway-prod-seaweed-net` (bridge) | External network (created before deploy) | +| WebDAV | Present | Preserved (image bumped to 4.23) | +| Healthchecks | Present on volume, s3 | Retained on all 5 volumes + s3 | +| Env file refs | `.envs/*/seaweedfs.env` (wrong name) | Fixed to `sfs.env` in env-selection.sh | + +## Changes Made + +### 1. `compose.production.yaml` — Full rewrite + +- Image: `4.23-large_disk_full` (supports large volumes, includes all backends) +- x-logging defaults for all services +- External network `sds-gateway-prod-seaweed-net` (created before deploy) +- Master: JWT env var, volumePreallocate, volumeSizeLimitMB=30000, push metrics +- 5 volume services (volume1-5): bind mounts, leveldb index, compactionMBps=40, minFreeSpacePercent=7, per-drive healthchecks +- Filer: JWT filer signing, leveldb2, encryptVolumeData=false, maxMB=32 +- S3: JWT filer signing, SSE KEK, s3-config.json, healthcheck, dual-network +- WebDAV: preserved, image bumped +- Admin: EC management, cluster maintenance +- Worker: erasure_coding plugin runner +- Prometheus: v2.53.0, pushgateway scrape target, web.enable-lifecycle +- Pushgateway: v1.9.0 +- Grafana: 11.1.0, admin password from env +- filer-backup: async S3 replication to MinIO + +### 2. `prometheus/prometheus.yaml` — Pushgateway mode + +- Changed from direct service scrape (4 targets) to single pushgateway target with `honor_labels: true` + +### 3. `config/security.toml` — Env var documentation + +- Added comments: `PRODUCTION: Set via WEED_JWT_SIGNING_KEY env var` + +### 4. `config/s3-config.json` — NEW + +- Admin identity (Admin, Read, Write, List, Tagging) +- Backup-user identity (Read, List) + +### 5. 
`config/replication.toml` — S3 sink enabled + +- Uncommented `[sink.s3]` section, set `enabled = true` +- Credentials use `${MINIO_BACKUP_ACCESS_KEY}` / `${MINIO_BACKUP_SECRET_KEY}` env vars +- Target: `spectrumx` bucket, `/spectrumx` prefix + +### 6. `.envs/production/sfs.env` — Secrets scaffolding + +- Added: `JWT_SIGNING_KEY`, `JWT_FILER_SIGNING_KEY`, `S3_SSE_KEK`, `GRAFANA_PASSWORD`, `MINIO_BACKUP_ACCESS_KEY`, `MINIO_BACKUP_SECRET_KEY` + +### 7. `.envs/example/seaweedfs.env` — Updated template + +- Mirrors production env structure with secrets placeholders + +### 8. `scripts/env-selection.sh` — Bug fix + +- Fixed: `seaweedfs.env` → `sfs.env` (all actual env files use `sfs.env` naming) + +## Final Compliance Review + +| Checklist Section | Status | Notes | +| ----------------------- | ------ | ------------------------------------------------ | +| §0 Pre-Deployment | ✅ | EC RS(10+4), 5×22TB, leveldb2, push monitoring | +| §1 OS & Filesystem | 🟡 | Documented; mkfs/fstab are host-level ops | +| §2 Security | ✅ | JWT env vars, security.toml scaffold, .env | +| §3 Docker Compose | ✅ | Full compose with all checklist services | +| §4 S3 API | ✅ | s3-config.json with admin + backup identities | +| §5 Monitoring | ✅ | Pushgateway + Prometheus + Grafana | +| §6 Backup | ✅ | filer-backup + replication.toml S3 sink | +| §7 Startup & Verify | 🟡 | Documented in checklist; commands ready to run | +| §8 Volume Growth | ✅ | master.toml volume_growth config present | +| §9 Maintenance | ✅ | master.toml scripts + admin+worker services | + +### Items requiring host-level ops (not in compose scope) + +- XFS filesystem creation with mkfs.xfs +- /etc/fstab mount options (noatime,allocsize=1m) +- /disk{1-5}/{data,idx} directory creation +- Docker network creation +- Docker Engine installation +- ulimit and sysctl tuning +- MinIO backup bucket creation +- Grafana dashboard import +- S3 credential configuration via `s3.configure` in weed shell + +## Progress Log + +### 2026-05-05 + +- [x] Audited all existing compose files, config files, .env files, scripts +- [x] Documented gap analysis +- [x] Rewrote compose.production.yaml — full checklist compliance + merged existing features +- [x] Updated prometheus.yaml for pushgateway mode +- [x] Updated security.toml with env var documentation +- [x] Created s3-config.json with admin + backup identities +- [x] Updated replication.toml with S3 sink enabled +- [x] Updated .envs/production/sfs.env with JWT secrets scaffolding +- [x] Updated .envs/example/seaweedfs.env with secrets placeholders +- [x] Fixed env-selection.sh bug (seaweedfs.env → sfs.env) +- [x] Final review against checklist sections 0-9 — all covered diff --git a/seaweedfs/prometheus/prometheus.yaml b/seaweedfs/prometheus/prometheus.yaml new file mode 100644 index 000000000..884e9d477 --- /dev/null +++ b/seaweedfs/prometheus/prometheus.yaml @@ -0,0 +1,19 @@ +# PRODUCTION Prometheus config — pushgateway mode +# SeaweedFS components push metrics to pushgateway (configured in master +# via -master.metrics.address). Prometheus scrapes from pushgateway, +# avoiding the need for dynamic target discovery. 
+#
+# See checklist §5 — Monitoring

global:
  scrape_interval: 15s

scrape_configs:
  - job_name: "seaweedfs-pushgateway"
    honor_labels: true
    static_configs:
      - targets: ["sds-gateway-prod-sfs-pushgateway:9091"]

  - job_name: prometheus
    static_configs:
      - targets: ["localhost:9090"]
diff --git a/seaweedfs/scripts/.gitignore b/seaweedfs/scripts/.gitignore
new file mode 100644
index 000000000..7774f9875
--- /dev/null
+++ b/seaweedfs/scripts/.gitignore
@@ -0,0 +1 @@
+prod-hostnames.env
diff --git a/seaweedfs/scripts/checksum-audit.sh b/seaweedfs/scripts/checksum-audit.sh
new file mode 100755
index 000000000..486aae909
--- /dev/null
+++ b/seaweedfs/scripts/checksum-audit.sh
@@ -0,0 +1,420 @@
+#!/usr/bin/env bash
+# =============================================================================
+# checksum-audit.sh
+#
+# Randomly samples objects from a MinIO bucket and verifies that each object's
+# BLAKE3 checksum matches its base name (the base name IS the expected hash).
+#
+# Usage:
+#   checksum-audit.sh --bucket my_bucket
+#   MC_ALIAS=my_minio MC_BUCKET=my_bucket checksum-audit.sh
+#
+# Environment variables:
+#   MC_ALIAS     MinIO alias configured in `mc` (default: local)
+#   MC_BUCKET    Bucket to audit (required)
+#   MC_PREFIX    Optional key prefix to scope the scan, no leading slash (default: "files")
+#   SAMPLE_RATE  Percentage of objects to sample, supports decimals (default: 1)
+#   LOG_FILE     Path to the log file (default: ./checksum_audit.log)
+#   FAIL_FAST    Exit on first mismatch if "true", otherwise audit all samples
+#                and exit with an error at the end (default: true)
+# =============================================================================
+set -Eeuo pipefail
+IFS=$'\n\t'
+
+MC_ALIAS="${MC_ALIAS:-local}"
+MC_BUCKET="${MC_BUCKET:-}"
+MC_PREFIX="${MC_PREFIX:-files}"
+SAMPLE_RATE="${SAMPLE_RATE:-1}"
+LOG_FILE="${LOG_FILE:-./checksum_audit.log}"
+FAIL_FAST="${FAIL_FAST:-true}"
+OBJECT_REGEX=".*/[0-9a-f]{64}(_.*)?$"
+FIND_PATH=""
+
+target=""
+sampled=0
+checked=0
+errors=0
+temp_files=()
+
+color_reset=""
+color_info=""
+color_warn=""
+color_error=""
+color_fatal=""
+
+function init_colors() {
+  if [[ -t 1 ]] && [[ -z "${NO_COLOR:-}" ]]; then
+    color_reset=$'\033[0m'
+    color_info=$'\033[36m'
+    color_warn=$'\033[33m'
+    color_error=$'\033[31m'
+    color_fatal=$'\033[35m'
+  fi
+}
+
+function log() {
+  local level="${1}"
+  local color="${2}"
+  local stream="${3}"
+  shift 3
+  local text="$*"
+  local timestamp
+  local message
+  timestamp="$(date -u '+%Y-%m-%dT%H:%M:%SZ')"
+  message="[${timestamp}] [${level}] ${text}"
+
+  printf '%s\n' "${message}" >>"${LOG_FILE}"
+
+  if [[ "${stream}" == "stderr" ]]; then
+    if [[ -n "${color}" ]]; then
+      printf '%b%s%b\n' "${color}" "${message}" "${color_reset}" >&2
+    else
+      printf '%s\n' "${message}" >&2
+    fi
+    return
+  fi
+
+  if [[ -n "${color}" ]]; then
+    printf '%b%s%b\n' "${color}" "${message}" "${color_reset}"
+  else
+    printf '%s\n' "${message}"
+  fi
+}
+
+function log_info() {
+  log "INFO" "${color_info}" "stdout" "$*"
+}
+
+function log_warn() {
+  log "WARN" "${color_warn}" "stderr" "$*"
+}
+
+function log_error() {
+  log "ERROR" "${color_error}" "stderr" "$*"
+}
+
+function log_fatal() {
+  log "FATAL" "${color_fatal}" "stderr" "$*"
+}
+
+function die() {
+  log_fatal "$*"
+  exit 1
+}
+
+function remember_temp_file() {
+  local file_path="${1}"
+  temp_files+=("${file_path}")
+}
+
+function print_usage() {
+  cat <<EOF
+Usage: checksum-audit.sh [OPTIONS]
+
+Options:
+  -a, --alias <alias>        MinIO alias configured in mc (default: env MC_ALIAS or "local")
+  -b, --bucket <bucket>      Bucket to audit (required; env fallback: MC_BUCKET)
+  -p, --prefix <prefix>      Optional key prefix to scope the scan, no leading slash (default: env MC_PREFIX or "files")
+  -r, --sample-rate <rate>   Sampling percentage in (0,100] (default: env SAMPLE_RATE or "1")
+  -l, --log-file <path>      Log file path (default: env LOG_FILE or "./checksum_audit.log")
+  -f, --fail-fast <bool>     true|false (default: env FAIL_FAST or "true")
+  --no-fail-fast             Shortcut for --fail-fast false
+  -h, --help                 Show this help and exit
+
+Examples:
+  checksum-audit.sh --bucket spectrumx
+  checksum-audit.sh -b spectrumx -r 0.5 --fail-fast false
+  MC_BUCKET=spectrumx checksum-audit.sh -r 5
+EOF
+}
+
+function parse_args() {
+  while [[ $# -gt 0 ]]; do
+    case "${1}" in
+    -h | --help)
+      print_usage
+      exit 0
+      ;;
+    -a | --alias)
+      [[ $# -lt 2 ]] && die "Missing value for ${1}"
+      MC_ALIAS="${2}"
+      shift 2
+      ;;
+    -b | --bucket)
+      [[ $# -lt 2 ]] && die "Missing value for ${1}"
+      MC_BUCKET="${2}"
+      shift 2
+      ;;
+    -p | --prefix)
+      [[ $# -lt 2 ]] && die "Missing value for ${1}"
+      MC_PREFIX="${2}"
+      shift 2
+      ;;
+    -r | --sample-rate)
+      [[ $# -lt 2 ]] && die "Missing value for ${1}"
+      SAMPLE_RATE="${2}"
+      shift 2
+      ;;
+    -l | --log-file)
+      [[ $# -lt 2 ]] && die "Missing value for ${1}"
+      LOG_FILE="${2}"
+      shift 2
+      ;;
+    -f | --fail-fast)
+      [[ $# -lt 2 ]] && die "Missing value for ${1}"
+      FAIL_FAST="${2}"
+      shift 2
+      ;;
+    --no-fail-fast)
+      FAIL_FAST="false"
+      shift
+      ;;
+    --)
+      shift
+      break
+      ;;
+    -*)
+      die "Unknown option: ${1}. Use --help for usage."
+      ;;
+    *)
+      die "Unexpected positional argument: ${1}. Use --help for usage."
+      ;;
+    esac
+  done
+
+  if [[ $# -gt 0 ]]; then
+    die "Unexpected positional argument: ${1}. Use --help for usage."
+  fi
+}
+
+function require_commands() {
+  for cmd in mc b3sum awk date jq mktemp; do
+    command -v "${cmd}" >/dev/null 2>&1 || die "Required command not found: '${cmd}'"
+  done
+}
+
+function validate_sample_rate() {
+  if ! awk -v rate="${SAMPLE_RATE}" 'BEGIN { exit !(rate > 0 && rate <= 100) }'; then
+    die "SAMPLE_RATE must be a number between 0 (exclusive) and 100. Got: '${SAMPLE_RATE}'"
+  fi
+  if ! mc alias list "${MC_ALIAS}" >/dev/null 2>&1; then
+    log_error "Available MinIO aliases:"
+    mc alias list
+    die "MinIO alias '${MC_ALIAS}' not found in 'mc' configuration. Pass it with --alias or set the MC_ALIAS environment variable."
+  fi
+}
+
+function validate_fail_fast() {
+  case "${FAIL_FAST}" in
+  true | false) ;;
+  *) die "FAIL_FAST must be 'true' or 'false'. 
Got: '${FAIL_FAST}'" ;; + esac +} + +function validate_config() { + [[ -z "${MC_BUCKET}" ]] && die "MC_BUCKET must be set, or specified with --bucket " + validate_sample_rate + validate_fail_fast +} + +function set_target() { + target="${MC_ALIAS}/${MC_BUCKET}" +} + +function build_find_path() { + local normalized_prefix="${MC_PREFIX#/}" + normalized_prefix="${normalized_prefix%/}" + + if [[ -z "${normalized_prefix}" ]]; then + FIND_PATH="" + return + fi + + FIND_PATH="${normalized_prefix}/*" +} + +function is_fail_fast() { + [[ "${FAIL_FAST}" == "true" ]] +} + +function print_start_banner() { + log_info "════════════════════════════════════════" + log_info "MinIO BLAKE3 Checksum Audit — Starting" + log_info "Target : ${target}" + log_info "Sample : ${SAMPLE_RATE}%" + log_info "Fail-fast : ${FAIL_FAST}" + log_info "Prefix : ${MC_PREFIX}" + log_info "Path : ${FIND_PATH:-}" + log_info "Regex : ${OBJECT_REGEX}" + log_info "Log file : ${LOG_FILE}" + log_info "════════════════════════════════════════" +} + +function count_lines() { + local input_file="${1}" + awk 'END { print NR + 0 }' "${input_file}" +} + +function filtered_objects() { + local output_file="${1}" + if [[ -n "${FIND_PATH}" ]]; then + log_info "mc find \"${target}\" --path \"${FIND_PATH}\" --regex \"${OBJECT_REGEX}\" > ${output_file}" + mc find "${target}" --path "${FIND_PATH}" --regex "${OBJECT_REGEX}" 2>>"${LOG_FILE}" >"${output_file}" + return + fi + + log_info "mc find \"${target}\" --regex \"${OBJECT_REGEX}\" > ${output_file}" + mc find "${target}" --regex "${OBJECT_REGEX}" 2>>"${LOG_FILE}" >"${output_file}" +} + +function sampled_objects() { + local filtered_file="${1}" + local sampled_file="${2}" + + awk \ + -v rate="${SAMPLE_RATE}" \ + -v seed="$(($$ + $(date +%s)))" \ + 'BEGIN { srand(seed) } rand() * 100 < rate { print }' \ + "${filtered_file}" >"${sampled_file}" +} + +function stream_hash() { + local object_path="${1}" + mc cat "${object_path}" 2>>"${LOG_FILE}" | b3sum --no-names 2>>"${LOG_FILE}" +} + +function on_stream_failure() { + local object_path="${1}" + log_error "STREAM_FAIL — could not read or hash object: ${object_path}" + errors=$((errors + 1)) + if is_fail_fast; then + log_error "Aborting early (FAIL_FAST=true)." + exit 1 + fi +} + +function on_mismatch() { + local object_path="${1}" + local expected_hash="${2}" + local actual_hash="${3}" + log_error "MISMATCH — object : ${object_path}" + log_error "MISMATCH — expected: ${expected_hash}" + log_error "MISMATCH — actual : ${actual_hash}" + errors=$((errors + 1)) + if is_fail_fast; then + log_error "Aborting early (FAIL_FAST=true)." + exit 1 + fi +} + +function verify_object() { + local object_path="${1}" + local base_name="${object_path##*/}" + local expected_hash="${base_name%%_*}" + local actual_hash="" + + sampled=$((sampled + 1)) + # log_info "Verifying [#${sampled}]: ${object_path}" + + if ! 
actual_hash="$(stream_hash "${object_path}")"; then
+    on_stream_failure "${object_path}"
+    return
+  fi
+
+  checked=$((checked + 1))
+
+  if [[ "${actual_hash}" != "${expected_hash}" ]]; then
+    on_mismatch "${object_path}" "${expected_hash}" "${actual_hash}"
+    return
+  fi
+
+  log_info "OK — ${object_path}"
+}
+
+function verify_objects_from_file() {
+  local sampled_file="${1}"
+  while IFS= read -r object_path; do
+    verify_object "${object_path}"
+  done <"${sampled_file}"
+}
+
+function audit_objects() {
+  local filtered_file=""
+  local sampled_file=""
+  local filtered_count=0
+  local sampled_count=0
+
+  filtered_file="$(mktemp)"
+  remember_temp_file "${filtered_file}"
+  sampled_file="$(mktemp)"
+  remember_temp_file "${sampled_file}"
+
+  log_info "Running regex filter with: ${OBJECT_REGEX}"
+  filtered_objects "${filtered_file}"
+  filtered_count="$(count_lines "${filtered_file}")"
+  log_info "Objects after regex filter: ${filtered_count}"
+
+  if ((filtered_count == 0)); then
+    log_warn "No objects matched the regex filter. Skipping verification stage."
+    return
+  fi
+
+  sampled_objects "${filtered_file}" "${sampled_file}"
+  sampled_count="$(count_lines "${sampled_file}")"
+  log_info "Objects after sampling: ${sampled_count}"
+
+  if ((sampled_count == 0)); then
+    log_warn "No objects remained after sampling. Skipping verification stage."
+    return
+  fi
+
+  verify_objects_from_file "${sampled_file}"
+}
+
+function print_summary() {
+  local stream_errors=$((sampled - checked))
+
+  log_info "════════════════════════════════════════"
+  log_info "Audit Complete"
+  log_info "Sampled       : ${sampled}"
+  log_info "Hashed        : ${checked}"
+  log_info "Stream errors : ${stream_errors}"
+  log_info "Mismatches    : ${errors}"
+  log_info "════════════════════════════════════════"
+}
+
+function finalize_result() {
+  if [[ $sampled -eq 0 ]]; then
+    log_warn "No objects were sampled. Bucket may be empty or prefix too narrow."
+    log_info "Total objects in bucket ${MC_BUCKET}:"
+    mc stat "${MC_ALIAS}/${MC_BUCKET}" --json 2>>"${LOG_FILE}" |
+      jq '.Usage.objectsCount' 2>>"${LOG_FILE}" ||
+      log_warn "Could not retrieve object count for bucket."
+    exit 0
+  fi
+
+  if [[ ${errors} -gt 0 ]]; then
+    log_error "Audit FAILED — ${errors} error(s) detected across ${checked} verified objects."
+    exit 1
+  fi
+
+  log_info "Audit PASSED — all ${checked} sampled objects are clean."
+  exit 0
+}
+
+# Remove any temp files registered via remember_temp_file; referenced by the
+# trap in main(), so it must be defined before the trap can fire.
+function cleanup_temp_files() {
+  if [[ ${#temp_files[@]} -gt 0 ]]; then
+    rm -f -- "${temp_files[@]}"
+  fi
+}
+
+function main() {
+  trap cleanup_temp_files EXIT INT TERM
+  init_colors
+  parse_args "$@"
+  require_commands
+  validate_config
+  set_target
+  build_find_path
+  print_start_banner
+  audit_objects
+  print_summary
+  finalize_result
+}
+
+main "$@"
diff --git a/seaweedfs/scripts/common.sh b/seaweedfs/scripts/common.sh
new file mode 100644
index 000000000..d63e55ce5
--- /dev/null
+++ b/seaweedfs/scripts/common.sh
@@ -0,0 +1,52 @@
+#!/usr/bin/env bash
+
+# Script with helper functions to be sourced in other scripts.
+
+# ensure the script is sourced, not executed
+if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
+  echo "This script must be sourced. 
Use: source ${BASH_SOURCE[0]}" >&2 + exit 1 +fi + +function ts() { + local timestamp + timestamp=$(date +"%Y-%m-%d %H:%M:%S") + echo "${timestamp}" +} + +function log_msg() { + local msg="$1" + echo -e "$(ts) | INFO | ${msg}" +} + +function log_header() { + local msg="$1" + echo -e "$(ts) | \033[0;34m======= ${msg}\033[0m" +} + +function log_success() { + local msg="$1" + echo -e "$(ts) | \033[0;32mSUCCESS\033[0m | ${msg}" +} + +function log_error() { + local msg="$1" + echo -e "$(ts) | \033[0;31mERROR | ${msg}\033[0m" >&2 +} + +function log_warning() { + local msg="$1" + echo -e "$(ts) | \033[0;33mWARNING | ${msg}\033[0m" >&2 +} + +function log_fatal_and_exit() { + local msg="$1" + log_error "${msg}" + exit 1 +} + +function log_error_and_skip() { + local msg="$1" + log_error "${msg}" + log_msg "Skipping this step and continuing..." +} diff --git a/seaweedfs/scripts/deploy.sh b/seaweedfs/scripts/deploy.sh new file mode 100755 index 000000000..2d7299de7 --- /dev/null +++ b/seaweedfs/scripts/deploy.sh @@ -0,0 +1,353 @@ +#!/usr/bin/env bash +# Deploy the SeaweedFS stack: start services, configure S3 credentials, create bucket. +# +# By default, S3 credentials are read from .envs//storage.env (PRIMARY vars). +# Pass --sfs-env to override the credentials file path (used by gateway/deploy.sh). +# +# ENVIRONMENT VARIABLES: +# SFS_FORCE_SECRETS - Set to 'true' to overwrite existing .envs files (default: false) +# SFS_SKIP_SETUP - Set to 'true' to skip credential/bucket setup (default: false) +# +# USAGE EXAMPLES: +# ./deploy.sh local +# ./deploy.sh ci +# ./deploy.sh production +# ./deploy.sh --sfs-env /path/to/storage.env local + +set -euo pipefail + +SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +SFS_ROOT=$(cd "${SCRIPT_DIR}/.." && pwd) + +# shellcheck disable=SC1091 +source "${SCRIPT_DIR}/common.sh" + +readonly DEFAULT_MAX_WAIT=60 + +function show_usage() { + echo -e "Usage: ${0} [OPTIONS] " + echo "" + echo "Deploy the SeaweedFS stack: start services, configure S3 credentials, create bucket." + echo "" + echo -e "\e[34mOPTIONS:\e[0m" + echo " --sfs-env Path to env file with S3 credentials" + echo " (defaults to .envs//storage.env)" + echo " --skip-setup Skip credential and bucket setup" + echo " -h, --help Show this help message" + echo "" + echo -e "\e[34mARGUMENTS:\e[0m" + echo " Target environment to deploy" + echo "" + echo -e "\e[34mCREDENTIALS FILE FORMAT:\e[0m" + echo " PRIMARY_ACCESS_KEY_ID=" + echo " PRIMARY_SECRET_ACCESS_KEY=" + echo " PRIMARY_STORAGE_BUCKET_NAME=" + echo "" + echo -e "\e[34mEXAMPLES:\e[0m" + echo " ${0} local" + echo " ${0} ci" + echo " ${0} --sfs-env .envs/production/storage.env production" + echo "" + exit 0 +} + +# Return 0 if running as root, 1 otherwise +function is_root() { + [[ $(id -u) -eq 0 ]] +} + +function setup_data_dirs() { + local env_type="$1" + if [[ "${env_type}" != "local" ]]; then + return 0 + fi + + log_header "Local Data Directory Setup" + log_msg "Creating data directories..." + local uid gid + uid=$(id -u) + gid=$(id -g) + # Export for compose (UID/GID are readonly in bash, so we use HOST_UID/HOST_GID) + export HOST_UID="${uid}" HOST_GID="${gid}" + mkdir -p "${SFS_ROOT}/data/volumes" "${SFS_ROOT}/data/filer/filerldb2" + # Dirs created by current user → already owned by ${uid}:${gid} + # Container also runs as ${uid}:${gid} via compose user: ${HOST_UID}:${HOST_GID} + # → no chown needed. 
+ + log_success "Data directories ready (uid=${uid}, gid=${gid})" +} + +function start_stack() { + log_header "Starting SFS stack" + log_msg "Starting stack..." + { + just build + just up + } &>/dev/null & +} + +function env_prefix() { + if [[ "$1" == "production" ]]; then + echo "prod" + else + echo "$1" + fi +} + +function wait_for_s3_health() { + local env_type="$1" + local max_attempts="${2:-${DEFAULT_MAX_WAIT}}" + local prefix + prefix=$(env_prefix "${env_type}") + local s3_container="sds-gateway-${prefix}-sfs-s3" + local s3_port="${SFS_S3_PORT:-8333}" + + log_msg "Waiting for S3 gateway to be healthy (container: ${s3_container})..." + + local attempt=1 + while [[ ${attempt} -le ${max_attempts} ]]; do + if docker exec "${s3_container}" curl -fsS "http://localhost:${s3_port}/healthz" >/dev/null 2>&1; then + log_success "S3 gateway is healthy" + return 0 + fi + + if [[ $((attempt % 10)) -eq 0 ]]; then + log_msg "Still waiting... (attempt ${attempt}/${max_attempts})" + log_msg "=== S3 gateway logs (last 20 lines) ===" + docker logs --tail 20 "${s3_container}" 2>&1 | while IFS= read -r line; do + log_msg " ${line}" + done + log_msg "=========================================" + fi + + sleep 2 + attempt=$((attempt + 1)) + done + + log_error "S3 gateway '${s3_container}' did not become healthy in time" + return 1 +} + +function configure_s3_credentials() { + local env_type="$1" + local access_key="$2" + local secret_key="$3" + local prefix + prefix=$(env_prefix "${env_type}") + local filer_container="sds-gateway-${prefix}-sfs-filer" + local master_container="sds-gateway-${prefix}-sfs-master" + + log_header "Configuring S3 Credentials" + log_msg "Configuring S3 identity '${access_key}' on cluster..." + + printf '%s\n' "s3.configure -apply -user ${access_key} -access_key ${access_key} -secret_key ${secret_key} -actions Admin -buckets *" | + docker exec -i "${filer_container}" weed shell \ + -master="${master_container}:9333" + + log_success "S3 credentials configured" +} + +function create_bucket() { + local env_type="$1" + local bucket_name="$2" + local access_key="$3" + local secret_key="$4" + local prefix + prefix=$(env_prefix "${env_type}") + local filer_container="sds-gateway-${prefix}-sfs-filer" + local master_container="sds-gateway-${prefix}-sfs-master" + + log_header "Creating S3 Bucket" + log_msg "Creating bucket '${bucket_name}'..." + + printf '%s\n' "s3.bucket.create -name ${bucket_name}" | + docker exec -i "${filer_container}" weed shell \ + -master="${master_container}:9333" + + log_success "Bucket '${bucket_name}' ready" +} + +function setup_prod_hostnames() { + local env_type="$1" + local example_file="${SCRIPT_DIR}/prod-hostnames.example.env" + local target_file="${SCRIPT_DIR}/prod-hostnames.env" + + if [[ -f "${example_file}" && ! -f "${target_file}" ]]; then + cp "${example_file}" "${target_file}" + log_msg "Created: ${target_file}" + fi + + if [[ "${env_type}" == "production" && -f "${target_file}" ]]; then + local current_hostname + current_hostname=$(hostname) + local rel_path + rel_path=$(realpath --relative-to="." "${target_file}") + + if [[ -n "${current_hostname}" ]]; then + if ! grep -Fxq "${current_hostname}" "${target_file}"; then + log_error "Current hostname '${current_hostname}' not listed in '${rel_path}'." + log_msg "Add it:\n\n\techo '${current_hostname}' >> ${rel_path}" + exit 1 + fi + fi + fi +} + +function load_credentials() { + local env_file="$1" + + if [[ ! 
-f "${env_file}" ]]; then + log_error "Credentials file not found: ${env_file}" + return 1 + fi + + local access_key secret_key bucket_name + access_key=$(grep -E '^PRIMARY_ACCESS_KEY_ID=' "${env_file}" | cut -d'=' -f2-) + secret_key=$(grep -E '^PRIMARY_SECRET_ACCESS_KEY=' "${env_file}" | cut -d'=' -f2-) + bucket_name=$(grep -E '^PRIMARY_STORAGE_BUCKET_NAME=' "${env_file}" | cut -d'=' -f2-) + + if [[ -z "${access_key}" || -z "${secret_key}" || -z "${bucket_name}" ]]; then + log_error "Missing required credentials in ${env_file}" + log_msg "Expected: PRIMARY_ACCESS_KEY_ID, PRIMARY_SECRET_ACCESS_KEY, PRIMARY_STORAGE_BUCKET_NAME" + return 1 + fi + + printf '%s\n%s\n%s' "${access_key}" "${secret_key}" "${bucket_name}" +} + +function load_secondary_credentials() { + local env_file="$1" + + if [[ ! -f "${env_file}" ]]; then + return 1 + fi + + local access_key secret_key + access_key=$(grep -E '^SECONDARY_ACCESS_KEY_ID=' "${env_file}" | cut -d'=' -f2-) + secret_key=$(grep -E '^SECONDARY_SECRET_ACCESS_KEY=' "${env_file}" | cut -d'=' -f2-) + + # If neither SECONDARY credential is set, the store is not configured + if [[ -z "${access_key}" || -z "${secret_key}" ]]; then + return 1 + fi + + # Filter out placeholder/admin defaults that indicate unset creds + if [[ "${access_key}" == "admin" && "${secret_key}" == "admin" ]]; then + return 1 + fi + + printf '%s\n%s' "${access_key}" "${secret_key}" +} + +function parse_arguments() { + local -n _args_ref=$1 + shift + + # Ensure key exists (shellcheck can't follow nameref) + if [[ -z "${_args_ref["skip_setup"]+x}" ]]; then + _args_ref["skip_setup"]="false" + fi + if [[ -z "${_args_ref["sfs_env"]+x}" ]]; then + _args_ref["sfs_env"]="" + fi + if [[ "${SFS_SKIP_SETUP:-}" == "true" ]]; then + _args_ref["skip_setup"]="true" + fi + + while [[ $# -gt 0 ]]; do + case "$1" in + --sfs-env) + if [[ -z "${2:-}" ]]; then + log_error "Missing value for --sfs-env" + show_usage + fi + _args_ref["sfs_env"]="$2" + shift 2 + ;; + --skip-setup) + _args_ref["skip_setup"]="true" + shift + ;; + -h | --help) + show_usage + ;; + local | production | ci) + _args_ref["env_type"]="$1" + shift + ;; + *) + log_error "Unknown argument: $1" + show_usage + ;; + esac + done + + if [[ -z "${_args_ref["env_type"]}" ]]; then + log_error "Environment type required (local, production, or ci)" + show_usage + fi +} + +function assert_selected_env() { + local env_type="$1" + local selected_env + selected_env="$(just env | awk -F"'" '/Environment:/{print $2}')" + if [[ "${env_type}" != "${selected_env}" ]]; then + log_error "Selected environment >${selected_env}< does not match argument >${env_type}<" + log_msg "If you are attempting to run e.g. 
a CI env locally, tear down your local stack,"
+        log_msg "then run the deploy script with CI=1, e.g.:\n\n\tCI=1 ${0} ci\n"
+        exit 1
+    fi
+}
+
+function main() {
+    declare -A args=(
+        [env_type]=""
+        [skip_setup]="false"
+        [sfs_env]=""
+    )
+
+    parse_arguments args "$@"
+
+    cd "${SFS_ROOT}"
+    log_header "SeaweedFS Deployment - ${args[env_type]} environment"
+
+    assert_selected_env "${args[env_type]}"
+    setup_prod_hostnames "${args[env_type]}"
+    setup_data_dirs "${args[env_type]}"
+    start_stack "${args[env_type]}"
+    wait_for_s3_health "${args[env_type]}" "${DEFAULT_MAX_WAIT}"
+
+    if [[ "${args[skip_setup]}" == "false" ]]; then
+        local creds
+        # Fall back to the default documented in the header:
+        # .envs/<env_type>/storage.env (relative to SFS_ROOT, which we cd'd into above)
+        local sfs_env_path="${args[sfs_env]:-.envs/${args[env_type]}/storage.env}"
+        creds=$(load_credentials "${sfs_env_path}")
+        local access_key secret_key bucket_name
+        access_key=$(echo "${creds}" | sed -n '1p')
+        secret_key=$(echo "${creds}" | sed -n '2p')
+        bucket_name=$(echo "${creds}" | sed -n '3p')
+
+        configure_s3_credentials "${args[env_type]}" "${access_key}" "${secret_key}"
+        create_bucket "${args[env_type]}" "${bucket_name}" "${access_key}" "${secret_key}"
+
+        # Also configure SECONDARY S3 identity if credentials are available (local/dev)
+        local secondary_creds
+        secondary_creds=$(load_secondary_credentials "${sfs_env_path}") || true
+        if [[ -n "${secondary_creds}" ]]; then
+            local sec_access_key sec_secret_key
+            sec_access_key=$(echo "${secondary_creds}" | sed -n '1p')
+            sec_secret_key=$(echo "${secondary_creds}" | sed -n '2p')
+            log_msg "Configuring SECONDARY S3 identity on SeaweedFS..."
+            configure_s3_credentials "${args[env_type]}" "${sec_access_key}" "${sec_secret_key}"
+            log_success "SECONDARY S3 identity configured on SeaweedFS"
+        fi
+    else
+        log_msg "Skipping credential and bucket setup (--skip-setup)"
+    fi
+
+    log_header "SeaweedFS deployment complete"
+    log_msg "S3 endpoint: http://localhost:${SFS_S3_PORT:-8333}"
+    log_msg "File browser: http://localhost:${SFS_FILER_PORT:-8888}"
+}
+
+main "$@"
diff --git a/seaweedfs/scripts/env-selection.sh b/seaweedfs/scripts/env-selection.sh
new file mode 100755
index 000000000..430856c6b
--- /dev/null
+++ b/seaweedfs/scripts/env-selection.sh
@@ -0,0 +1,130 @@
+#!/usr/bin/env bash
+set -euo pipefail
+IFS=$'\n\t'
+
+function is_production_host() {
+    local script_dir
+    script_dir=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+    local host
+    host=$(hostname)
+    local prod_hosts_file="${script_dir}/prod-hostnames.env"
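+    # One hostname per line; blank lines and '#' comments are skipped by the
+    # read loop below. A non-zero return makes env selection fall back to local.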
+
+    if [[ ! -f "${prod_hosts_file}" ]]; then
+        printf '\033[33mProduction host list not found at %s: defaulting to local\033[0m\n' "${prod_hosts_file}" >&2
+        printf 'Create this file to make the warning go away:\n\n\tcp %s/prod-hostnames.example.env %s\n\n' "${script_dir}" "${prod_hosts_file}" >&2
+        return 1
+    fi
+
+    while read -r line; do
+        line=$(echo "${line}" | xargs)
+        [[ -z "${line}" || ${line:0:1} == '#' ]] && continue
+        if [[ "${line}" == "${host}" ]]; then
+            return 0
+        fi
+    done <"${prod_hosts_file}"
+
+    return 1
+}
+
+function is_ci_env() {
+    if [[ -n "${CI:-}" ]] || [[ -n "${GITHUB_ACTIONS:-}" ]] || [[ -n "${GITLAB_CI:-}" ]] || [[ -n "${BUILD_ID:-}" ]] || [[ -n "${JENKINS_URL:-}" ]]; then
+        return 0
+    fi
+    return 1
+}
+
+function get_target_value() {
+    local target="$1"
+    local env_type="$2"
+    local local_env_file=".envs/local/sfs.env"
+    local production_env_file=".envs/production/sfs.env"
+    local ci_env_file=".envs/ci/sfs.env"
+    local value=""
+
+    case "${target}" in
+    env)
+        value="${env_type}"
+        ;;
+    compose_file)
+        case "${env_type}" in
+        production) value="compose.production.yaml" ;;
+        local) value="compose.local.yaml" ;;
+        ci) value="compose.ci.yaml" ;;
+        esac
+        ;;
+    env_file)
+        case "${env_type}" in
+        ci)
+            value="${ci_env_file}"
+            ;;
+        local)
+            value="${local_env_file}"
+            ;;
+        production)
+            value="${production_env_file}"
+            ;;
+        *)
+            printf 'unsupported environment type: %s\n' "${env_type}" >&2
+            exit 1
+            ;;
+        esac
+        ;;
+    filer_container)
+        case "${env_type}" in
+        production) value="sds-gateway-prod-sfs-filer" ;;
+        *) value="sds-gateway-${env_type}-sfs-filer" ;;
+        esac
+        ;;
+    master_container)
+        case "${env_type}" in
+        production) value="sds-gateway-prod-sfs-master" ;;
+        *) value="sds-gateway-${env_type}-sfs-master" ;;
+        esac
+        ;;
+    s3_container)
+        case "${env_type}" in
+        production) value="sds-gateway-prod-sfs-s3" ;;
+        *) value="sds-gateway-${env_type}-sfs-s3" ;;
+        esac
+        ;;
+    *)
+        printf 'Unknown target: %s\n' "${target}" >&2
+        exit 1
+        ;;
+    esac
+
+    printf '%s' "${value}"
+}
+
+function main() {
+    if [[ $# -ne 1 ]]; then
+        printf 'usage: %s <target>\n' "${0}" >&2
+        exit 1
+    fi
+
+    # determine the environment type
+    local target="${1:-}"
+    local env_type=""
+
+    # allow explicit override via SDS_ENV (e.g., SDS_ENV=ci just env)
+    if [[ -n "${SDS_ENV:-}" ]]; then
+        case "${SDS_ENV}" in
+        ci | local | production) env_type="${SDS_ENV}" ;;
+        *)
+            printf '\033[33mUnknown SDS_ENV="%s": must be ci, local, or production\033[0m\n' "${SDS_ENV}" >&2
+            exit 1
+            ;;
+        esac
+    elif is_production_host 2>/dev/null; then
+        env_type="production"
+    elif is_ci_env; then
+        env_type="ci"
+    else
+        env_type="local"
+    fi
+
+    get_target_value "${target}" "${env_type}"
+}
+
+main "$@"
diff --git a/seaweedfs/scripts/health-check.sh b/seaweedfs/scripts/health-check.sh
new file mode 100755
index 000000000..ebcb3bc4c
--- /dev/null
+++ b/seaweedfs/scripts/health-check.sh
@@ -0,0 +1,536 @@
+#!/usr/bin/env bash
+# seaweedfs-health-check.sh — comprehensive cluster diagnostic
+# Human-readable colored output + machine-readable JSON summary
+#
+# Usage: ./scripts/health-check.sh [--json | --silent]
+#
+# Exit codes:
+#   0 — all OK
+#   1 — failures (warnings don't fail)
+#   2 — fatal error (can't run checks)
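+#
+# Consuming the JSON summary, a sketch (jq is already required by the checks below):
+#   ./scripts/health-check.sh --json | jq -r '.status'   # "ok", "warning", or "failed"
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROJECT_DIR="$(cd "${SCRIPT_DIR}/.." 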
&& pwd)" +source "${SCRIPT_DIR}/common.sh" + +# ── args ──────────────────────────────────────────────────── +OUTPUT_MODE="human" +for arg in "$@"; do + case "$arg" in + --json) OUTPUT_MODE="json" ;; + --silent) OUTPUT_MODE="silent" ;; + esac +done + +# ── environment detection ─────────────────────────────────── +ENV_TYPE="" +if "${SCRIPT_DIR}/env-selection.sh" env 2>/dev/null | grep -q "^production$" 2>/dev/null; then + ENV_TYPE="production" +elif [[ -n "${CI:-}" || -n "${GITHUB_ACTIONS:-}" || -n "${GITLAB_CI:-}" || -n "${BUILD_ID:-}" ]]; then + ENV_TYPE="ci" +else + ENV_TYPE="local" +fi + +case "$ENV_TYPE" in +production) COMPOSE_FILE="compose.production.yaml" ;; +ci) COMPOSE_FILE="compose.ci.yaml" ;; +*) COMPOSE_FILE="compose.local.yaml" ;; +esac + +ENV_FILE=".envs/${ENV_TYPE}/sfs.env" +COMPOSE_ABS="${PROJECT_DIR}/${COMPOSE_FILE}" +ENV_ABS="${PROJECT_DIR}/${ENV_FILE}" +DOCKER_COMPOSE="docker compose -f ${COMPOSE_ABS} --env-file ${ENV_ABS}" + +# ── detect compose profile ────────────────────────────────── +COMPOSE_PROFILE=$(basename "${COMPOSE_FILE}" .yaml | sed 's/^compose\.//') + +# Service availability per profile +HAS_WEBDAV=false +HAS_ADMIN=false +HAS_GRAFANA=false +HAS_WORKER=false +HAS_PROMETHEUS=false +HAS_PUSHGATEWAY=false +case "$COMPOSE_PROFILE" in +production) + HAS_WEBDAV=true + HAS_ADMIN=true + HAS_GRAFANA=true + HAS_WORKER=true + HAS_PROMETHEUS=true + HAS_PUSHGATEWAY=true + ;; +ci) + HAS_WEBDAV=true + HAS_ADMIN=false + HAS_GRAFANA=false + HAS_WORKER=false + HAS_PROMETHEUS=true + HAS_PUSHGATEWAY=false + ;; +local) + HAS_WEBDAV=true + HAS_ADMIN=false + HAS_GRAFANA=false + HAS_WORKER=false + HAS_PROMETHEUS=true + HAS_PUSHGATEWAY=false + ;; +esac + +# Volume server config per profile +case "$COMPOSE_PROFILE" in +production) + VOL_COUNT=5 + VOL_BASE_PORT=8081 + VOL_BASE_GRPC=18081 + DISK_BASE="/disk" + ;; +*) + VOL_COUNT=1 + VOL_BASE_PORT=8080 + VOL_BASE_GRPC=18080 + DISK_BASE="" + ;; +esac + +# Load custom ports from env file +if [[ -f "$ENV_ABS" ]]; then + SFS_FILER_PORT=$(grep '^SFS_FILER_PORT=' "$ENV_ABS" | cut -d= -f2 || echo "8888") + SFS_WEBDAV_PORT=$(grep '^SFS_WEBDAV_PORT=' "$ENV_ABS" | cut -d= -f2 || echo "7333") + SFS_PROM_HOST_PORT=$(grep '^SFS_PROMETHEUS_HOST_PORT=' "$ENV_ABS" | cut -d= -f2 || echo "9090") +fi + +# ── counters ──────────────────────────────────────────────── +TOTAL=0 +OK=0 +WARN=0 +FAIL=0 +JSON_CHECKS="[]" + +add_check() { + local name="$1" status="$2" detail="${3:-}" + TOTAL=$((TOTAL + 1)) + case "$status" in + ok) OK=$((OK + 1)) ;; + warn) WARN=$((WARN + 1)) ;; + fail) FAIL=$((FAIL + 1)) ;; + esac + JSON_CHECKS=$(echo "$JSON_CHECKS" | jq --arg n "$name" --arg s "$status" --arg d "$detail" \ + '. + [{"name": $n, "status": $s, "detail": $d}]') + if [[ "$OUTPUT_MODE" == "human" ]]; then + case "$status" in + ok) log_success "${name}" ;; + warn) log_msg "${name} [${YELLOW}⚠ ${status}${RESET}]" ;; + fail) log_error "${name}" ;; + esac + fi +} + +YELLOW='\033[0;33m' +RESET='\033[0m' + +curl_ok() { curl -fsS --max-time 5 "$@" >/dev/null 2>&1; } +curl_json() { curl -fsS --max-time 5 "$@" 2>/dev/null || echo '{}'; } + +output_header() { + if [[ "$OUTPUT_MODE" == "human" ]]; then + log_header "$1" + fi +} + +# ───────────────────────────────────────────────────────────── +output_header "0. 
PRELIMINARY"
+
+if [[ -f "$COMPOSE_ABS" ]]; then
+    add_check "Compose file exists" "ok" "$(basename "$COMPOSE_ABS")"
+else
+    add_check "Compose file exists" "fail" "$(basename "$COMPOSE_ABS") not found"
+    log_fatal_and_exit "Compose file not found: $COMPOSE_ABS"
+fi
+
+if [[ -f "$ENV_ABS" ]]; then
+    add_check "Env file exists" "ok" "$(basename "$ENV_ABS")"
+else
+    add_check "Env file exists" "warn" "$(basename "$ENV_ABS") not found (may use docker secrets)"
+fi
+
+# ─────────────────────────────────────────────────────────────
+output_header "1. CONTAINER STATUS"
+
+SERVICES_LIST=$(${DOCKER_COMPOSE} ps --format '{{.Service}}' 2>/dev/null || true)
+
+if [[ -z "$SERVICES_LIST" ]]; then
+    add_check "Compose stack running" "fail" "no services"
+else
+    SVC_COUNT=$(echo "$SERVICES_LIST" | wc -l)
+    add_check "Compose stack running" "ok" "${SVC_COUNT} service(s)"
+    while IFS= read -r svc; do
+        svc_health=$(${DOCKER_COMPOSE} ps --format '{{.Service}}|{{.Health}}|{{.Status}}' 2>/dev/null | grep "^${svc}|" || true)
+        if [[ -z "$svc_health" ]]; then
+            add_check "Container: $svc" "warn" "no health output"
+            continue
+        fi
+        health=$(echo "$svc_health" | cut -d'|' -f2)
+        status=$(echo "$svc_health" | cut -d'|' -f3)
+        # Anchored match so "unhealthy" is not mistaken for "healthy"
+        if echo "$health" | grep -qiE '^(healthy|none)$'; then
+            add_check "Container: $svc" "ok" "$health / $status"
+        elif echo "$status" | grep -qi "up\|running"; then
+            add_check "Container: $svc" "ok" "no healthcheck / $status"
+        else
+            add_check "Container: $svc" "fail" "$health / $status"
+        fi
+    done <<<"$SERVICES_LIST"
+fi
+
+# ─────────────────────────────────────────────────────────────
+output_header "2. MASTER"
+
+if curl_ok http://localhost:9333/cluster/status; then
+    add_check "Master HTTP (9333)" "ok" ""
+else
+    add_check "Master HTTP (9333)" "fail" "unreachable"
+fi
+
+if curl_ok http://localhost:19333/debug/vars; then
+    add_check "Master gRPC (19333)" "ok" ""
+else
+    add_check "Master gRPC (19333)" "warn" "unreachable (may be normal)"
+fi
+
+MASTER_JSON=$(curl_json http://localhost:9333/cluster/status)
+MASTER_LEADER=$(echo "$MASTER_JSON" | jq -r '.Leader // "unknown"' 2>/dev/null)
+MASTER_IS_LEADER=$(echo "$MASTER_JSON" | jq -r '.IsLeader // "unknown"' 2>/dev/null)
+MASTER_MAX_VOL=$(echo "$MASTER_JSON" | jq -r '.MaxVolumeId // "unknown"' 2>/dev/null)
+add_check "Master topology" "ok" "leader=${MASTER_LEADER}, isLeader=${MASTER_IS_LEADER}, maxVolId=${MASTER_MAX_VOL}"
+
+# ─────────────────────────────────────────────────────────────
+output_header "3. VOLUME SERVERS"
+
+for i in $(seq 1 $VOL_COUNT); do
+    port=$((VOL_BASE_PORT + i - 1))
+    grpc_port=$((VOL_BASE_GRPC + i - 1))
+
+    if [[ "$COMPOSE_PROFILE" == "local" ]]; then
+        svc_name="sds-gateway-${ENV_TYPE}-sfs-volume"
+    else
+        svc_name="sds-gateway-${ENV_TYPE}-sfs-volume${i}"
+    fi
+
+    if curl_ok "http://localhost:${port}/healthz"; then
+        add_check "${svc_name} HTTP (${port})" "ok" ""
+    else
+        add_check "${svc_name} HTTP (${port})" "fail" "healthz unreachable"
+    fi
+
+    if curl_ok "http://localhost:${grpc_port}/debug/vars"; then
+        add_check "${svc_name} gRPC (${grpc_port})" "ok" ""
+    else
+        add_check "${svc_name} gRPC (${grpc_port})" "warn" "debug/vars unreachable"
+    fi
+done
+
+# ─────────────────────────────────────────────────────────────
+output_header "4. CLUSTER INFO"
+
+if [[ "$MASTER_JSON" != "{}" ]]; then
+    # Try to get volume/filer info from master (only available in some SeaweedFS versions)
+    VOL_SERVERS=$(echo "$MASTER_JSON" | jq '[.Volumes[]? // {} | .url // empty] | length' 2>/dev/null || echo "-1")
+    FILER_COUNT=$(echo "$MASTER_JSON" | jq '(.Filers // .filers // []) | length' 2>/dev/null || echo "-1")
+
+    if [[ "$VOL_SERVERS" -eq -1 ]]; then
+        add_check "Volume servers registered" "warn" "master JSON has no Volumes field (may be normal)"
+    elif [[ "$VOL_SERVERS" -eq "$VOL_COUNT" ]]; then
+        add_check "Volume servers registered" "ok" "${VOL_SERVERS}/${VOL_COUNT}"
+    else
+        add_check "Volume servers registered" "warn" "master reports ${VOL_SERVERS}, expected ${VOL_COUNT}"
+    fi
+
+    if [[ "$FILER_COUNT" -eq -1 || "$FILER_COUNT" -eq 0 ]]; then
+        add_check "Filers registered" "warn" "master JSON has no Filers field (may be normal)"
+    else
+        add_check "Filers registered" "ok" "${FILER_COUNT}"
+    fi
+
+    VOL_DISTRIBUTION=$(echo "$MASTER_JSON" | jq -r '.Volumes[]? | "Volume \(.id): \(.url) DC=\(.dataCenter // "?") Rack=\(.rack // "?")"' 2>/dev/null || echo "")
+    if [[ -n "$VOL_DISTRIBUTION" ]]; then
+        add_check "Volume distribution" "ok" "$(echo "$VOL_DISTRIBUTION" | head -c 200)"
+    fi
+else
+    add_check "Cluster info" "fail" "master /cluster/status returned empty"
+fi
+
+# ─────────────────────────────────────────────────────────────
+output_header "5. FILER"
+
+if curl_ok "http://localhost:${SFS_FILER_PORT:-8888}/"; then
+    add_check "Filer HTTP (${SFS_FILER_PORT:-8888})" "ok" ""
+else
+    add_check "Filer HTTP (${SFS_FILER_PORT:-8888})" "fail" "unreachable"
+fi
+
+if curl_ok http://localhost:18888/; then
+    add_check "Filer gRPC (18888)" "ok" ""
+else
+    add_check "Filer gRPC (18888)" "warn" "unreachable (may be normal)"
+fi
+
+# ─────────────────────────────────────────────────────────────
+output_header "6. S3 GATEWAY"
+
+if curl_ok http://localhost:8333/healthz; then
+    add_check "S3 HTTP (8333)" "ok" ""
+else
+    add_check "S3 HTTP (8333)" "fail" "healthz unreachable"
+fi
+
+S3_LIST=$(curl -fsS --max-time 5 http://localhost:8333/ 2>/dev/null || echo "unavailable")
+if echo "$S3_LIST" | grep -q '<ListAllMyBucketsResult' 2>/dev/null; then
+    BUCKET_COUNT=$(echo "$S3_LIST" | grep -c '<Bucket>' 2>/dev/null || echo "0")
+    add_check "S3 list buckets" "ok" "${BUCKET_COUNT} bucket(s)"
+elif echo "$S3_LIST" | grep -q 'unavailable\|403\|401\|405' 2>/dev/null; then
+    add_check "S3 list buckets" "warn" "auth/no-buckets (may be normal)"
+else
+    add_check "S3 list buckets" "warn" "unexpected response: $(echo "$S3_LIST" | head -c 100)"
+fi
+
+# ─────────────────────────────────────────────────────────────
+output_header "7. WEBDAV"
+
+if [[ "$HAS_WEBDAV" == "true" ]]; then
+    if curl_ok -o /dev/null "http://localhost:${SFS_WEBDAV_PORT:-7333}/"; then
+        add_check "WebDAV HTTP (${SFS_WEBDAV_PORT:-7333})" "ok" ""
+    else
+        # 405 may mean WebDAV is running but GET on / is not supported;
+        # no -f here, so 4xx codes are captured instead of failing the call
+        WEBDAV_CODE=$(curl -sS --max-time 5 -o /dev/null -w '%{http_code}' "http://localhost:${SFS_WEBDAV_PORT:-7333}/" 2>/dev/null || true)
+        if [[ "$WEBDAV_CODE" == "405" ]]; then
+            add_check "WebDAV HTTP (${SFS_WEBDAV_PORT:-7333})" "ok" "responding (405 on / is normal)"
+        else
+            add_check "WebDAV HTTP (${SFS_WEBDAV_PORT:-7333})" "warn" "unexpected status ${WEBDAV_CODE:-000}"
+        fi
+    fi
+else
+    add_check "WebDAV" "warn" "not in ${COMPOSE_PROFILE} profile"
+fi
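+
+# A deeper WebDAV probe could list the root collection via PROPFIND, a sketch
+# assuming unauthenticated listing is allowed on this port:
+#   curl -fsS -X PROPFIND -H "Depth: 1" "http://localhost:${SFS_WEBDAV_PORT:-7333}/"
+
+# ─────────────────────────────────────────────────────────────
+output_header "8. 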
ADMIN & WORKER" + +if [[ "$HAS_ADMIN" == "true" ]]; then + if curl_ok http://localhost:23646/; then + add_check "Admin HTTP (23646)" "ok" "" + else + add_check "Admin HTTP (23646)" "fail" "unreachable" + fi + + WORKER_JSON=$(curl_json http://localhost:23646/admin/worker) + if echo "$WORKER_JSON" | jq -e 'keys | length > 0' >/dev/null 2>&1; then + add_check "Worker plugin" "ok" "$(echo "$WORKER_JSON" | jq -r 'keys | join(", ") // "active"' 2>/dev/null)" + else + add_check "Worker plugin" "warn" "status unknown" + fi +else + add_check "Admin HTTP (23646)" "warn" "not in ${COMPOSE_PROFILE} profile" + add_check "Worker plugin" "warn" "not in ${COMPOSE_PROFILE} profile" +fi + +# ───────────────────────────────────────────────────────────── +output_header "9. METRICS" + +PROM_HTTP_PORT="${SFS_PROM_HOST_PORT:-9090}" + +if curl_ok "http://localhost:${PROM_HTTP_PORT}/-/healthy"; then + add_check "Prometheus HTTP (${PROM_HTTP_PORT})" "ok" "" +else + add_check "Prometheus HTTP (${PROM_HTTP_PORT})" "warn" "unreachable (may be normal)" +fi + +if [[ "$HAS_PROMETHEUS" == "true" && "$HAS_PUSHGATEWAY" == "true" ]]; then + if curl_ok http://localhost:9091/-/healthy; then + add_check "Pushgateway HTTP (9091)" "ok" "" + else + add_check "Pushgateway HTTP (9091)" "fail" "unreachable" + fi +fi + +if [[ "$HAS_PROMETHEUS" == "true" ]]; then + PROM_TARGETS=$(curl_json "http://localhost:${PROM_HTTP_PORT}/api/v1/targets") + if echo "$PROM_TARGETS" | jq -e '.data.activeTargets | length > 0' >/dev/null 2>&1; then + PROM_OK=$(echo "$PROM_TARGETS" | jq '[.data.activeTargets[]? | select(.health == "up")] | length' 2>/dev/null || echo "0") + PROM_TOTAL=$(echo "$PROM_TARGETS" | jq '.data.activeTargets | length' 2>/dev/null || echo "0") + if [[ "$PROM_OK" -eq "$PROM_TOTAL" ]]; then + add_check "Prometheus targets" "ok" "${PROM_OK}/${PROM_TOTAL} healthy" + else + add_check "Prometheus targets" "warn" "${PROM_OK}/${PROM_TOTAL} healthy" + fi + else + add_check "Prometheus targets" "warn" "no active targets" + fi +else + add_check "Prometheus targets" "warn" "not in ${COMPOSE_PROFILE} profile" +fi + +if [[ "$HAS_GRAFANA" == "true" ]]; then + if curl_ok http://localhost:3000/api/health; then + GRAFANA_HEALTH=$(curl_json http://localhost:3000/api/health) + add_check "Grafana HTTP (3000)" "ok" "$(echo "$GRAFANA_HEALTH" | jq -r '.version // "ok"' 2>/dev/null || echo "ok")" + else + add_check "Grafana HTTP (3000)" "fail" "unreachable" + fi +else + add_check "Grafana" "warn" "not in ${COMPOSE_PROFILE} profile" +fi + +# ───────────────────────────────────────────────────────────── +output_header "10. 
DISK SPACE"
+
+if [[ "$COMPOSE_PROFILE" == "production" && -n "$DISK_BASE" ]]; then
+    for disk in 1 2 3 4 5; do
+        if [[ -d "${DISK_BASE}${disk}/data" ]]; then
+            DF_RESULT=$(df -h "${DISK_BASE}${disk}/data" 2>/dev/null || echo "unavailable")
+            if echo "$DF_RESULT" | grep -q "Filesystem"; then
+                USE_PCT=$(echo "$DF_RESULT" | tail -1 | awk '{print $5}' | tr -d '%')
+                AVAIL=$(echo "$DF_RESULT" | tail -1 | awk '{print $4}')
+                if [[ "$USE_PCT" =~ ^[0-9]+$ ]] && [[ "$USE_PCT" -ge 90 ]]; then
+                    add_check "Disk /disk${disk}/data" "warn" "${USE_PCT}% used (${AVAIL} avail)"
+                else
+                    add_check "Disk /disk${disk}/data" "ok" "${USE_PCT}% used (${AVAIL} avail)"
+                fi
+            else
+                add_check "Disk /disk${disk}/data" "warn" "not mounted"
+            fi
+        else
+            add_check "Disk /disk${disk}/data" "warn" "directory not found"
+        fi
+    done
+else
+    for d in data/master data/volumes data/filer; do
+        if [[ -d "${PROJECT_DIR}/${d}" ]]; then
+            # Strip the trailing '%' so the numeric threshold below can fire
+            USE_PCT=$(df "${PROJECT_DIR}/${d}" 2>/dev/null | tail -1 | awk '{print $5}' | tr -d '%' || echo "?")
+            if [[ "$USE_PCT" =~ ^[0-9]+$ ]] && [[ "$USE_PCT" -ge 90 ]]; then
+                add_check "Dir ${d}" "warn" "${USE_PCT}% (high)"
+            else
+                add_check "Dir ${d}" "ok" "${USE_PCT}%"
+            fi
+        else
+            add_check "Dir ${d}" "warn" "not found"
+        fi
+    done
+fi
+
+# ─────────────────────────────────────────────────────────────
+output_header "11. CROSS-SERVICE DEPENDENCIES"
+
+# Volume → master registration
+if [[ "$MASTER_JSON" != "{}" ]]; then
+    VOL_SERVERS_CHECK=$(echo "$MASTER_JSON" | jq '[.Volumes[]? // {} | .url // empty] | length' 2>/dev/null || echo "-1")
+    if [[ "$VOL_SERVERS_CHECK" -ne -1 ]]; then
+        for i in $(seq 1 $VOL_COUNT); do
+            if [[ "$COMPOSE_PROFILE" == "local" ]]; then
+                svc_name="sds-gateway-${ENV_TYPE}-sfs-volume"
+            else
+                svc_name="sds-gateway-${ENV_TYPE}-sfs-volume${i}"
+            fi
+            if [[ "$VOL_SERVERS_CHECK" -gt 0 ]]; then
+                add_check "${svc_name} → master" "ok" "registered"
+            else
+                add_check "${svc_name} → master" "warn" "not in master registry"
+            fi
+        done
+    else
+        # Fallback: master HTTP is up, assume connectivity
+        for i in $(seq 1 $VOL_COUNT); do
+            if [[ "$COMPOSE_PROFILE" == "local" ]]; then
+                svc_name="sds-gateway-${ENV_TYPE}-sfs-volume"
+            else
+                svc_name="sds-gateway-${ENV_TYPE}-sfs-volume${i}"
+            fi
+            add_check "${svc_name} → master" "ok" "master HTTP reachable"
+        done
+    fi
+fi
+
+# Filer → master connectivity
+if curl_ok "http://localhost:${SFS_FILER_PORT:-8888}/"; then
+    add_check "Filer → master" "ok" "filer responding"
+else
+    add_check "Filer → master" "fail" "filer unreachable"
+fi
+
+# S3 → filer connectivity
+S3_FILER=$(docker exec "sds-gateway-${ENV_TYPE}-sfs-s3" \
+    weed s3.filer 2>/dev/null || echo "unknown")
+if [[ "$S3_FILER" != "unknown" ]]; then
+    add_check "S3 → filer" "ok" "connected to ${S3_FILER}"
+else
+    add_check "S3 → filer" "warn" "can't verify connection"
+fi
+
+# ─────────────────────────────────────────────────────────────
+output_header "12. DOCKER CLEANUP"
+
+RUNNING_COUNT=$(${DOCKER_COMPOSE} ps --format '{{.Service}}' 2>/dev/null | wc -l || echo "0")
+add_check "Running services" "ok" "${RUNNING_COUNT}"
+
+NETWORK_NAME="sds-gateway-${ENV_TYPE}-seaweed-net"
+ORPHANS=$(${DOCKER_COMPOSE} ps --format '{{.Name}}' 2>/dev/null || echo "")
+ORPHAN_LIST=$(docker ps -q --filter "network=${NETWORK_NAME}" 2>/dev/null | while read -r cid; do
+    cname=$(docker inspect --format '{{.Name}}' "$cid" 2>/dev/null | sed 's|^/||')
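+    # A container attached to the stack network but not listed as a compose
+    # service is treated as an orphan.
+    if ! 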
echo "$ORPHANS" | grep -qw "$cname"; then
+        echo "$cname"
+    fi
+done || true)
+
+if [[ -n "$ORPHAN_LIST" ]]; then
+    add_check "Orphaned containers" "warn" "$ORPHAN_LIST"
+else
+    add_check "Orphaned containers" "ok" "none"
+fi
+
+# ─────────────────────────────────────────────────────────────
+output_header "SUMMARY"
+
+if [[ "$OUTPUT_MODE" == "human" ]]; then
+    printf "  Checks: %d | ✓ %d OK | ⚠ %d WARN | ✗ %d FAIL\n" "$TOTAL" "$OK" "$WARN" "$FAIL"
+fi
+
+# ── JSON output ───────────────────────────────────────────────
+if [[ "$OUTPUT_MODE" == "json" ]]; then
+    jq -n \
+        --argjson checks "$JSON_CHECKS" \
+        --arg total "$TOTAL" \
+        --arg ok "$OK" \
+        --arg warn "$WARN" \
+        --arg fail "$FAIL" \
+        --arg env "$ENV_TYPE" \
+        --arg profile "$COMPOSE_PROFILE" \
+        --arg compose_file "$COMPOSE_FILE" \
+        '{
+            env: $env,
+            profile: $profile,
+            compose_file: $compose_file,
+            total: ($total | tonumber),
+            ok: ($ok | tonumber),
+            warn: ($warn | tonumber),
+            fail: ($fail | tonumber),
+            status: (if ($fail | tonumber) > 0 then "failed" elif ($warn | tonumber) > 0 then "warning" else "ok" end),
+            checks: $checks
+        }'
+fi
+
+# ── EXIT ──────────────────────────────────────────────────────
+# Log only in human mode so --json/--silent output stays machine-readable
+if [[ "$FAIL" -gt 0 ]]; then
+    [[ "$OUTPUT_MODE" == "human" ]] && log_error "HEALTH CHECK FAILED"
+    exit 1
+elif [[ "$WARN" -gt 0 ]]; then
+    [[ "$OUTPUT_MODE" == "human" ]] && log_msg "HEALTH CHECK PASSED WITH WARNINGS"
+    exit 0
+else
+    [[ "$OUTPUT_MODE" == "human" ]] && log_success "ALL HEALTH CHECKS PASSED"
+    exit 0
+fi
diff --git a/seaweedfs/scripts/prod-hostnames.example.env b/seaweedfs/scripts/prod-hostnames.example.env
new file mode 100644
index 000000000..7f0613204
--- /dev/null
+++ b/seaweedfs/scripts/prod-hostnames.example.env
@@ -0,0 +1,9 @@
+# Production hostnames — one per line.
+# The deploy script checks the current hostname against this list when deploying
+# to production, preventing accidental deploys on non-production machines.
+#
+# Add the hostname of each production server below, one per line.
+# Get the hostname with: hostname
+#
+# example-prod-host-01
+# example-prod-host-02
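+#
+# To allowlist the current machine, append its hostname (deploy.sh prints the
+# same hint when this check fails):
+#   echo "$(hostname)" >> seaweedfs/scripts/prod-hostnames.env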