From 6eb60969d0cb6604db4919b29304b8db5be4c5a7 Mon Sep 17 00:00:00 2001 From: Koen Date: Thu, 19 Feb 2026 13:48:23 +0200 Subject: [PATCH 01/25] =?UTF-8?q?=F0=9F=92=9A=20automate=20indexer=20deplo?= =?UTF-8?q?yment?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/check-indexer-sync.yaml | 227 ++++++++++++++++++ .../workflows/deploy-indexer-bluegreen.yaml | 176 ++++++++++++++ atp-indexer/src/api/handlers/sync-status.ts | 82 +++++++ atp-indexer/src/api/index.ts | 2 + .../src/api/routes/sync-status.routes.ts | 11 + scripts/init-deployment-state.sh | 114 +++++++++ staking-dashboard/bootstrap.sh | 21 +- staking-dashboard/terraform/data.tf | 1 + staking-dashboard/terraform/main.tf | 118 ++++++++- 9 files changed, 729 insertions(+), 23 deletions(-) create mode 100644 .github/workflows/check-indexer-sync.yaml create mode 100644 .github/workflows/deploy-indexer-bluegreen.yaml create mode 100644 atp-indexer/src/api/handlers/sync-status.ts create mode 100644 atp-indexer/src/api/routes/sync-status.routes.ts create mode 100755 scripts/init-deployment-state.sh diff --git a/.github/workflows/check-indexer-sync.yaml b/.github/workflows/check-indexer-sync.yaml new file mode 100644 index 000000000..50b85f897 --- /dev/null +++ b/.github/workflows/check-indexer-sync.yaml @@ -0,0 +1,227 @@ +name: Check Indexer Sync & Switchover + +# Cron workflow that checks if a pending blue-green switchover is ready. +# Runs every 30 minutes. When no switchover is pending, exits in <10s. +# When a pending switchover's slave indexer reports synced: +# 1. Updates the frontend CloudFront's indexer origin to point to the new live backend +# 2. Updates the S3 deployment state +# 3. 
Triggers a deploy of the old live backend (so both end up updated) + +on: + schedule: + - cron: '*/30 * * * *' + workflow_dispatch: + inputs: + environment: + description: "Check specific environment only (leave empty for all)" + required: false + type: choice + options: + - "" + - testnet + - prod + +permissions: + id-token: write + contents: read + actions: write + +jobs: + check-and-switch: + runs-on: ubuntu-latest + strategy: + matrix: + environment: [testnet, prod] + # For manual runs targeting a specific env, skip others + if: >- + github.event_name == 'schedule' || + inputs.environment == '' || + inputs.environment == matrix.environment + environment: ${{ matrix.environment }} + env: + STATE_BUCKET: aztec-token-sale-terraform-state + AWS_REGION: ${{ secrets.AWS_DEFAULT_REGION }} + + steps: + - uses: actions/checkout@v4 + + - name: Configure AWS credentials with GitHub OIDC + uses: aws-actions/configure-aws-credentials@v4 + with: + role-to-assume: ${{ vars.AWS_OIDC_ROLE_ARN }} + role-session-name: ${{ github.run_id }}-${{ matrix.environment }} + aws-region: ${{ secrets.AWS_DEFAULT_REGION }} + + - name: Read deployment state + id: state + run: | + STATE_KEY="deployment-state/${{ matrix.environment }}.json" + + if ! 
aws s3 cp "s3://${STATE_BUCKET}/${STATE_KEY}" /tmp/deploy-state.json 2>/dev/null; then + echo "No deployment state for ${{ matrix.environment }}, skipping" + echo "has_pending=false" >> $GITHUB_OUTPUT + exit 0 + fi + + PENDING=$(jq -r '.pending_switchover' /tmp/deploy-state.json) + if [ "$PENDING" = "null" ]; then + echo "No pending switchover for ${{ matrix.environment }}" + echo "has_pending=false" >> $GITHUB_OUTPUT + exit 0 + fi + + echo "has_pending=true" >> $GITHUB_OUTPUT + + TARGET_COLOR=$(jq -r '.pending_switchover.target_color' /tmp/deploy-state.json) + STARTED_AT=$(jq -r '.pending_switchover.started_at' /tmp/deploy-state.json) + LIVE_COLOR=$(jq -r '.live_color' /tmp/deploy-state.json) + TARGET_CF_DOMAIN=$(jq -r ".colors.${TARGET_COLOR}.cf_domain" /tmp/deploy-state.json) + FRONTEND_DIST_ID=$(jq -r '.frontend_distribution_id' /tmp/deploy-state.json) + + echo "target_color=$TARGET_COLOR" >> $GITHUB_OUTPUT + echo "live_color=$LIVE_COLOR" >> $GITHUB_OUTPUT + echo "target_cf_domain=$TARGET_CF_DOMAIN" >> $GITHUB_OUTPUT + echo "frontend_dist_id=$FRONTEND_DIST_ID" >> $GITHUB_OUTPUT + echo "started_at=$STARTED_AT" >> $GITHUB_OUTPUT + + echo "Pending switchover: $LIVE_COLOR → $TARGET_COLOR (since $STARTED_AT)" + + - name: Check timeout (2 hours) + if: steps.state.outputs.has_pending == 'true' + id: timeout + run: | + STARTED_AT="${{ steps.state.outputs.started_at }}" + STARTED_EPOCH=$(date -d "$STARTED_AT" +%s 2>/dev/null || date -j -f "%Y-%m-%dT%H:%M:%SZ" "$STARTED_AT" +%s) + NOW_EPOCH=$(date +%s) + ELAPSED=$(( NOW_EPOCH - STARTED_EPOCH )) + TIMEOUT=7200 # 2 hours + + if [ "$ELAPSED" -gt "$TIMEOUT" ]; then + echo "::error::Switchover timed out after $(( ELAPSED / 60 )) minutes" + echo "timed_out=true" >> $GITHUB_OUTPUT + + # Clear the pending switchover + jq '.pending_switchover = null' /tmp/deploy-state.json > /tmp/deploy-state-updated.json + aws s3 cp /tmp/deploy-state-updated.json \ + "s3://${STATE_BUCKET}/deployment-state/${{ matrix.environment }}.json" \ + 
--content-type "application/json" + else + echo "Elapsed: $(( ELAPSED / 60 )) minutes (timeout: $(( TIMEOUT / 60 )) minutes)" + echo "timed_out=false" >> $GITHUB_OUTPUT + fi + + - name: Check slave sync status + if: steps.state.outputs.has_pending == 'true' && steps.timeout.outputs.timed_out == 'false' + id: sync + run: | + TARGET_CF_DOMAIN="${{ steps.state.outputs.target_cf_domain }}" + SYNC_URL="https://${TARGET_CF_DOMAIN}/api/sync-status" + + echo "Checking sync status at: $SYNC_URL" + + HTTP_RESPONSE=$(curl -s -w "\n%{http_code}" --max-time 30 "$SYNC_URL" 2>/dev/null || echo -e "\n000") + HTTP_BODY=$(echo "$HTTP_RESPONSE" | head -n -1) + HTTP_CODE=$(echo "$HTTP_RESPONSE" | tail -n 1) + + echo "HTTP Status: $HTTP_CODE" + echo "Response: $HTTP_BODY" + + if [ "$HTTP_CODE" != "200" ]; then + echo "Sync endpoint not ready (HTTP $HTTP_CODE)" + echo "is_synced=false" >> $GITHUB_OUTPUT + exit 0 + fi + + IS_SYNCED=$(echo "$HTTP_BODY" | jq -r '.synced') + BEHIND=$(echo "$HTTP_BODY" | jq -r '.behindBlocks') + HAS_DATA=$(echo "$HTTP_BODY" | jq -r '.hasData') + + echo "Synced: $IS_SYNCED | Behind: $BEHIND blocks | Has data: $HAS_DATA" + + if [ "$IS_SYNCED" = "true" ]; then + echo "is_synced=true" >> $GITHUB_OUTPUT + else + echo "is_synced=false" >> $GITHUB_OUTPUT + fi + + - name: Switch frontend CloudFront indexer origin + if: steps.state.outputs.has_pending == 'true' && steps.timeout.outputs.timed_out == 'false' && steps.sync.outputs.is_synced == 'true' + run: | + FRONTEND_DIST_ID="${{ steps.state.outputs.frontend_dist_id }}" + NEW_ORIGIN="${{ steps.state.outputs.target_cf_domain }}" + + echo "Switching indexer origin on frontend CloudFront $FRONTEND_DIST_ID to: $NEW_ORIGIN" + + # Get current distribution config + aws cloudfront get-distribution-config --id "$FRONTEND_DIST_ID" --output json > /tmp/cf-config.json + ETAG=$(jq -r '.ETag' /tmp/cf-config.json) + + # Update the "indexerOrigin" origin's domain name (not the S3 origin) + jq --arg domain "$NEW_ORIGIN" \ + 
'(.DistributionConfig.Origins.Items[] | select(.Id == "indexerOrigin")).DomainName = $domain' \ + /tmp/cf-config.json | jq '.DistributionConfig' > /tmp/cf-config-updated.json + + # Apply the update + aws cloudfront update-distribution \ + --id "$FRONTEND_DIST_ID" \ + --distribution-config file:///tmp/cf-config-updated.json \ + --if-match "$ETAG" \ + --no-cli-pager + + # Invalidate /api/* cache + aws cloudfront create-invalidation \ + --distribution-id "$FRONTEND_DIST_ID" \ + --paths "/api/*" \ + --no-cli-pager + + echo "Frontend CloudFront indexer origin updated and /api/* cache invalidated" + + - name: Update deployment state + if: steps.state.outputs.has_pending == 'true' && steps.timeout.outputs.timed_out == 'false' && steps.sync.outputs.is_synced == 'true' + run: | + NEW_LIVE="${{ steps.state.outputs.target_color }}" + + jq --arg live "$NEW_LIVE" \ + '.live_color = $live | .pending_switchover = null' \ + /tmp/deploy-state.json > /tmp/deploy-state-updated.json + + echo "Updated state:" + cat /tmp/deploy-state-updated.json + + aws s3 cp /tmp/deploy-state-updated.json \ + "s3://${STATE_BUCKET}/deployment-state/${{ matrix.environment }}.json" \ + --content-type "application/json" + + - name: Trigger deploy to old live (now slave) + if: steps.state.outputs.has_pending == 'true' && steps.timeout.outputs.timed_out == 'false' && steps.sync.outputs.is_synced == 'true' + uses: actions/github-script@v7 + with: + script: | + const oldLive = '${{ steps.state.outputs.live_color }}'; + const env = '${{ matrix.environment }}'; + + console.log(`Triggering deploy to old live (${oldLive}) for ${env}`); + + await github.rest.actions.createWorkflowDispatch({ + owner: context.repo.owner, + repo: context.repo.repo, + workflow_id: 'deploy-indexer.yaml', + ref: 'main', + inputs: { + environment: env, + green: oldLive === 'green' ? 
'true' : 'false', + dry_run: 'false' + } + }); + + console.log(`Deploy triggered for ${env} ${oldLive}`); + + - name: Write summary + if: steps.state.outputs.has_pending == 'true' && steps.timeout.outputs.timed_out == 'false' && steps.sync.outputs.is_synced == 'true' + run: | + echo "### Switchover Complete" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "- **Environment:** ${{ matrix.environment }}" >> $GITHUB_STEP_SUMMARY + echo "- **New live:** ${{ steps.state.outputs.target_color }}" >> $GITHUB_STEP_SUMMARY + echo "- **Old live (${{ steps.state.outputs.live_color }}):** deploy triggered to update" >> $GITHUB_STEP_SUMMARY + echo "- **Frontend CloudFront:** ${{ steps.state.outputs.frontend_dist_id }}" >> $GITHUB_STEP_SUMMARY diff --git a/.github/workflows/deploy-indexer-bluegreen.yaml b/.github/workflows/deploy-indexer-bluegreen.yaml new file mode 100644 index 000000000..c73180954 --- /dev/null +++ b/.github/workflows/deploy-indexer-bluegreen.yaml @@ -0,0 +1,176 @@ +name: Deploy Indexer (Blue-Green) + +# Automated blue-green deployment for the ATP indexer. +# Reads deployment state from S3 to determine which color is the slave, +# deploys the indexer to the slave, and marks a switchover as pending. +# The check-indexer-sync.yaml cron workflow handles the actual switchover +# once the slave finishes re-indexing. 
+ +on: + workflow_dispatch: + inputs: + environment: + description: "Environment to deploy to" + required: true + default: "testnet" + type: choice + options: + - testnet + - prod + dry_run: + description: "Whether to run a dry run (plan only)" + required: false + default: false + type: boolean + force: + description: "Force deploy even if a switchover is already pending" + required: false + default: false + type: boolean + +permissions: + id-token: write + contents: read + actions: read + +jobs: + deploy-to-slave: + runs-on: ubuntu-latest + environment: ${{ inputs.environment }} + env: + ENV: ${{ inputs.environment }} + DRY_RUN: ${{ inputs.dry_run }} + FORCE: ${{ inputs.force }} + + # AWS Configuration + AWS_ACCOUNT: ${{ secrets.AWS_ACCOUNT }} + AWS_REGION: ${{ secrets.AWS_DEFAULT_REGION }} + + # Network Configuration + RPC_URL: ${{ secrets.RPC_URL }} + CHAIN_ID: ${{ vars.CHAIN_ID }} + SEPOLIA_RPC_URL: ${{ secrets.RPC_URL }} + TESTNET_RPC_URL: ${{ secrets.RPC_URL }} + + # Contract Addresses (from GitHub environment variables) + ATP_FACTORY_ADDRESS: ${{ vars.ATP_FACTORY_ADDRESS }} + ATP_FACTORY_AUCTION_ADDRESS: ${{ vars.ATP_FACTORY_AUCTION_ADDRESS }} + ATP_REGISTRY_ADDRESS: ${{ vars.ATP_REGISTRY_ADDRESS }} + ATP_REGISTRY_AUCTION_ADDRESS: ${{ vars.ATP_REGISTRY_AUCTION_ADDRESS }} + STAKING_REGISTRY_ADDRESS: ${{ vars.STAKING_REGISTRY_ADDRESS }} + ROLLUP_ADDRESS: ${{ vars.ROLLUP_ADDRESS }} + START_BLOCK: ${{ vars.ATP_FACTORY_DEPLOYMENT_BLOCK }} + + steps: + - uses: actions/checkout@v4 + + - name: Setup Node.js + uses: actions/setup-node@v4 + with: + node-version: '20' + cache: 'yarn' + cache-dependency-path: atp-indexer/yarn.lock + + - name: Install Foundry + uses: foundry-rs/foundry-toolchain@v1 + + - name: Install Terraform + uses: hashicorp/setup-terraform@v3 + + - name: Mark repo as safe + run: git config --global --add safe.directory $GITHUB_WORKSPACE + + - name: Configure AWS credentials with GitHub OIDC + uses: aws-actions/configure-aws-credentials@v4 + with: + 
role-to-assume: ${{ vars.AWS_OIDC_ROLE_ARN }} + role-session-name: ${{ github.run_id }} + aws-region: ${{ secrets.AWS_DEFAULT_REGION }} + + - name: Verify AWS identity + run: aws sts get-caller-identity + + - name: Read deployment state from S3 + id: state + run: | + STATE_KEY="deployment-state/${{ inputs.environment }}.json" + STATE_BUCKET="aztec-token-sale-terraform-state" + + if aws s3 cp "s3://${STATE_BUCKET}/${STATE_KEY}" /tmp/deploy-state.json 2>/dev/null; then + echo "Found existing deployment state" + cat /tmp/deploy-state.json + + LIVE_COLOR=$(jq -r '.live_color' /tmp/deploy-state.json) + PENDING=$(jq -r '.pending_switchover' /tmp/deploy-state.json) + + # Determine slave color + if [ "$LIVE_COLOR" = "red" ]; then + SLAVE_COLOR="green" + else + SLAVE_COLOR="red" + fi + + echo "live_color=$LIVE_COLOR" >> $GITHUB_OUTPUT + echo "slave_color=$SLAVE_COLOR" >> $GITHUB_OUTPUT + echo "has_pending=$([ "$PENDING" != "null" ] && echo 'true' || echo 'false')" >> $GITHUB_OUTPUT + else + echo "::error::No deployment state found. Run scripts/init-deployment-state.sh first." + exit 1 + fi + + - name: Check for pending switchover + if: steps.state.outputs.has_pending == 'true' && inputs.force == false + run: | + echo "::error::A switchover is already pending. Use force=true to override." 
+ echo "Current state:" + cat /tmp/deploy-state.json + exit 1 + + - name: Deploy indexer to slave (${{ steps.state.outputs.slave_color }}) + working-directory: atp-indexer + run: | + SLAVE="${{ steps.state.outputs.slave_color }}" + echo "Deploying to ${{ inputs.environment }} ($SLAVE)" + + if [ "${{ inputs.environment }}" = "testnet" ]; then + if [ "$SLAVE" = "green" ]; then + ./bootstrap.sh deploy-testnet-green + else + ./bootstrap.sh deploy-testnet + fi + elif [ "${{ inputs.environment }}" = "prod" ]; then + if [ "$SLAVE" = "green" ]; then + ./bootstrap.sh deploy-prod-green + else + ./bootstrap.sh deploy-prod + fi + fi + + - name: Update deployment state with pending switchover + if: inputs.dry_run == false + run: | + SLAVE="${{ steps.state.outputs.slave_color }}" + NOW=$(date -u +"%Y-%m-%dT%H:%M:%SZ") + + # Update state with pending switchover + jq --arg target "$SLAVE" \ + --arg started "$NOW" \ + --arg sha "${{ github.sha }}" \ + '.pending_switchover = { target_color: $target, started_at: $started, commit_sha: $sha }' \ + /tmp/deploy-state.json > /tmp/deploy-state-updated.json + + echo "Updated deployment state:" + cat /tmp/deploy-state-updated.json + + aws s3 cp /tmp/deploy-state-updated.json \ + "s3://aztec-token-sale-terraform-state/deployment-state/${{ inputs.environment }}.json" \ + --content-type "application/json" + + echo "### Deployment Summary" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "- **Environment:** ${{ inputs.environment }}" >> $GITHUB_STEP_SUMMARY + echo "- **Deployed to:** $SLAVE (slave)" >> $GITHUB_STEP_SUMMARY + echo "- **Live:** ${{ steps.state.outputs.live_color }}" >> $GITHUB_STEP_SUMMARY + echo "- **Commit:** ${{ github.sha }}" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "The check-indexer-sync cron will monitor and switch over once indexing completes." 
>> $GITHUB_STEP_SUMMARY
diff --git a/atp-indexer/src/api/handlers/sync-status.ts b/atp-indexer/src/api/handlers/sync-status.ts
new file mode 100644
index 000000000..ae072ab72
--- /dev/null
+++ b/atp-indexer/src/api/handlers/sync-status.ts
@@ -0,0 +1,95 @@
+import type { Context } from 'hono';
+import { db } from 'ponder:api';
+import { max, count } from 'drizzle-orm';
+import { deposit, provider, atpPosition } from 'ponder:schema';
+import { getPublicClient } from '../../utils/viem-client';
+
+interface SyncStatusResponse {
+  synced: boolean;
+  indexedBlock: number;
+  chainHead: number;
+  behindBlocks: number;
+  hasData: boolean;
+  timestamp: string;
+}
+
+const SYNC_THRESHOLD_BLOCKS = 50;
+
+/**
+ * Handle GET /api/sync-status
+ * Returns the indexer's sync status by comparing the latest indexed block to the chain head.
+ * Used by the blue-green deployment cron to determine when a slave indexer has caught up.
+ *
+ * `synced` is true only when the provider table has rows and the indexed block is fewer than
+ * SYNC_THRESHOLD_BLOCKS behind the chain head. Any error thrown while querying is reported as
+ * HTTP 500 with `synced: false` and `behindBlocks: -1`.
+ *
+ * NOTE(review): `indexedBlock` is the highest `blockNumber` stored in the deposit, provider and
+ * atpPosition tables, i.e. the block of the most recent indexed row rather than the indexer's
+ * own checkpoint. If no such row falls within the threshold of the chain head this reports
+ * `synced: false` even when indexing has caught up - confirm this is acceptable for the cron.
+ *
+ * @param c - Hono request context used to build the JSON response.
+ * @returns JSON response carrying the sync status fields.
+ */
+export async function handleSyncStatus(c: Context): Promise<Response> {
+  try {
+    const client = getPublicClient();
+
+    const [
+      chainHeadBlock,
+      depositMaxBlock,
+      providerMaxBlock,
+      atpMaxBlock,
+      providerCountResult,
+    ] = await Promise.all([
+      client.getBlockNumber(),
+      db.select({ maxBlock: max(deposit.blockNumber) }).from(deposit),
+      db.select({ maxBlock: max(provider.blockNumber) }).from(provider),
+      db.select({ maxBlock: max(atpPosition.blockNumber) }).from(atpPosition),
+      db.select({ count: count() }).from(provider),
+    ]);
+
+    const chainHead = Number(chainHeadBlock);
+
+    // Take the highest block number across all tables
+    const maxBlocks = [
+      depositMaxBlock[0]?.maxBlock,
+      providerMaxBlock[0]?.maxBlock,
+      atpMaxBlock[0]?.maxBlock,
+    ]
+      .filter((b): b is bigint => b !== null && b !== undefined)
+      .map(Number);
+
+    const indexedBlock = maxBlocks.length > 0 ? Math.max(...maxBlocks) : 0;
+    const hasData = Number(providerCountResult[0]?.count ?? 0) > 0;
+    // Clamp at 0 so a chain head reported below the indexed block never yields a negative value; -1 is reserved for the error response below.
+    const behindBlocks = Math.max(0, chainHead - indexedBlock);
+    const synced = behindBlocks < SYNC_THRESHOLD_BLOCKS && hasData;
+
+    const response: SyncStatusResponse = {
+      synced,
+      indexedBlock,
+      chainHead,
+      behindBlocks,
+      hasData,
+      timestamp: new Date().toISOString(),
+    };
+
+    return c.json(response);
+  } catch (error) {
+    console.error('Sync status check failed:', error);
+    return c.json(
+      {
+        synced: false,
+        indexedBlock: 0,
+        chainHead: 0,
+        behindBlocks: -1,
+        hasData: false,
+        timestamp: new Date().toISOString(),
+        error: 'Failed to check sync status',
+      },
+      500
+    );
+  }
+}
diff --git a/atp-indexer/src/api/index.ts b/atp-indexer/src/api/index.ts
index 40a61972f..8e5dd11dc 100644
--- a/atp-indexer/src/api/index.ts
+++ b/atp-indexer/src/api/index.ts
@@ -6,6 +6,7 @@ import { healthRoutes } from './routes/health.routes';
 import { providerRoutes } from './routes/provider.routes';
 import { stakingRoutes } from './routes/staking.routes';
 import { atpRoutes } from './routes/atp.routes';
+import { syncStatusRoutes } from './routes/sync-status.routes';
 import { config } from '../config';
 
 /**
@@ -32,6 +33,7 @@ app.route('/api/health', healthRoutes);
 app.route('/api/providers', providerRoutes);
 app.route('/api/staking', stakingRoutes);
 app.route('/api/atp', atpRoutes);
+app.route('/api/sync-status', syncStatusRoutes);
 
 app.notFound((c) => {
   return c.json({ error: 'Not found' }, 404);
diff --git a/atp-indexer/src/api/routes/sync-status.routes.ts b/atp-indexer/src/api/routes/sync-status.routes.ts
new file mode 100644
index 000000000..c1b461fff
--- /dev/null
+++ b/atp-indexer/src/api/routes/sync-status.routes.ts
@@ -0,0 +1,11 @@
+import { Hono } from 'hono';
+import { handleSyncStatus } from '../handlers/sync-status';
+import { healthCheckLimiter } from '../middleware/rate-limit';
+
+export const syncStatusRoutes = new Hono();
+
+/**
+ * GET /api/sync-status
+ * Returns indexer sync status 
for blue-green deployment automation + */ +syncStatusRoutes.get('/', healthCheckLimiter, handleSyncStatus); diff --git a/scripts/init-deployment-state.sh b/scripts/init-deployment-state.sh new file mode 100755 index 000000000..b8da331de --- /dev/null +++ b/scripts/init-deployment-state.sh @@ -0,0 +1,114 @@ +#!/bin/bash +set -eu + +# Initialize the S3 deployment state files for blue-green indexer deployments. +# Run this once per environment before using the deploy-indexer-bluegreen workflow. +# +# Prerequisites: +# - AWS CLI configured with access to the terraform state bucket +# - Frontend staking-dashboard Terraform already applied +# - Both red and green indexer deployments exist +# +# Usage: +# ./scripts/init-deployment-state.sh +# +# Examples: +# ./scripts/init-deployment-state.sh testnet red +# ./scripts/init-deployment-state.sh prod red + +ROOT=$(git rev-parse --show-toplevel) +source "$ROOT/scripts/logging.sh" + +ENVIRONMENT=${1:-""} +LIVE_COLOR=${2:-""} +STATE_BUCKET="aztec-token-sale-terraform-state" + +if [ -z "$ENVIRONMENT" ] || [ -z "$LIVE_COLOR" ]; then + echo "Usage: $0 " + echo "" + echo " environment: testnet or prod" + echo " live_color: red or green (which color is currently serving traffic)" + echo "" + echo "Examples:" + echo " $0 testnet red" + echo " $0 prod red" + exit 1 +fi + +if [ "$ENVIRONMENT" != "testnet" ] && [ "$ENVIRONMENT" != "prod" ]; then + echo "Error: Environment must be 'testnet' or 'prod'" + exit 1 +fi + +if [ "$LIVE_COLOR" != "red" ] && [ "$LIVE_COLOR" != "green" ]; then + echo "Error: Live color must be 'red' or 'green'" + exit 1 +fi + +log_step "Initializing deployment state for $ENVIRONMENT (live: $LIVE_COLOR)" + +# Get CloudFront domain names from indexer terraform states +log_step "Reading red indexer CloudFront domain from terraform state..." 
+RED_STATE_KEY="${ENVIRONMENT}/backends/atp-indexer/terraform.tfstate" +RED_CF_DOMAIN=$(aws s3 cp "s3://${STATE_BUCKET}/${RED_STATE_KEY}" - | \ + jq -r '.outputs.cf_domain_name.value // empty') + +if [ -z "$RED_CF_DOMAIN" ]; then + echo "Error: Could not read red indexer CloudFront domain from state" + echo "Make sure the red indexer has been deployed for $ENVIRONMENT" + exit 1 +fi +echo " Red CF domain: $RED_CF_DOMAIN" + +log_step "Reading green indexer CloudFront domain from terraform state..." +GREEN_STATE_KEY="${ENVIRONMENT}-green/backends/atp-indexer/terraform.tfstate" +GREEN_CF_DOMAIN=$(aws s3 cp "s3://${STATE_BUCKET}/${GREEN_STATE_KEY}" - | \ + jq -r '.outputs.cf_domain_name.value // empty') + +if [ -z "$GREEN_CF_DOMAIN" ]; then + echo "Error: Could not read green indexer CloudFront domain from state" + echo "Make sure the green indexer has been deployed for $ENVIRONMENT" + exit 1 +fi +echo " Green CF domain: $GREEN_CF_DOMAIN" + +# Get frontend CloudFront distribution ID +log_step "Reading frontend CloudFront distribution ID from terraform state..." +FRONTEND_STATE_KEY="${ENVIRONMENT}-aztec-staking-dashboard/terraform.tfstate" +FRONTEND_DIST_ID=$(aws s3 cp "s3://${STATE_BUCKET}/${FRONTEND_STATE_KEY}" - | \ + jq -r '.outputs.staking_dashboard_distribution_id.value // empty') + +if [ -z "$FRONTEND_DIST_ID" ]; then + echo "Error: Could not read frontend distribution ID from state." 
+ echo "Make sure the staking-dashboard Terraform has been applied for $ENVIRONMENT" + exit 1 +fi +echo " Frontend distribution ID: $FRONTEND_DIST_ID" + +# Create state file +STATE_FILE="/tmp/deploy-state-${ENVIRONMENT}.json" +cat > "$STATE_FILE" << EOF +{ + "live_color": "$LIVE_COLOR", + "frontend_distribution_id": "$FRONTEND_DIST_ID", + "colors": { + "red": { "cf_domain": "$RED_CF_DOMAIN" }, + "green": { "cf_domain": "$GREEN_CF_DOMAIN" } + }, + "pending_switchover": null +} +EOF + +echo "" +log_step "State file contents:" +cat "$STATE_FILE" +echo "" + +# Upload to S3 +STATE_KEY="deployment-state/${ENVIRONMENT}.json" +log_step "Uploading to s3://${STATE_BUCKET}/${STATE_KEY}..." +aws s3 cp "$STATE_FILE" "s3://${STATE_BUCKET}/${STATE_KEY}" --content-type "application/json" + +log_success "Deployment state initialized for $ENVIRONMENT (live: $LIVE_COLOR)" +echo "" +echo "You can now use the 'Deploy Indexer (Blue-Green)' workflow in GitHub Actions." diff --git a/staking-dashboard/bootstrap.sh b/staking-dashboard/bootstrap.sh index bf300d340..fb99e0c08 100755 --- a/staking-dashboard/bootstrap.sh +++ b/staking-dashboard/bootstrap.sh @@ -93,23 +93,20 @@ function update_env_file() { if [ -z "${VITE_API_HOST:-}" ]; then log_step "Updating VITE_API_HOST" - if [ "$environment" = "staging" ]; then + if [ "$environment" = "prod" ]; then + # Same-domain API — /api/* is routed to the live indexer by CloudFront. + # No need to distinguish red/green; the blue-green cron handles origin switching. 
+ VITE_API_HOST="https://stake.aztec.network" + elif [ "$environment" = "testnet" ]; then + # Same-domain API for testnet + VITE_API_HOST="https://testnet.stake.aztec.network" + elif [ "$environment" = "staging" ]; then if [ "$green" = "green" ]; then - # staging green deployment VITE_API_HOST="https://d1lzkj24db7400.cloudfront.net" else - # staging red deployment VITE_API_HOST="https://d24imfdgeak2db.cloudfront.net" fi - elif [ "$environment" = "prod" ]; then - if [ "$green" = "green" ]; then - # prod green deployment - VITE_API_HOST="https://dgk9duhuxabbq.cloudfront.net" - else - # prod red deployment - VITE_API_HOST="https://d10cun7h2qqnvc.cloudfront.net" - fi - else + else VITE_API_HOST="http://localhost:42068" fi fi diff --git a/staking-dashboard/terraform/data.tf b/staking-dashboard/terraform/data.tf index 0e18ed5b5..e99b5dfc7 100644 --- a/staking-dashboard/terraform/data.tf +++ b/staking-dashboard/terraform/data.tf @@ -20,6 +20,7 @@ data "terraform_remote_state" "shared" { # Local references to backend service URLs locals { atp_indexer_url = "https://${data.terraform_remote_state.atp-indexer.outputs.cf_domain_name}" + atp_indexer_cf_domain = data.terraform_remote_state.atp-indexer.outputs.cf_domain_name cloudfront_logs_bucket = try(data.terraform_remote_state.shared.outputs.cloudfront_logs_bucket_domain_name, "") } diff --git a/staking-dashboard/terraform/main.tf b/staking-dashboard/terraform/main.tf index 7bfab8206..183a08967 100644 --- a/staking-dashboard/terraform/main.tf +++ b/staking-dashboard/terraform/main.tf @@ -180,20 +180,107 @@ resource "aws_cloudfront_function" "basic_auth_staking_dashboard" { }) } +# CloudFront function for SPA routing — rewrites non-file URIs to /index.html. +# This replaces the 404 custom_error_response so that API 404s pass through correctly +# (custom_error_response is distribution-wide and would swallow API errors). 
+resource "aws_cloudfront_function" "spa_routing" { + name = "${var.env}-aztec-staking-dashboard-spa-routing" + runtime = "cloudfront-js-2.0" + comment = "SPA routing: rewrite non-file paths to /index.html" + + code = <<-EOF + function handler(event) { + var request = event.request; + var uri = request.uri; + + // If the URI has a file extension (e.g. .js, .css, .png), serve it as-is. + // Otherwise rewrite to /index.html for SPA client-side routing. + if (!uri.includes('.')) { + request.uri = '/index.html'; + } + + return request; + } + EOF +} + +# CORS response headers policy for the /api/* behavior +resource "aws_cloudfront_response_headers_policy" "api_cors" { + name = "${var.env}-staking-dashboard-api-cors" + + cors_config { + access_control_allow_credentials = false + + access_control_allow_headers { + items = ["Content-Type", "Origin", "Accept", "X-Requested-With"] + } + + access_control_allow_methods { + items = ["GET", "OPTIONS", "HEAD"] + } + + access_control_allow_origins { + items = ["*"] + } + + access_control_expose_headers { + items = ["Content-Type"] + } + + access_control_max_age_sec = 86400 + origin_override = true + } +} + resource "aws_cloudfront_distribution" "staking_dashboard_distribution" { enabled = true default_root_object = "index.html" web_acl_id = module.website_waf.web_acl_arn - + # Use custom domain with certificate aliases = var.env == "prod" ? ["stake.aztec.network"] : var.env == "testnet" ? ["testnet.stake.aztec.network"] : [] + # Origin 1: S3 bucket for static frontend assets origin { domain_name = aws_s3_bucket.staking_dashboard_bucket.bucket_regional_domain_name origin_id = "stakingDashboardS3Origin" origin_access_control_id = aws_cloudfront_origin_access_control.oac-staking-dashboard.id } + # Origin 2: Live indexer CloudFront (proxied for /api/* requests). + # The blue-green cron workflow updates this origin's domain via AWS CLI + # when switching between red/green indexers. 
+ origin { + domain_name = local.atp_indexer_cf_domain + origin_id = "indexerOrigin" + + custom_origin_config { + http_port = 80 + https_port = 443 + origin_protocol_policy = "https-only" + origin_ssl_protocols = ["TLSv1.2"] + } + } + + # /api/* requests → indexer origin + ordered_cache_behavior { + path_pattern = "/api/*" + allowed_methods = ["DELETE", "GET", "HEAD", "OPTIONS", "PATCH", "POST", "PUT"] + cached_methods = ["GET", "HEAD", "OPTIONS"] + target_origin_id = "indexerOrigin" + + viewer_protocol_policy = "redirect-to-https" + + # CachingDisabled — the per-color indexer CloudFront handles caching + cache_policy_id = "4135ea2d-6df8-44a3-9df3-4b5a84be39ad" + + # AllViewer — forward all headers to origin + origin_request_policy_id = "216adef6-5c7f-47e4-b989-5492eafa07d3" + + response_headers_policy_id = aws_cloudfront_response_headers_policy.api_cors.id + } + + # Default: S3 static frontend assets default_cache_behavior { allowed_methods = ["GET", "HEAD", "OPTIONS"] cached_methods = ["GET", "HEAD"] @@ -208,10 +295,17 @@ resource "aws_cloudfront_distribution" "staking_dashboard_distribution" { forward = "none" } } - + + # SPA routing: rewrite non-file paths to /index.html so client-side + # routing works on page refresh. This replaces the old 404 custom_error_response + # which was distribution-wide and would have swallowed API 404s. 
+ function_association { + event_type = "viewer-request" + function_arn = aws_cloudfront_function.spa_routing.arn + } } - # Redirect to blocked.html for 403 errors + # Redirect to blocked.html for 403 errors (geo-blocking) custom_error_response { error_code = 403 response_code = 403 @@ -219,14 +313,9 @@ resource "aws_cloudfront_distribution" "staking_dashboard_distribution" { error_caching_min_ttl = 0 } - # Redirect to index.html for 404 errors - # This is to handle the case where the user is on a route and refreshes the page - custom_error_response { - error_code = 404 - response_code = 200 - response_page_path = "/index.html" - error_caching_min_ttl = 0 - } + # NOTE: The 404 custom_error_response was removed because it's distribution-wide + # and would intercept API 404s (returning index.html instead of JSON errors). + # SPA routing is now handled by the spa_routing CloudFront Function above. restrictions { geo_restriction { @@ -250,6 +339,13 @@ resource "aws_cloudfront_distribution" "staking_dashboard_distribution" { prefix = "frontend/staking-dashboard/" } } + + # The indexer origin domain is updated by the blue-green cron via AWS CLI. + # Ignore origin changes so Terraform doesn't revert the switchover. + # The S3 origin never changes so this is safe. 
+ lifecycle { + ignore_changes = [origin] + } } # From 63f7e8c2de8db3a1fe214f7ad8bdac7aa8c10f2d Mon Sep 17 00:00:00 2001 From: Koen Date: Fri, 20 Feb 2026 12:09:33 +0200 Subject: [PATCH 02/25] =?UTF-8?q?=F0=9F=92=9A=20add=20dev=20&=20staging=20?= =?UTF-8?q?to=20CI?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/check-indexer-sync.yaml | 4 ++- .../workflows/deploy-indexer-bluegreen.yaml | 21 +++++------- .github/workflows/deploy-indexer.yaml | 19 +++-------- atp-indexer/bootstrap.sh | 33 ++++++++++++++++--- scripts/init-deployment-state.sh | 10 ++++-- 5 files changed, 52 insertions(+), 35 deletions(-) diff --git a/.github/workflows/check-indexer-sync.yaml b/.github/workflows/check-indexer-sync.yaml index 50b85f897..6e2d87885 100644 --- a/.github/workflows/check-indexer-sync.yaml +++ b/.github/workflows/check-indexer-sync.yaml @@ -18,6 +18,8 @@ on: type: choice options: - "" + - dev + - staging - testnet - prod @@ -31,7 +33,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - environment: [testnet, prod] + environment: [dev, staging, testnet, prod] # For manual runs targeting a specific env, skip others if: >- github.event_name == 'schedule' || diff --git a/.github/workflows/deploy-indexer-bluegreen.yaml b/.github/workflows/deploy-indexer-bluegreen.yaml index c73180954..54f21c725 100644 --- a/.github/workflows/deploy-indexer-bluegreen.yaml +++ b/.github/workflows/deploy-indexer-bluegreen.yaml @@ -15,6 +15,8 @@ on: default: "testnet" type: choice options: + - dev + - staging - testnet - prod dry_run: @@ -130,20 +132,13 @@ jobs: working-directory: atp-indexer run: | SLAVE="${{ steps.state.outputs.slave_color }}" - echo "Deploying to ${{ inputs.environment }} ($SLAVE)" + ENV="${{ inputs.environment }}" + echo "Deploying to ${ENV} ($SLAVE)" - if [ "${{ inputs.environment }}" = "testnet" ]; then - if [ "$SLAVE" = "green" ]; then - ./bootstrap.sh deploy-testnet-green - else - ./bootstrap.sh deploy-testnet - 
fi - elif [ "${{ inputs.environment }}" = "prod" ]; then - if [ "$SLAVE" = "green" ]; then - ./bootstrap.sh deploy-prod-green - else - ./bootstrap.sh deploy-prod - fi + if [ "$SLAVE" = "green" ]; then + ./bootstrap.sh "deploy-${ENV}-green" + else + ./bootstrap.sh "deploy-${ENV}" fi - name: Update deployment state with pending switchover diff --git a/.github/workflows/deploy-indexer.yaml b/.github/workflows/deploy-indexer.yaml index 0ab6d1dac..f492c559d 100644 --- a/.github/workflows/deploy-indexer.yaml +++ b/.github/workflows/deploy-indexer.yaml @@ -9,6 +9,8 @@ on: default: "testnet" type: choice options: + - dev + - staging - testnet - prod dry_run: @@ -94,19 +96,8 @@ jobs: - name: Deploy working-directory: atp-indexer run: | - if [ "$ENV" = "testnet" ]; then - if [ "$GREEN" = "true" ]; then - ./bootstrap.sh deploy-testnet-green - else - ./bootstrap.sh deploy-testnet - fi - elif [ "$ENV" = "prod" ]; then - if [ "$GREEN" = "true" ]; then - ./bootstrap.sh deploy-prod-green - else - ./bootstrap.sh deploy-prod - fi + if [ "$GREEN" = "true" ]; then + ./bootstrap.sh "deploy-${ENV}-green" else - echo "Unknown environment: $ENV" - exit 1 + ./bootstrap.sh "deploy-${ENV}" fi diff --git a/atp-indexer/bootstrap.sh b/atp-indexer/bootstrap.sh index 5d906d5fa..cb93a5b03 100755 --- a/atp-indexer/bootstrap.sh +++ b/atp-indexer/bootstrap.sh @@ -317,9 +317,16 @@ function deploy() { CHAIN_ID=11155111 chain_environment="sepolia_testnet" infra_parent_environment="dev" - fi - - if [ "$infra_environment" = "prod" ]; then + elif [ "$infra_environment" = "dev" ] || [ "$infra_environment" = "staging" ]; then + if [ -z "${RPC_URL:-}" ]; then + echo "Error: RPC_URL must be set" + exit 1 + fi + RPC_URL=$RPC_URL + CHAIN_ID=1 + chain_environment="prod" + infra_parent_environment="dev" + elif [ "$infra_environment" = "prod" ]; then if [ -z "${RPC_URL:-}" ]; then echo "Error: RPC_URL must be set" exit 1 @@ -421,6 +428,18 @@ case $ACTION in build) build ;; + deploy-dev) + deploy "dev" + ;; + 
deploy-dev-green) + deploy "dev" "-g" + ;; + deploy-staging) + deploy "staging" + ;; + deploy-staging-green) + deploy "staging" "-g" + ;; deploy-testnet) deploy "testnet" ;; @@ -439,8 +458,14 @@ case $ACTION in echo "Actions:" echo " dev Start development server" echo " build Install deps, generate providers & types" + echo " deploy-dev Deploy to dev" + echo " deploy-dev-green Deploy to dev (green)" + echo " deploy-staging Deploy to staging" + echo " deploy-staging-green Deploy to staging (green)" echo " deploy-testnet Deploy to testnet" - echo " deploy-prod Deploy to prod" + echo " deploy-testnet-green Deploy to testnet (green)" + echo " deploy-prod Deploy to prod" + echo " deploy-prod-green Deploy to prod (green)" echo " help Show this help" echo "" echo "Environments:" diff --git a/scripts/init-deployment-state.sh b/scripts/init-deployment-state.sh index b8da331de..2a00947f7 100755 --- a/scripts/init-deployment-state.sh +++ b/scripts/init-deployment-state.sh @@ -13,6 +13,8 @@ set -eu # ./scripts/init-deployment-state.sh # # Examples: +# ./scripts/init-deployment-state.sh dev red +# ./scripts/init-deployment-state.sh staging red # ./scripts/init-deployment-state.sh testnet red # ./scripts/init-deployment-state.sh prod red @@ -26,17 +28,19 @@ STATE_BUCKET="aztec-token-sale-terraform-state" if [ -z "$ENVIRONMENT" ] || [ -z "$LIVE_COLOR" ]; then echo "Usage: $0 " echo "" - echo " environment: testnet or prod" + echo " environment: dev, staging, testnet, or prod" echo " live_color: red or green (which color is currently serving traffic)" echo "" echo "Examples:" + echo " $0 dev red" + echo " $0 staging red" echo " $0 testnet red" echo " $0 prod red" exit 1 fi -if [ "$ENVIRONMENT" != "testnet" ] && [ "$ENVIRONMENT" != "prod" ]; then - echo "Error: Environment must be 'testnet' or 'prod'" +if [ "$ENVIRONMENT" != "dev" ] && [ "$ENVIRONMENT" != "staging" ] && [ "$ENVIRONMENT" != "testnet" ] && [ "$ENVIRONMENT" != "prod" ]; then + echo "Error: Environment must be 'dev', 
'staging', 'testnet', or 'prod'" exit 1 fi From 0647aa400eb488ad3201fb62d993d07d2307c803 Mon Sep 17 00:00:00 2001 From: Koen Date: Fri, 20 Feb 2026 12:14:52 +0200 Subject: [PATCH 03/25] =?UTF-8?q?=F0=9F=92=9A=20fix=20if=20condition?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/check-indexer-sync.yaml | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/.github/workflows/check-indexer-sync.yaml b/.github/workflows/check-indexer-sync.yaml index 6e2d87885..2963a4cba 100644 --- a/.github/workflows/check-indexer-sync.yaml +++ b/.github/workflows/check-indexer-sync.yaml @@ -34,20 +34,32 @@ jobs: strategy: matrix: environment: [dev, staging, testnet, prod] - # For manual runs targeting a specific env, skip others - if: >- - github.event_name == 'schedule' || - inputs.environment == '' || - inputs.environment == matrix.environment environment: ${{ matrix.environment }} env: STATE_BUCKET: aztec-token-sale-terraform-state AWS_REGION: ${{ secrets.AWS_DEFAULT_REGION }} steps: + # For manual runs targeting a specific env, skip others + - name: Check if this environment is targeted + id: should-run + run: | + EVENT="${{ github.event_name }}" + TARGET="${{ inputs.environment }}" + CURRENT="${{ matrix.environment }}" + + if [ "$EVENT" = "schedule" ] || [ -z "$TARGET" ] || [ "$TARGET" = "$CURRENT" ]; then + echo "run=true" >> $GITHUB_OUTPUT + else + echo "Skipping $CURRENT (targeted: $TARGET)" + echo "run=false" >> $GITHUB_OUTPUT + fi + - uses: actions/checkout@v4 + if: steps.should-run.outputs.run == 'true' - name: Configure AWS credentials with GitHub OIDC + if: steps.should-run.outputs.run == 'true' uses: aws-actions/configure-aws-credentials@v4 with: role-to-assume: ${{ vars.AWS_OIDC_ROLE_ARN }} @@ -55,6 +67,7 @@ jobs: aws-region: ${{ secrets.AWS_DEFAULT_REGION }} - name: Read deployment state + if: steps.should-run.outputs.run == 'true' id: state run: | 
STATE_KEY="deployment-state/${{ matrix.environment }}.json" From 27a7df861e34180c76d215cdb496448886e7e882 Mon Sep 17 00:00:00 2001 From: Koen Date: Fri, 20 Feb 2026 13:39:27 +0200 Subject: [PATCH 04/25] =?UTF-8?q?=F0=9F=93=9D=20=20add=20deployment=20docs?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/check-indexer-sync.yaml | 6 +- .../workflows/deploy-indexer-bluegreen.yaml | 30 +-- DEPLOYMENT.md | 192 ++++++++++++++++++ atp-indexer/src/api/handlers/sync-status.ts | 2 +- 4 files changed, 211 insertions(+), 19 deletions(-) create mode 100644 DEPLOYMENT.md diff --git a/.github/workflows/check-indexer-sync.yaml b/.github/workflows/check-indexer-sync.yaml index 2963a4cba..57b2f7c46 100644 --- a/.github/workflows/check-indexer-sync.yaml +++ b/.github/workflows/check-indexer-sync.yaml @@ -2,7 +2,7 @@ name: Check Indexer Sync & Switchover # Cron workflow that checks if a pending blue-green switchover is ready. # Runs every 30 minutes. When no switchover is pending, exits in <10s. -# When a pending switchover's slave indexer reports synced: +# When a pending switchover's backup indexer reports synced: # 1. Updates the frontend CloudFront's indexer origin to point to the new live backend # 2. Updates the S3 deployment state # 3. 
Triggers a deploy of the old live backend (so both end up updated) @@ -125,7 +125,7 @@ jobs: echo "timed_out=false" >> $GITHUB_OUTPUT fi - - name: Check slave sync status + - name: Check backup sync status if: steps.state.outputs.has_pending == 'true' && steps.timeout.outputs.timed_out == 'false' id: sync run: | @@ -207,7 +207,7 @@ jobs: "s3://${STATE_BUCKET}/deployment-state/${{ matrix.environment }}.json" \ --content-type "application/json" - - name: Trigger deploy to old live (now slave) + - name: Trigger deploy to old live (now backup) if: steps.state.outputs.has_pending == 'true' && steps.timeout.outputs.timed_out == 'false' && steps.sync.outputs.is_synced == 'true' uses: actions/github-script@v7 with: diff --git a/.github/workflows/deploy-indexer-bluegreen.yaml b/.github/workflows/deploy-indexer-bluegreen.yaml index 54f21c725..b657671ec 100644 --- a/.github/workflows/deploy-indexer-bluegreen.yaml +++ b/.github/workflows/deploy-indexer-bluegreen.yaml @@ -1,10 +1,10 @@ name: Deploy Indexer (Blue-Green) # Automated blue-green deployment for the ATP indexer. -# Reads deployment state from S3 to determine which color is the slave, -# deploys the indexer to the slave, and marks a switchover as pending. +# Reads deployment state from S3 to determine which color is the backup, +# deploys the indexer to the backup, and marks a switchover as pending. # The check-indexer-sync.yaml cron workflow handles the actual switchover -# once the slave finishes re-indexing. +# once the backup finishes re-indexing. 
on: workflow_dispatch: @@ -36,7 +36,7 @@ permissions: actions: read jobs: - deploy-to-slave: + deploy-to-backup: runs-on: ubuntu-latest environment: ${{ inputs.environment }} env: @@ -105,15 +105,15 @@ jobs: LIVE_COLOR=$(jq -r '.live_color' /tmp/deploy-state.json) PENDING=$(jq -r '.pending_switchover' /tmp/deploy-state.json) - # Determine slave color + # Determine backup color if [ "$LIVE_COLOR" = "red" ]; then - SLAVE_COLOR="green" + BACKUP_COLOR="green" else - SLAVE_COLOR="red" + BACKUP_COLOR="red" fi echo "live_color=$LIVE_COLOR" >> $GITHUB_OUTPUT - echo "slave_color=$SLAVE_COLOR" >> $GITHUB_OUTPUT + echo "backup_color=$BACKUP_COLOR" >> $GITHUB_OUTPUT echo "has_pending=$([ "$PENDING" != "null" ] && echo 'true' || echo 'false')" >> $GITHUB_OUTPUT else echo "::error::No deployment state found. Run scripts/init-deployment-state.sh first." @@ -128,14 +128,14 @@ jobs: cat /tmp/deploy-state.json exit 1 - - name: Deploy indexer to slave (${{ steps.state.outputs.slave_color }}) + - name: Deploy indexer to backup (${{ steps.state.outputs.backup_color }}) working-directory: atp-indexer run: | - SLAVE="${{ steps.state.outputs.slave_color }}" + BACKUP="${{ steps.state.outputs.backup_color }}" ENV="${{ inputs.environment }}" - echo "Deploying to ${ENV} ($SLAVE)" + echo "Deploying to ${ENV} ($BACKUP)" - if [ "$SLAVE" = "green" ]; then + if [ "$BACKUP" = "green" ]; then ./bootstrap.sh "deploy-${ENV}-green" else ./bootstrap.sh "deploy-${ENV}" @@ -144,11 +144,11 @@ jobs: - name: Update deployment state with pending switchover if: inputs.dry_run == false run: | - SLAVE="${{ steps.state.outputs.slave_color }}" + BACKUP="${{ steps.state.outputs.backup_color }}" NOW=$(date -u +"%Y-%m-%dT%H:%M:%SZ") # Update state with pending switchover - jq --arg target "$SLAVE" \ + jq --arg target "$BACKUP" \ --arg started "$NOW" \ --arg sha "${{ github.sha }}" \ '.pending_switchover = { target_color: $target, started_at: $started, commit_sha: $sha }' \ @@ -164,7 +164,7 @@ jobs: echo "### 
Deployment Summary" >> $GITHUB_STEP_SUMMARY echo "" >> $GITHUB_STEP_SUMMARY echo "- **Environment:** ${{ inputs.environment }}" >> $GITHUB_STEP_SUMMARY - echo "- **Deployed to:** $SLAVE (slave)" >> $GITHUB_STEP_SUMMARY + echo "- **Deployed to:** $BACKUP (backup)" >> $GITHUB_STEP_SUMMARY echo "- **Live:** ${{ steps.state.outputs.live_color }}" >> $GITHUB_STEP_SUMMARY echo "- **Commit:** ${{ github.sha }}" >> $GITHUB_STEP_SUMMARY echo "" >> $GITHUB_STEP_SUMMARY diff --git a/DEPLOYMENT.md b/DEPLOYMENT.md new file mode 100644 index 000000000..316edcced --- /dev/null +++ b/DEPLOYMENT.md @@ -0,0 +1,192 @@ +# Deployment + +This repo contains two deployable components: + +1. **Staking Dashboard** (`staking-dashboard/`) — React frontend served from S3 via CloudFront +2. **ATP Indexer** (`atp-indexer/`) — Ponder blockchain indexer running on ECS Fargate + +Both are deployed per-environment (`dev`, `staging`, `testnet`, `prod`). The indexer has two instances per environment — **red** and **green** — to enable zero-downtime deployments. + +## Architecture + +``` + stake.aztec.network + │ + ┌──────┴──────┐ + │ CloudFront │ + │ Distribution│ + └──┬───────┬──┘ + │ │ + /static │ │ /api/* + │ │ + ┌──────┴──┐ ┌─┴─────────────┐ + │ S3 │ │ indexerOrigin │ ← points to live color + │ Bucket │ │ (CF domain) │ + └─────────┘ └──────┬────────┘ + │ + ┌────────────┴────────────┐ + │ │ + ┌──────┴──────┐ ┌───────┴─────┐ + │ Red CF │ │ Green CF │ + └──────┬──────┘ └──────┬──────┘ + ┌──────┴──────┐ ┌──────┴──────┐ + │ Red ALB │ │ Green ALB │ + └──────┬──────┘ └──────┴──────┘ + │ │ + Red ECS Green ECS + (indexer+server) (indexer+server) +``` + +The frontend CloudFront distribution has two origins: +- **S3** for static assets (default behavior) +- **indexerOrigin** for `/api/*` requests, pointing to whichever indexer color is live + +This means the frontend always uses its own domain for API calls (`/api/*`). Indexer switchovers only update the CloudFront origin — no frontend redeploy needed. 
+ +## Environments + +| Environment | Chain | AWS Cluster | Domain | Branch restriction | +|-------------|----------|-------------|---------------------------------|--------------------| +| `dev` | Mainnet | dev | — | None (any PR) | +| `staging` | Mainnet | dev | — | None | +| `testnet` | Sepolia | dev | `testnet.stake.aztec.network` | None | +| `prod` | Mainnet | prod | `stake.aztec.network` | `main` only | + +Each environment requires a matching [GitHub environment](https://docs.github.com/en/actions/deployment/targeting-different-environments/using-environments-for-deployment) with the relevant secrets and variables (AWS credentials, RPC URL, contract addresses, etc.). + +To allow deploying `dev` from any PR branch, set its GitHub environment's **Deployment branches** to "All branches". + +## Deploying the Frontend + +**Workflow:** `Deploy Staking Dashboard` (`deploy-staking-dashboard.yaml`) + +Trigger manually from the Actions tab or push a tag: +``` +v1.0.0-testnet-dashboard +v1.0.0-prod-dashboard +``` + +This builds the React app, uploads to S3, and invalidates the CloudFront cache. No interaction with red/green — the frontend is a single static deployment. + +## Deploying the Indexer (Blue-Green) + +The indexer uses a two-phase blue-green deployment. When indexer code changes, the new version re-indexes from scratch (~30 minutes). Rather than having a GitHub Actions runner sit idle waiting, the deploy exits immediately and a cron job handles the switchover. + +### Phase 1: Deploy to Backup + +**Workflow:** `Deploy Indexer (Blue-Green)` (`deploy-indexer-bluegreen.yaml`) + +1. Reads deployment state from S3 to determine which color is **live** and which is **backup** +2. Deploys the indexer to the backup (Terraform + Docker + ECS) +3. Writes a `pending_switchover` to the S3 state file +4. 
Exits (~5–10 min total) + +Trigger manually from the Actions tab: +- **environment**: `dev` / `staging` / `testnet` / `prod` +- **dry_run**: Plan only, don't apply +- **force**: Override an existing pending switchover + +### Phase 2: Automatic Switchover + +**Workflow:** `Check Indexer Sync & Switchover` (`check-indexer-sync.yaml`) + +Runs on a cron every 30 minutes. For each environment with a `pending_switchover`: + +1. Hits `GET /api/sync-status` on the backup's CloudFront domain +2. If not synced yet → exits, retries next cron run +3. If synced → performs the switchover: + - Updates the frontend CloudFront's `indexerOrigin` to point to the new live color (via AWS CLI) + - Invalidates `/api/*` cache + - Updates the S3 deployment state (`live_color` = new color, `pending_switchover` = null) + - Triggers `Deploy ATP Indexer` for the old live (so both colors end up on the latest code) +4. If timed out (>2 hours) → clears the pending switchover and logs an error + +Can also be triggered manually to check a specific environment immediately. + +### Sync Status Endpoint + +`GET /api/sync-status` returns: + +```json +{ + "synced": true, + "indexedBlock": 21345678, + "chainHead": 21345680, + "behindBlocks": 2, + "hasData": true, + "timestamp": "2024-02-19T12:00:00Z" +} +``` + +The indexer is considered synced when `behindBlocks < 50` and `hasData` is true (at least one provider exists in the database). + +### Deploying to a Single Color (Manual) + +**Workflow:** `Deploy ATP Indexer` (`deploy-indexer.yaml`) + +Deploys to a specific color without blue-green orchestration. 
Used by the cron to update the old live, or for manual overrides: +- Set **green** = true to deploy the green instance, false for red + +Can also be triggered via tags: +``` +v1.0.0-testnet-indexer +v1.0.0-testnet-indexer-green +v1.0.0-prod-indexer +v1.0.0-prod-indexer-green +``` + +## S3 Deployment State + +Path: `s3://aztec-token-sale-terraform-state/deployment-state/{env}.json` + +```json +{ + "live_color": "red", + "frontend_distribution_id": "E1234567890", + "colors": { + "red": { "cf_domain": "d10cun7h2qqnvc.cloudfront.net" }, + "green": { "cf_domain": "dgk9duhuxabbq.cloudfront.net" } + }, + "pending_switchover": null +} +``` + +When a switchover is pending: +```json +{ + "pending_switchover": { + "target_color": "green", + "started_at": "2024-02-19T12:00:00Z", + "commit_sha": "abc123" + } +} +``` + +## Initial Setup + +Before using the blue-green workflow for an environment, run the init script once: + +```bash +./scripts/init-deployment-state.sh +``` + +This reads the CloudFront domains from Terraform state and creates the S3 deployment state file. Prerequisites: +- AWS CLI configured with access to the state bucket +- Both red and green indexer Terraform applied +- Frontend staking-dashboard Terraform applied + +## Terraform + +The frontend CloudFront distribution's `indexerOrigin` is managed with `lifecycle { ignore_changes = [origin] }` so that Terraform doesn't revert origin changes made by the blue-green cron via AWS CLI. + +SPA routing is handled by a CloudFront Function (`spa_routing`) on the default behavior's viewer-request event instead of a 404 `custom_error_response`, because `custom_error_response` is distribution-wide and would intercept API 404s. + +## Troubleshooting + +**Switchover stuck / timed out:** The cron clears pending switchovers after 2 hours. Check the backup's `/api/sync-status` endpoint directly. If the indexer is erroring, check ECS logs. + +**Switchover never triggers:** Verify the S3 state file has a `pending_switchover` set. 
The cron only runs every 30 minutes — trigger `Check Indexer Sync & Switchover` manually for faster feedback. + +**Wrong color is live:** Manually run `Deploy ATP Indexer` targeting the correct color, then update the S3 state file's `live_color` field directly. + +**Terraform wants to revert the origin:** The `lifecycle { ignore_changes = [origin] }` block should prevent this. If it's happening, check that the block is still present in `staking-dashboard/terraform/main.tf`. diff --git a/atp-indexer/src/api/handlers/sync-status.ts b/atp-indexer/src/api/handlers/sync-status.ts index ae072ab72..9921d81f7 100644 --- a/atp-indexer/src/api/handlers/sync-status.ts +++ b/atp-indexer/src/api/handlers/sync-status.ts @@ -18,7 +18,7 @@ const SYNC_THRESHOLD_BLOCKS = 50; /** * Handle GET /api/sync-status * Returns the indexer's sync status by comparing the latest indexed block to the chain head. - * Used by the blue-green deployment cron to determine when a slave indexer has caught up. + * Used by the blue-green deployment cron to determine when a backup indexer has caught up. 
*/ export async function handleSyncStatus(c: Context): Promise { try { From 10c64bc885eedd61dcb779f0d72cc8ad089e55a1 Mon Sep 17 00:00:00 2001 From: Koen Date: Fri, 20 Feb 2026 14:21:58 +0200 Subject: [PATCH 05/25] =?UTF-8?q?=F0=9F=92=9A=20add=20domains=20for=20dev?= =?UTF-8?q?=20&=20staging?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../workflows/deploy-staking-dashboard.yaml | 36 +++--------- DEPLOYMENT.md | 4 +- staking-dashboard/bootstrap.sh | 55 +++++++++---------- staking-dashboard/terraform/main.tf | 4 +- 4 files changed, 39 insertions(+), 60 deletions(-) diff --git a/.github/workflows/deploy-staking-dashboard.yaml b/.github/workflows/deploy-staking-dashboard.yaml index 538827576..348c056f0 100644 --- a/.github/workflows/deploy-staking-dashboard.yaml +++ b/.github/workflows/deploy-staking-dashboard.yaml @@ -6,9 +6,11 @@ on: environment: description: "Environment to deploy to" required: true - default: "staging" + default: "dev" type: choice options: + - dev + - staging - testnet - prod dry_run: @@ -16,18 +18,13 @@ on: required: false default: false type: boolean - green: - description: "Whether to use the green indexer" - required: false - default: false - type: boolean push: tags: + - 'v*-dev-dashboard' + - 'v*-staging-dashboard' - 'v*-testnet-dashboard' - - 'v*-testnet-dashboard-green' - 'v*-prod-dashboard' - - 'v*-prod-dashboard-green' permissions: id-token: write @@ -37,10 +34,9 @@ permissions: jobs: deploy: runs-on: ubuntu-latest - environment: ${{ inputs.environment || (contains(github.ref, '-prod') && 'prod') || 'staging' }} + environment: ${{ inputs.environment || (contains(github.ref, '-prod') && 'prod') || (contains(github.ref, '-testnet') && 'testnet') || (contains(github.ref, '-staging') && 'staging') || 'dev' }} env: - ENV: ${{ inputs.environment || (contains(github.ref, '-prod') && 'prod') || 'staging' }} - GREEN: ${{ inputs.green || contains(github.ref, '-green') }} + ENV: ${{ 
inputs.environment || (contains(github.ref, '-prod') && 'prod') || (contains(github.ref, '-testnet') && 'testnet') || (contains(github.ref, '-staging') && 'staging') || 'dev' }} DRY_RUN: ${{ inputs.dry_run }} # AWS Configuration @@ -98,20 +94,4 @@ jobs: - name: Deploy working-directory: staking-dashboard - run: | - if [ "$ENV" = "testnet" ]; then - if [ "$GREEN" = "true" ]; then - ./bootstrap.sh deploy-testnet-green - else - ./bootstrap.sh deploy-testnet - fi - elif [ "$ENV" = "prod" ]; then - if [ "$GREEN" = "true" ]; then - ./bootstrap.sh deploy-prod-green - else - ./bootstrap.sh deploy-prod - fi - else - echo "Unknown environment: $ENV" - exit 1 - fi + run: ./bootstrap.sh "deploy-${ENV}" diff --git a/DEPLOYMENT.md b/DEPLOYMENT.md index 316edcced..3cea53c42 100644 --- a/DEPLOYMENT.md +++ b/DEPLOYMENT.md @@ -47,8 +47,8 @@ This means the frontend always uses its own domain for API calls (`/api/*`). Ind | Environment | Chain | AWS Cluster | Domain | Branch restriction | |-------------|----------|-------------|---------------------------------|--------------------| -| `dev` | Mainnet | dev | — | None (any PR) | -| `staging` | Mainnet | dev | — | None | +| `dev` | Mainnet | dev | `dev.stake.aztec.network` | None (any PR) | +| `staging` | Mainnet | dev | `staging.stake.aztec.network` | None | | `testnet` | Sepolia | dev | `testnet.stake.aztec.network` | None | | `prod` | Mainnet | prod | `stake.aztec.network` | `main` only | diff --git a/staking-dashboard/bootstrap.sh b/staking-dashboard/bootstrap.sh index fb99e0c08..30d6641d9 100755 --- a/staking-dashboard/bootstrap.sh +++ b/staking-dashboard/bootstrap.sh @@ -97,15 +97,9 @@ function update_env_file() { # Same-domain API — /api/* is routed to the live indexer by CloudFront. # No need to distinguish red/green; the blue-green cron handles origin switching. 
VITE_API_HOST="https://stake.aztec.network" - elif [ "$environment" = "testnet" ]; then - # Same-domain API for testnet - VITE_API_HOST="https://testnet.stake.aztec.network" - elif [ "$environment" = "staging" ]; then - if [ "$green" = "green" ]; then - VITE_API_HOST="https://d1lzkj24db7400.cloudfront.net" - else - VITE_API_HOST="https://d24imfdgeak2db.cloudfront.net" - fi + elif [ "$environment" = "testnet" ] || [ "$environment" = "staging" ]; then + # Same-domain API for testnet/staging + VITE_API_HOST="https://${environment}.stake.aztec.network" else VITE_API_HOST="http://localhost:42068" fi @@ -377,9 +371,7 @@ function deploy() { CHAIN_ID=11155111 chain_environment="sepolia_testnet" VITE_EXPLORER_URL="https://sepolia.etherscan.io" - fi - - if [ "$environment" = "prod" ]; then + elif [ "$environment" = "dev" ] || [ "$environment" = "staging" ] || [ "$environment" = "prod" ]; then if [ -z "${RPC_URL:-}" ]; then echo "Error: RPC_URL environment variable must be set" exit 1 @@ -412,11 +404,11 @@ function deploy() { export TF_VAR_basic_auth_pass="${BASIC_AUTH_PASSWORD:-}" # Set parent environment for shared infrastructure - # testnet uses dev shared infrastructure - if [ "$environment" = "testnet" ]; then - export TF_VAR_env_parent="dev" + # Only prod uses the prod cluster; dev, staging, and testnet use dev + if [ "$environment" = "prod" ]; then + export TF_VAR_env_parent="prod" else - export TF_VAR_env_parent="$environment" + export TF_VAR_env_parent="dev" fi if [ "${DRY_RUN:-false}" = "true" ]; then @@ -436,19 +428,14 @@ function deploy() { # Apply the terraform configuration (cd terraform && terraform apply -auto-approve -var="indexer_deployment_suffix=$indexer_deployment_suffix") - # Get ATP indexer URL from terraform output, fallback to localhost if not available - ATP_INDEXER_URL=$(cd $WEBSITE_ROOT/terraform && terraform output -raw atp_indexer_url) - - # TODO: Remove this. - # Use dev-tn indexer URL for dev environment. 
- # This is because website for dev-tn cannot be deployed due to broken tfstate. - if [ "$environment" = "dev" ]; then - echo "WARNING:Using hardcoded dev-tn indexer URL" - ATP_INDEXER_URL="https://d1ibwybv6l4hzw.cloudfront.net" + # Same-domain API — /api/* is routed to the live indexer by CloudFront. + # No need to reference the indexer directly; the blue-green cron handles origin switching. + if [ "$environment" = "prod" ]; then + export VITE_API_HOST="https://stake.aztec.network" + else + export VITE_API_HOST="https://${environment}.stake.aztec.network" fi - - echo "ATP_INDEXER_URL: $ATP_INDEXER_URL" - export VITE_API_HOST="$ATP_INDEXER_URL" + echo "VITE_API_HOST: $VITE_API_HOST" export VITE_CHAIN_ID=$CHAIN_ID export VITE_RPC_URL=$RPC_URL @@ -493,6 +480,18 @@ case $ACTION in build) build ;; + deploy-dev) + deploy "dev" + ;; + deploy-dev-green) + deploy "dev" "-green" + ;; + deploy-staging) + deploy "staging" + ;; + deploy-staging-green) + deploy "staging" "-green" + ;; deploy-testnet) deploy "testnet" ;; diff --git a/staking-dashboard/terraform/main.tf b/staking-dashboard/terraform/main.tf index 183a08967..d7519fdfd 100644 --- a/staking-dashboard/terraform/main.tf +++ b/staking-dashboard/terraform/main.tf @@ -1,5 +1,5 @@ locals { - create_dns_record = var.env == "prod" || var.env == "testnet" ? true : false + create_dns_record = true } terraform { required_version = ">= 1.5.0" @@ -238,7 +238,7 @@ resource "aws_cloudfront_distribution" "staking_dashboard_distribution" { web_acl_id = module.website_waf.web_acl_arn # Use custom domain with certificate - aliases = var.env == "prod" ? ["stake.aztec.network"] : var.env == "testnet" ? ["testnet.stake.aztec.network"] : [] + aliases = var.env == "prod" ? 
["stake.aztec.network"] : ["${var.env}.stake.aztec.network"] # Origin 1: S3 bucket for static frontend assets origin { From 51ad74650a39f2a39f1abf91a7df70efd94d834d Mon Sep 17 00:00:00 2001 From: Koen Date: Fri, 20 Feb 2026 14:58:17 +0200 Subject: [PATCH 06/25] :green_heart: Fix db schema --- db-schemas.json | 2 ++ 1 file changed, 2 insertions(+) diff --git a/db-schemas.json b/db-schemas.json index 38bb99d1d..d39311d04 100644 --- a/db-schemas.json +++ b/db-schemas.json @@ -1,5 +1,7 @@ { "atp-indexer": { + "dev": "atp-indexer-dev-v01", + "staging": "atp-indexer-staging-v01", "testnet": "atp-indexer-testnet-v03", "prod": "atp-indexer-prod-v14" } From 5d8b685ce95db9d4166a459b9d01d82b9a33f5ff Mon Sep 17 00:00:00 2001 From: Koen Date: Fri, 20 Feb 2026 15:56:46 +0200 Subject: [PATCH 07/25] =?UTF-8?q?=F0=9F=92=9A=20bump=20dev=20db=20schema?= =?UTF-8?q?=20to=202?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- db-schemas.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db-schemas.json b/db-schemas.json index d39311d04..717518f41 100644 --- a/db-schemas.json +++ b/db-schemas.json @@ -1,6 +1,6 @@ { "atp-indexer": { - "dev": "atp-indexer-dev-v01", + "dev": "atp-indexer-dev-v02", "staging": "atp-indexer-staging-v01", "testnet": "atp-indexer-testnet-v03", "prod": "atp-indexer-prod-v14" From ccd92bebdefe14115ced6eb193e14ef493d77299 Mon Sep 17 00:00:00 2001 From: Koen Date: Fri, 20 Feb 2026 16:16:54 +0200 Subject: [PATCH 08/25] =?UTF-8?q?=F0=9F=93=9D=20add=20comment=20to=20cloud?= =?UTF-8?q?front?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- atp-indexer/terraform/app.tf | 1 + staking-dashboard/terraform/main.tf | 1 + 2 files changed, 2 insertions(+) diff --git a/atp-indexer/terraform/app.tf b/atp-indexer/terraform/app.tf index 54f6d13bf..55364eb26 100644 --- a/atp-indexer/terraform/app.tf +++ b/atp-indexer/terraform/app.tf @@ -528,6 +528,7 @@ resource 
"aws_cloudfront_response_headers_policy" "cors_policy" { resource "aws_cloudfront_distribution" "cf" { + comment = "ATP Indexer (${var.env}${var.deployment_suffix})" enabled = true default_root_object = "" web_acl_id = local.backend_waf_arn diff --git a/staking-dashboard/terraform/main.tf b/staking-dashboard/terraform/main.tf index d7519fdfd..437562d29 100644 --- a/staking-dashboard/terraform/main.tf +++ b/staking-dashboard/terraform/main.tf @@ -233,6 +233,7 @@ resource "aws_cloudfront_response_headers_policy" "api_cors" { } resource "aws_cloudfront_distribution" "staking_dashboard_distribution" { + comment = "Staking Dashboard (${var.env}) — frontend + /api/* proxy to indexer" enabled = true default_root_object = "index.html" web_acl_id = module.website_waf.web_acl_arn From 07105c41e9f6de6625f6dfef4480e6a771614439 Mon Sep 17 00:00:00 2001 From: Koen Date: Fri, 20 Feb 2026 16:47:12 +0200 Subject: [PATCH 09/25] =?UTF-8?q?=F0=9F=92=9A=20=20comment=20for=20first?= =?UTF-8?q?=20deploy?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- staking-dashboard/terraform/main.tf | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/staking-dashboard/terraform/main.tf b/staking-dashboard/terraform/main.tf index 437562d29..ba2207c78 100644 --- a/staking-dashboard/terraform/main.tf +++ b/staking-dashboard/terraform/main.tf @@ -344,9 +344,13 @@ resource "aws_cloudfront_distribution" "staking_dashboard_distribution" { # The indexer origin domain is updated by the blue-green cron via AWS CLI. # Ignore origin changes so Terraform doesn't revert the switchover. # The S3 origin never changes so this is safe. - lifecycle { - ignore_changes = [origin] - } + # + # IMPORTANT: For the first deploy to a new environment, comment out the + # lifecycle block below so Terraform can create the indexerOrigin. + # After the first successful apply, uncomment it. 
+ # lifecycle { + # ignore_changes = [origin] + # } } # From 0ee56ad35c4bf844a20d4fde2f9c451b0932b539 Mon Sep 17 00:00:00 2001 From: Koen Date: Sun, 22 Feb 2026 15:22:24 +0200 Subject: [PATCH 10/25] =?UTF-8?q?=F0=9F=92=9A=20=20fix=20403s=20on=20the?= =?UTF-8?q?=20API?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- staking-dashboard/terraform/main.tf | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/staking-dashboard/terraform/main.tf b/staking-dashboard/terraform/main.tf index ba2207c78..4fb80cd1f 100644 --- a/staking-dashboard/terraform/main.tf +++ b/staking-dashboard/terraform/main.tf @@ -275,8 +275,10 @@ resource "aws_cloudfront_distribution" "staking_dashboard_distribution" { # CachingDisabled — the per-color indexer CloudFront handles caching cache_policy_id = "4135ea2d-6df8-44a3-9df3-4b5a84be39ad" - # AllViewer — forward all headers to origin - origin_request_policy_id = "216adef6-5c7f-47e4-b989-5492eafa07d3" + # AllViewerExceptHostHeader — forward all headers except Host. + # The Host header must NOT be forwarded because the indexer CloudFront + # would reject it (dev.stake.aztec.network isn't in its aliases → 403). + origin_request_policy_id = "b689b0a8-53d0-40ab-baf2-68738e2966ac" response_headers_policy_id = aws_cloudfront_response_headers_policy.api_cors.id } @@ -348,9 +350,9 @@ resource "aws_cloudfront_distribution" "staking_dashboard_distribution" { # IMPORTANT: For the first deploy to a new environment, comment out the # lifecycle block below so Terraform can create the indexerOrigin. # After the first successful apply, uncomment it. 
- # lifecycle { - # ignore_changes = [origin] - # } + lifecycle { + ignore_changes = [origin] + } } # From edce3781e6e8a6a4d394392584668101ae07fd38 Mon Sep 17 00:00:00 2001 From: Koen Date: Sun, 22 Feb 2026 15:57:36 +0200 Subject: [PATCH 11/25] =?UTF-8?q?=F0=9F=92=9A=20=20exclude=20API=20from=20?= =?UTF-8?q?bot=20control?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- staking-dashboard/terraform/waf.tf | 1 + terraform/modules/waf/main.tf | 22 ++++++++++++++++++++++ terraform/modules/waf/variables.tf | 6 ++++++ 3 files changed, 29 insertions(+) diff --git a/staking-dashboard/terraform/waf.tf b/staking-dashboard/terraform/waf.tf index 8beca3682..3f346622a 100644 --- a/staking-dashboard/terraform/waf.tf +++ b/staking-dashboard/terraform/waf.tf @@ -20,6 +20,7 @@ module "website_waf" { enable_ip_reputation_list = true enable_anon_ip_rule_set = false enable_bot_control_rule_set = true + bot_control_excluded_uri_prefix = "/api/" # API paths proxied to indexer; bots/scripts need access enable_xss_rule_set = true # Block specific Ukrainian regions diff --git a/terraform/modules/waf/main.tf b/terraform/modules/waf/main.tf index d8d427411..a6a9fc705 100644 --- a/terraform/modules/waf/main.tf +++ b/terraform/modules/waf/main.tf @@ -345,6 +345,28 @@ resource "aws_wafv2_web_acl" "this" { managed_rule_group_statement { name = "AWSManagedRulesBotControlRuleSet" vendor_name = "AWS" + + # Optionally exclude a URI prefix (e.g. /api/) from bot evaluation + dynamic "scope_down_statement" { + for_each = var.bot_control_excluded_uri_prefix != "" ? 
[1] : [] + content { + not_statement { + statement { + byte_match_statement { + search_string = var.bot_control_excluded_uri_prefix + positional_constraint = "STARTS_WITH" + field_to_match { + uri_path {} + } + text_transformation { + priority = 0 + type = "LOWERCASE" + } + } + } + } + } + } } } diff --git a/terraform/modules/waf/variables.tf b/terraform/modules/waf/variables.tf index fdcabbd3b..a78b9582d 100644 --- a/terraform/modules/waf/variables.tf +++ b/terraform/modules/waf/variables.tf @@ -151,6 +151,12 @@ variable "custom_vpn_ip_list" { default = [] } +variable "bot_control_excluded_uri_prefix" { + description = "URI prefix to exclude from Bot Control evaluation (e.g. '/api/'). Requests matching this prefix bypass bot checks." + type = string + default = "" +} + variable "blocked_ukrainian_regions" { description = "List of Ukrainian region codes to block (ISO 3166-2 region code without country prefix, e.g., \"14\" for Donetsk, \"09\" for Luhansk, \"43\" for Crimea). Full list: https://en.wikipedia.org/wiki/ISO_3166-2:UA" type = list(string) From b4cecb0b85bef1a042717dc06836f061bdb131e9 Mon Sep 17 00:00:00 2001 From: Koen Date: Sun, 22 Feb 2026 16:23:22 +0200 Subject: [PATCH 12/25] =?UTF-8?q?=F0=9F=92=9A=20=20automatic=20failover=20?= =?UTF-8?q?to=20backup=20deployment=20if=20behind?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/check-indexer-sync.yaml | 21 +++-- DEPLOYMENT.md | 72 +++++++++----- atp-indexer/src/api/index.ts | 4 + atp-indexer/src/api/middleware/sync-guard.ts | 99 ++++++++++++++++++++ staking-dashboard/terraform/data.tf | 27 ++++-- staking-dashboard/terraform/main.tf | 64 +++++++++---- 6 files changed, 231 insertions(+), 56 deletions(-) create mode 100644 atp-indexer/src/api/middleware/sync-guard.ts diff --git a/.github/workflows/check-indexer-sync.yaml b/.github/workflows/check-indexer-sync.yaml index 57b2f7c46..197ac71da 100644 --- a/.github/workflows/check-indexer-sync.yaml 
+++ b/.github/workflows/check-indexer-sync.yaml @@ -159,22 +159,27 @@ jobs: echo "is_synced=false" >> $GITHUB_OUTPUT fi - - name: Switch frontend CloudFront indexer origin + - name: Swap origin group primary to target color if: steps.state.outputs.has_pending == 'true' && steps.timeout.outputs.timed_out == 'false' && steps.sync.outputs.is_synced == 'true' run: | FRONTEND_DIST_ID="${{ steps.state.outputs.frontend_dist_id }}" - NEW_ORIGIN="${{ steps.state.outputs.target_cf_domain }}" + TARGET_COLOR="${{ steps.state.outputs.target_color }}" + TARGET_ORIGIN_ID="${TARGET_COLOR}IndexerOrigin" - echo "Switching indexer origin on frontend CloudFront $FRONTEND_DIST_ID to: $NEW_ORIGIN" + echo "Making ${TARGET_ORIGIN_ID} primary in origin group on $FRONTEND_DIST_ID" # Get current distribution config aws cloudfront get-distribution-config --id "$FRONTEND_DIST_ID" --output json > /tmp/cf-config.json ETAG=$(jq -r '.ETag' /tmp/cf-config.json) - # Update the "indexerOrigin" origin's domain name (not the S3 origin) - jq --arg domain "$NEW_ORIGIN" \ - '(.DistributionConfig.Origins.Items[] | select(.Id == "indexerOrigin")).DomainName = $domain' \ - /tmp/cf-config.json | jq '.DistributionConfig' > /tmp/cf-config-updated.json + # Reorder origin group members: put target color first (= primary) + jq --arg target "$TARGET_ORIGIN_ID" ' + .DistributionConfig.OriginGroups.Items[0].Members.Items |= + (map(select(.OriginId == $target)) + map(select(.OriginId != $target))) + ' /tmp/cf-config.json | jq '.DistributionConfig' > /tmp/cf-config-updated.json + + echo "New origin group member order:" + jq '.OriginGroups.Items[0].Members.Items[].OriginId' /tmp/cf-config-updated.json # Apply the update aws cloudfront update-distribution \ @@ -189,7 +194,7 @@ jobs: --paths "/api/*" \ --no-cli-pager - echo "Frontend CloudFront indexer origin updated and /api/* cache invalidated" + echo "Origin group updated — ${TARGET_ORIGIN_ID} is now primary" - name: Update deployment state if: 
steps.state.outputs.has_pending == 'true' && steps.timeout.outputs.timed_out == 'false' && steps.sync.outputs.is_synced == 'true' diff --git a/DEPLOYMENT.md b/DEPLOYMENT.md index 3cea53c42..d301e4cd5 100644 --- a/DEPLOYMENT.md +++ b/DEPLOYMENT.md @@ -5,7 +5,7 @@ This repo contains two deployable components: 1. **Staking Dashboard** (`staking-dashboard/`) — React frontend served from S3 via CloudFront 2. **ATP Indexer** (`atp-indexer/`) — Ponder blockchain indexer running on ECS Fargate -Both are deployed per-environment (`dev`, `staging`, `testnet`, `prod`). The indexer has two instances per environment — **red** and **green** — to enable zero-downtime deployments. +Both are deployed per-environment (`dev`, `staging`, `testnet`, `prod`). The indexer has two instances per environment — **red** and **green** — to enable zero-downtime deployments and automatic failover. ## Architecture @@ -19,29 +19,29 @@ Both are deployed per-environment (`dev`, `staging`, `testnet`, `prod`). The ind │ │ /static │ │ /api/* │ │ - ┌──────┴──┐ ┌─┴─────────────┐ - │ S3 │ │ indexerOrigin │ ← points to live color - │ Bucket │ │ (CF domain) │ - └─────────┘ └──────┬────────┘ - │ - ┌────────────┴────────────┐ - │ │ - ┌──────┴──────┐ ┌───────┴─────┐ - │ Red CF │ │ Green CF │ - └──────┬──────┘ └──────┬──────┘ - ┌──────┴──────┐ ┌──────┴──────┐ - │ Red ALB │ │ Green ALB │ - └──────┬──────┘ └──────┴──────┘ - │ │ - Red ECS Green ECS - (indexer+server) (indexer+server) + ┌──────┴──┐ ┌─┴──────────────────┐ + │ S3 │ │ Origin Group │ + │ Bucket │ │ (failover 502/503/504) + └─────────┘ └──┬──────────────┬──┘ + │ primary │ secondary + ┌──────┴──────┐ ┌─────┴───────┐ + │ Red CF │ │ Green CF │ + └──────┬──────┘ └──────┬──────┘ + ┌──────┴──────┐ ┌──────┴──────┐ + │ Red ALB │ │ Green ALB │ + └──────┬──────┘ └──────┴──────┘ + │ │ + Red ECS Green ECS + (indexer+server) (indexer+server) ``` -The frontend CloudFront distribution has two origins: +The frontend CloudFront distribution has three origins: - **S3** 
for static assets (default behavior) -- **indexerOrigin** for `/api/*` requests, pointing to whichever indexer color is live +- **redIndexerOrigin** and **greenIndexerOrigin** combined in an **origin group** for `/api/*` requests -This means the frontend always uses its own domain for API calls (`/api/*`). Indexer switchovers only update the CloudFront origin — no frontend redeploy needed. +The origin group provides **automatic failover**: if the primary indexer returns 502/503/504, CloudFront retries the request on the secondary indexer within the same request cycle. The indexer's sync-guard middleware returns 503 when it falls behind (≥200 blocks), triggering this failover automatically. + +The blue-green cron swaps which color is primary in the origin group. No frontend redeploy is needed for switchovers. ## Environments @@ -95,7 +95,7 @@ Runs on a cron every 30 minutes. For each environment with a `pending_switchover 1. Hits `GET /api/sync-status` on the backup's CloudFront domain 2. If not synced yet → exits, retries next cron run 3. If synced → performs the switchover: - - Updates the frontend CloudFront's `indexerOrigin` to point to the new live color (via AWS CLI) + - Swaps the origin group member order so the new live color becomes **primary** (via AWS CLI) - Invalidates `/api/*` cache - Updates the S3 deployment state (`live_color` = new color, `pending_switchover` = null) - Triggers `Deploy ATP Indexer` for the old live (so both colors end up on the latest code) @@ -103,6 +103,17 @@ Runs on a cron every 30 minutes. For each environment with a `pending_switchover Can also be triggered manually to check a specific environment immediately. +### Automatic Failover + +Even without a pending switchover, if the live indexer falls behind or goes down, the **CloudFront origin group** handles failover automatically: + +1. The sync-guard middleware (`atp-indexer/src/api/middleware/sync-guard.ts`) checks sync status every 30 seconds +2. 
When the indexer is ≥200 blocks behind, all API endpoints (except `/api/sync-status` and `/api/health`) return **503** +3. CloudFront sees the 503 and retries the request on the **secondary** origin +4. If the secondary is healthy, the user gets a response seamlessly — no manual intervention needed + +This means failover is **instant** (per-request), not dependent on cron timing. + ### Sync Status Endpoint `GET /api/sync-status` returns: @@ -118,7 +129,7 @@ Can also be triggered manually to check a specific environment immediately. } ``` -The indexer is considered synced when `behindBlocks < 50` and `hasData` is true (at least one provider exists in the database). +The indexer is considered synced when `behindBlocks < 50` and `hasData` is true (at least one provider exists in the database). This endpoint always returns 200 (never blocked by the sync-guard middleware) so the blue-green cron can always check status. ### Deploying to a Single Color (Manual) @@ -177,16 +188,27 @@ This reads the CloudFront domains from Terraform state and creates the S3 deploy ## Terraform -The frontend CloudFront distribution's `indexerOrigin` is managed with `lifecycle { ignore_changes = [origin] }` so that Terraform doesn't revert origin changes made by the blue-green cron via AWS CLI. +The frontend CloudFront distribution uses an **origin group** with both red and green indexer CloudFronts. The origin group member order (which is primary) is managed by the blue-green cron via AWS CLI. The `lifecycle { ignore_changes = [origin_group] }` block prevents Terraform from reverting the cron's changes. The origins themselves have fixed domains and are fully managed by Terraform. SPA routing is handled by a CloudFront Function (`spa_routing`) on the default behavior's viewer-request event instead of a 404 `custom_error_response`, because `custom_error_response` is distribution-wide and would intercept API 404s. 
+### Migrating from single indexerOrigin to origin group + +For existing environments that still have a single `indexerOrigin`: + +1. Temporarily comment out the `lifecycle` block in `staking-dashboard/terraform/main.tf` +2. Run `terraform apply` — this replaces the old origin with red/green origins + origin group +3. Uncomment the `lifecycle` block +4. Apply again (no-op, just registers the lifecycle) + ## Troubleshooting **Switchover stuck / timed out:** The cron clears pending switchovers after 2 hours. Check the backup's `/api/sync-status` endpoint directly. If the indexer is erroring, check ECS logs. **Switchover never triggers:** Verify the S3 state file has a `pending_switchover` set. The cron only runs every 30 minutes — trigger `Check Indexer Sync & Switchover` manually for faster feedback. -**Wrong color is live:** Manually run `Deploy ATP Indexer` targeting the correct color, then update the S3 state file's `live_color` field directly. +**Wrong color is live:** Manually run `Deploy ATP Indexer` targeting the correct color, then update the S3 state file's `live_color` field directly. To also change the origin group primary, use `aws cloudfront get-distribution-config` / `update-distribution` to reorder the origin group members. + +**Terraform wants to revert the origin group:** The `lifecycle { ignore_changes = [origin_group] }` block should prevent this. If it's happening, check that the block is still present in `staking-dashboard/terraform/main.tf`. -**Terraform wants to revert the origin:** The `lifecycle { ignore_changes = [origin] }` block should prevent this. If it's happening, check that the block is still present in `staking-dashboard/terraform/main.tf`. +**API returns 503:** The sync-guard middleware returns 503 when the indexer is ≥200 blocks behind. Check `/api/sync-status` directly on the indexer's CloudFront domain to see the actual sync status. 
If both indexers are behind, both will return 503 and no failover target is available — investigate why indexing is stalled. diff --git a/atp-indexer/src/api/index.ts b/atp-indexer/src/api/index.ts index 8e5dd11dc..1c44072b7 100644 --- a/atp-indexer/src/api/index.ts +++ b/atp-indexer/src/api/index.ts @@ -2,6 +2,7 @@ import { Hono } from "hono"; import { cors } from 'hono/cors'; import { logger } from 'hono/logger'; import { globalLimiter } from './middleware/rate-limit'; +import { syncGuard } from './middleware/sync-guard'; import { healthRoutes } from './routes/health.routes'; import { providerRoutes } from './routes/provider.routes'; import { stakingRoutes } from './routes/staking.routes'; @@ -23,6 +24,9 @@ app.use('*', cors({ maxAge: 86400, })); +// Return 503 when indexer is significantly behind — triggers CloudFront origin group failover +app.use('/api/*', syncGuard.middleware()); + // Apply rate limiting only if enabled (disabled by default) if (config.RATE_LIMIT_ENABLED) { app.use('/api/*', globalLimiter); diff --git a/atp-indexer/src/api/middleware/sync-guard.ts b/atp-indexer/src/api/middleware/sync-guard.ts new file mode 100644 index 000000000..1e371cc91 --- /dev/null +++ b/atp-indexer/src/api/middleware/sync-guard.ts @@ -0,0 +1,99 @@ +import type { Context, Next } from 'hono'; +import { db } from 'ponder:api'; +import { max, count } from 'drizzle-orm'; +import { deposit, provider, atpPosition } from 'ponder:schema'; +import { getPublicClient } from '../../utils/viem-client'; + +/** + * Sync guard middleware — returns 503 when the indexer is significantly behind + * the chain head. This triggers CloudFront origin group failover to the + * secondary (backup) indexer. + * + * Background check runs every 30s. Excluded paths (/api/sync-status, /api/health) + * always pass through so the blue-green cron can still query sync status. 
+ */ + +const BEHIND_THRESHOLD = 200; +const CHECK_INTERVAL_MS = 30_000; +const INITIAL_DELAY_MS = 5_000; + +const EXCLUDED_PREFIXES = ['/api/sync-status', '/api/health']; + +class SyncGuard { + private behindBlocks = 0; + private hasData = false; + private healthy = true; + private initialized = false; + + constructor() { + setTimeout(() => this.check(), INITIAL_DELAY_MS); + setInterval(() => this.check(), CHECK_INTERVAL_MS); + } + + private async check() { + try { + const client = getPublicClient(); + + const [chainHeadBlock, depositMax, providerMax, atpMax, providerCount] = + await Promise.all([ + client.getBlockNumber(), + db.select({ maxBlock: max(deposit.blockNumber) }).from(deposit), + db.select({ maxBlock: max(provider.blockNumber) }).from(provider), + db.select({ maxBlock: max(atpPosition.blockNumber) }).from(atpPosition), + db.select({ count: count() }).from(provider), + ]); + + const chainHead = Number(chainHeadBlock); + const maxBlocks = [ + depositMax[0]?.maxBlock, + providerMax[0]?.maxBlock, + atpMax[0]?.maxBlock, + ] + .filter((b): b is bigint => b !== null && b !== undefined) + .map(Number); + + const indexedBlock = maxBlocks.length > 0 ? Math.max(...maxBlocks) : 0; + this.hasData = Number(providerCount[0].count) > 0; + this.behindBlocks = chainHead - indexedBlock; + this.healthy = this.behindBlocks < BEHIND_THRESHOLD && this.hasData; + this.initialized = true; + + if (!this.healthy) { + console.warn( + `[sync-guard] Unhealthy: ${this.behindBlocks} blocks behind (threshold: ${BEHIND_THRESHOLD}, hasData: ${this.hasData})` + ); + } + } catch (error) { + console.error('[sync-guard] Check failed:', error); + // Keep previous state on transient failures + } + } + + middleware() { + return async (c: Context, next: Next) => { + const path = c.req.path; + + if (EXCLUDED_PREFIXES.some((p) => path.startsWith(p))) { + await next(); + return; + } + + // Return 503 once initialized and unhealthy. + // Before initialization, pass through (assume healthy). 
+ if (this.initialized && !this.healthy) { + c.header('Retry-After', '30'); + return c.json( + { + error: 'Service temporarily unavailable — indexer is syncing', + behindBlocks: this.behindBlocks, + }, + 503 + ); + } + + await next(); + }; + } +} + +export const syncGuard = new SyncGuard(); diff --git a/staking-dashboard/terraform/data.tf b/staking-dashboard/terraform/data.tf index e99b5dfc7..be48eac47 100644 --- a/staking-dashboard/terraform/data.tf +++ b/staking-dashboard/terraform/data.tf @@ -1,8 +1,19 @@ -data "terraform_remote_state" "atp-indexer" { +# Red indexer (deployment suffix "") +data "terraform_remote_state" "atp-indexer-red" { backend = "s3" config = { bucket = "aztec-token-sale-terraform-state" - key = "${var.env}${var.indexer_deployment_suffix}/backends/atp-indexer/terraform.tfstate" + key = "${var.env}/backends/atp-indexer/terraform.tfstate" + region = "eu-west-2" + } +} + +# Green indexer (deployment suffix "-green") +data "terraform_remote_state" "atp-indexer-green" { + backend = "s3" + config = { + bucket = "aztec-token-sale-terraform-state" + key = "${var.env}-green/backends/atp-indexer/terraform.tfstate" region = "eu-west-2" } } @@ -19,12 +30,14 @@ data "terraform_remote_state" "shared" { # Local references to backend service URLs locals { - atp_indexer_url = "https://${data.terraform_remote_state.atp-indexer.outputs.cf_domain_name}" - atp_indexer_cf_domain = data.terraform_remote_state.atp-indexer.outputs.cf_domain_name - cloudfront_logs_bucket = try(data.terraform_remote_state.shared.outputs.cloudfront_logs_bucket_domain_name, "") + # Both indexer CF domains — green may not exist yet for new environments + red_indexer_cf_domain = data.terraform_remote_state.atp-indexer-red.outputs.cf_domain_name + green_indexer_cf_domain = try(data.terraform_remote_state.atp-indexer-green.outputs.cf_domain_name, "") + has_green_indexer = local.green_indexer_cf_domain != "" + + cloudfront_logs_bucket = 
try(data.terraform_remote_state.shared.outputs.cloudfront_logs_bucket_domain_name, "") } output "atp_indexer_url" { - value = local.atp_indexer_url + value = "https://${local.red_indexer_cf_domain}" } - diff --git a/staking-dashboard/terraform/main.tf b/staking-dashboard/terraform/main.tf index 4fb80cd1f..2f4097db5 100644 --- a/staking-dashboard/terraform/main.tf +++ b/staking-dashboard/terraform/main.tf @@ -248,12 +248,10 @@ resource "aws_cloudfront_distribution" "staking_dashboard_distribution" { origin_access_control_id = aws_cloudfront_origin_access_control.oac-staking-dashboard.id } - # Origin 2: Live indexer CloudFront (proxied for /api/* requests). - # The blue-green cron workflow updates this origin's domain via AWS CLI - # when switching between red/green indexers. + # Origin 2: Red indexer CloudFront origin { - domain_name = local.atp_indexer_cf_domain - origin_id = "indexerOrigin" + domain_name = local.red_indexer_cf_domain + origin_id = "redIndexerOrigin" custom_origin_config { http_port = 80 @@ -263,12 +261,45 @@ resource "aws_cloudfront_distribution" "staking_dashboard_distribution" { } } - # /api/* requests → indexer origin + # Origin 3: Green indexer CloudFront + origin { + domain_name = local.green_indexer_cf_domain + origin_id = "greenIndexerOrigin" + + custom_origin_config { + http_port = 80 + https_port = 443 + origin_protocol_policy = "https-only" + origin_ssl_protocols = ["TLSv1.2"] + } + } + + # Origin group: automatic failover when primary returns 502/503/504. + # The sync-guard middleware on the indexer returns 503 when behind, + # triggering CloudFront to retry on the secondary indexer. + # The blue-green cron swaps member order to control which color is primary. 
+ origin_group { + origin_id = "indexerOriginGroup" + + failover_criteria { + status_codes = [502, 503, 504] + } + + member { + origin_id = "redIndexerOrigin" + } + + member { + origin_id = "greenIndexerOrigin" + } + } + + # /api/* requests → indexer origin group (automatic failover) ordered_cache_behavior { path_pattern = "/api/*" allowed_methods = ["DELETE", "GET", "HEAD", "OPTIONS", "PATCH", "POST", "PUT"] cached_methods = ["GET", "HEAD", "OPTIONS"] - target_origin_id = "indexerOrigin" + target_origin_id = "indexerOriginGroup" viewer_protocol_policy = "redirect-to-https" @@ -343,16 +374,17 @@ resource "aws_cloudfront_distribution" "staking_dashboard_distribution" { } } - # The indexer origin domain is updated by the blue-green cron via AWS CLI. - # Ignore origin changes so Terraform doesn't revert the switchover. - # The S3 origin never changes so this is safe. + # The blue-green cron swaps origin_group member order via AWS CLI + # to control which indexer is primary. Ignore origin_group so Terraform + # doesn't revert the switchover. Origins themselves have fixed domains + # (red/green CF) and are fully managed by Terraform. # - # IMPORTANT: For the first deploy to a new environment, comment out the - # lifecycle block below so Terraform can create the indexerOrigin. - # After the first successful apply, uncomment it. - lifecycle { - ignore_changes = [origin] - } + # MIGRATION from single indexerOrigin: temporarily comment out the + # lifecycle block, apply, then uncomment. This lets Terraform replace + # the old single origin with the red/green origins + origin group. 
+ # lifecycle { + # ignore_changes = [origin_group] + # } } # From 99c4ea0332c34fc399846de25a2b1439d856d6fa Mon Sep 17 00:00:00 2001 From: Koen Date: Sun, 22 Feb 2026 16:25:51 +0200 Subject: [PATCH 13/25] :bug: fix allowed methods for cache --- staking-dashboard/terraform/main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/staking-dashboard/terraform/main.tf b/staking-dashboard/terraform/main.tf index 2f4097db5..424cce64a 100644 --- a/staking-dashboard/terraform/main.tf +++ b/staking-dashboard/terraform/main.tf @@ -297,7 +297,7 @@ resource "aws_cloudfront_distribution" "staking_dashboard_distribution" { # /api/* requests → indexer origin group (automatic failover) ordered_cache_behavior { path_pattern = "/api/*" - allowed_methods = ["DELETE", "GET", "HEAD", "OPTIONS", "PATCH", "POST", "PUT"] + allowed_methods = ["GET", "HEAD", "OPTIONS"] cached_methods = ["GET", "HEAD", "OPTIONS"] target_origin_id = "indexerOriginGroup" From ce7c6ae72b090cfa5047f2b44f429a26617389c0 Mon Sep 17 00:00:00 2001 From: Koen Date: Sun, 22 Feb 2026 16:58:27 +0200 Subject: [PATCH 14/25] =?UTF-8?q?=F0=9F=92=9A=20=20use=20a=20local=20waf?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- atp-indexer/terraform/app.tf | 2 +- atp-indexer/terraform/data.tf | 2 -- atp-indexer/terraform/waf.tf | 33 +++++++++++++++++++++++++++++++++ 3 files changed, 34 insertions(+), 3 deletions(-) create mode 100644 atp-indexer/terraform/waf.tf diff --git a/atp-indexer/terraform/app.tf b/atp-indexer/terraform/app.tf index 55364eb26..0384ba36b 100644 --- a/atp-indexer/terraform/app.tf +++ b/atp-indexer/terraform/app.tf @@ -531,7 +531,7 @@ resource "aws_cloudfront_distribution" "cf" { comment = "ATP Indexer (${var.env}${var.deployment_suffix})" enabled = true default_root_object = "" - web_acl_id = local.backend_waf_arn + web_acl_id = module.indexer_waf.web_acl_arn # Use custom domain with certificate # aliases = 
["indexer.${var.env}.stake.aztec.network"] diff --git a/atp-indexer/terraform/data.tf b/atp-indexer/terraform/data.tf index df27d15a1..4050f0510 100644 --- a/atp-indexer/terraform/data.tf +++ b/atp-indexer/terraform/data.tf @@ -18,8 +18,6 @@ locals { ecs_cluster_id = data.terraform_remote_state.shared.outputs.ecs_cluster_id aws_services_security_group_id = data.terraform_remote_state.shared.outputs.aws_services_security_group_id vpc_internal_security_group_id = data.terraform_remote_state.shared.outputs.vpc_internal_security_group_id - backend_waf_arn = data.terraform_remote_state.shared.outputs.backend_waf_arn - # CloudFront secret header configuration (shared across all backend services) # Use try() to handle cases where outputs don't exist yet in remote state cloudfront_secret_header_ssm_name = try(data.terraform_remote_state.shared.outputs.cloudfront_secret_header_ssm_name, "") diff --git a/atp-indexer/terraform/waf.tf b/atp-indexer/terraform/waf.tf new file mode 100644 index 000000000..401a9be5e --- /dev/null +++ b/atp-indexer/terraform/waf.tf @@ -0,0 +1,33 @@ +# WAF for ATP indexer CloudFront distribution +module "indexer_waf" { + source = "../../terraform/modules/waf" + + providers = { + aws = aws.us_east_1 + } + + name = "${local.full_name}-waf" + scope = "CLOUDFRONT" + + rate_limit = 5000 + max_request_size_kb = 8 + + # Managed rule sets + enable_managed_core_rule_set = true + enable_known_bad_inputs_rule_set = true + enable_sql_injection_rule_set = false # Read-only API + enable_ip_reputation_list = true + enable_anon_ip_rule_set = false + enable_bot_control_rule_set = false # API must be accessible to scripts, crons, and CF-to-CF proxying + enable_xss_rule_set = true + + # API only serves GET/HEAD/OPTIONS + allowed_methods = ["GET", "HEAD", "OPTIONS"] + + # No geo-blocking — handled by the frontend CloudFront WAF + enable_geo_blocking = false + + tags = merge(local.common_tags, { + Type = "security" + }) +} From 430b149fecfc5587d08e581282796734af70c0e9 
Mon Sep 17 00:00:00 2001 From: Koen Date: Sun, 22 Feb 2026 17:14:07 +0200 Subject: [PATCH 15/25] =?UTF-8?q?=F0=9F=90=9B=20=20read=20index=20progres?= =?UTF-8?q?=20correctly?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- atp-indexer/src/api/handlers/sync-status.ts | 42 ++++-------------- atp-indexer/src/api/middleware/sync-guard.ts | 39 +++++------------ atp-indexer/src/utils/indexer-progress.ts | 45 ++++++++++++++++++++ 3 files changed, 65 insertions(+), 61 deletions(-) create mode 100644 atp-indexer/src/utils/indexer-progress.ts diff --git a/atp-indexer/src/api/handlers/sync-status.ts b/atp-indexer/src/api/handlers/sync-status.ts index 9921d81f7..a02c50230 100644 --- a/atp-indexer/src/api/handlers/sync-status.ts +++ b/atp-indexer/src/api/handlers/sync-status.ts @@ -1,8 +1,8 @@ import type { Context } from 'hono'; import { db } from 'ponder:api'; -import { max, count } from 'drizzle-orm'; -import { deposit, provider, atpPosition } from 'ponder:schema'; -import { getPublicClient } from '../../utils/viem-client'; +import { count } from 'drizzle-orm'; +import { provider } from 'ponder:schema'; +import { getIndexerProgress } from '../../utils/indexer-progress'; interface SyncStatusResponse { synced: boolean; @@ -22,43 +22,19 @@ const SYNC_THRESHOLD_BLOCKS = 50; */ export async function handleSyncStatus(c: Context): Promise { try { - const client = getPublicClient(); - - const [ - chainHeadBlock, - depositMaxBlock, - providerMaxBlock, - atpMaxBlock, - providerCountResult, - ] = await Promise.all([ - client.getBlockNumber(), - db.select({ maxBlock: max(deposit.blockNumber) }).from(deposit), - db.select({ maxBlock: max(provider.blockNumber) }).from(provider), - db.select({ maxBlock: max(atpPosition.blockNumber) }).from(atpPosition), + const [progress, providerCountResult] = await Promise.all([ + getIndexerProgress(), db.select({ count: count() }).from(provider), ]); - const chainHead = Number(chainHeadBlock); - - // Take 
the highest block number across all tables - const maxBlocks = [ - depositMaxBlock[0]?.maxBlock, - providerMaxBlock[0]?.maxBlock, - atpMaxBlock[0]?.maxBlock, - ] - .filter((b): b is bigint => b !== null && b !== undefined) - .map(Number); - - const indexedBlock = maxBlocks.length > 0 ? Math.max(...maxBlocks) : 0; const hasData = Number(providerCountResult[0].count) > 0; - const behindBlocks = chainHead - indexedBlock; - const synced = behindBlocks < SYNC_THRESHOLD_BLOCKS && hasData; + const synced = progress.behindBlocks < SYNC_THRESHOLD_BLOCKS && hasData; const response: SyncStatusResponse = { synced, - indexedBlock, - chainHead, - behindBlocks, + indexedBlock: progress.indexedBlock, + chainHead: progress.chainHead, + behindBlocks: progress.behindBlocks, hasData, timestamp: new Date().toISOString(), }; diff --git a/atp-indexer/src/api/middleware/sync-guard.ts b/atp-indexer/src/api/middleware/sync-guard.ts index 1e371cc91..7c08aa4e5 100644 --- a/atp-indexer/src/api/middleware/sync-guard.ts +++ b/atp-indexer/src/api/middleware/sync-guard.ts @@ -1,8 +1,8 @@ import type { Context, Next } from 'hono'; import { db } from 'ponder:api'; -import { max, count } from 'drizzle-orm'; -import { deposit, provider, atpPosition } from 'ponder:schema'; -import { getPublicClient } from '../../utils/viem-client'; +import { count } from 'drizzle-orm'; +import { provider } from 'ponder:schema'; +import { getIndexerProgress } from '../../utils/indexer-progress'; /** * Sync guard middleware — returns 503 when the indexer is significantly behind @@ -21,7 +21,6 @@ const EXCLUDED_PREFIXES = ['/api/sync-status', '/api/health']; class SyncGuard { private behindBlocks = 0; - private hasData = false; private healthy = true; private initialized = false; @@ -32,35 +31,19 @@ class SyncGuard { private async check() { try { - const client = getPublicClient(); + const [progress, providerCount] = await Promise.all([ + getIndexerProgress(), + db.select({ count: count() }).from(provider), + ]); - const 
[chainHeadBlock, depositMax, providerMax, atpMax, providerCount] = - await Promise.all([ - client.getBlockNumber(), - db.select({ maxBlock: max(deposit.blockNumber) }).from(deposit), - db.select({ maxBlock: max(provider.blockNumber) }).from(provider), - db.select({ maxBlock: max(atpPosition.blockNumber) }).from(atpPosition), - db.select({ count: count() }).from(provider), - ]); - - const chainHead = Number(chainHeadBlock); - const maxBlocks = [ - depositMax[0]?.maxBlock, - providerMax[0]?.maxBlock, - atpMax[0]?.maxBlock, - ] - .filter((b): b is bigint => b !== null && b !== undefined) - .map(Number); - - const indexedBlock = maxBlocks.length > 0 ? Math.max(...maxBlocks) : 0; - this.hasData = Number(providerCount[0].count) > 0; - this.behindBlocks = chainHead - indexedBlock; - this.healthy = this.behindBlocks < BEHIND_THRESHOLD && this.hasData; + const hasData = Number(providerCount[0].count) > 0; + this.behindBlocks = progress.behindBlocks; + this.healthy = this.behindBlocks < BEHIND_THRESHOLD && hasData; this.initialized = true; if (!this.healthy) { console.warn( - `[sync-guard] Unhealthy: ${this.behindBlocks} blocks behind (threshold: ${BEHIND_THRESHOLD}, hasData: ${this.hasData})` + `[sync-guard] Unhealthy: ${this.behindBlocks} blocks behind (threshold: ${BEHIND_THRESHOLD}, hasData: ${hasData})` ); } } catch (error) { diff --git a/atp-indexer/src/utils/indexer-progress.ts b/atp-indexer/src/utils/indexer-progress.ts new file mode 100644 index 000000000..8126f17a7 --- /dev/null +++ b/atp-indexer/src/utils/indexer-progress.ts @@ -0,0 +1,45 @@ +import { sql } from 'drizzle-orm'; +import { db } from 'ponder:api'; +import { getPublicClient } from './viem-client'; + +/** + * Block number is encoded at positions 26–41 (16 digits) in Ponder's + * 75-character checkpoint string. 
+ */ +const BLOCK_NUMBER_OFFSET = 26; +const BLOCK_NUMBER_LENGTH = 16; + +function decodeBlockNumber(checkpoint: string): number { + return Number(checkpoint.slice(BLOCK_NUMBER_OFFSET, BLOCK_NUMBER_OFFSET + BLOCK_NUMBER_LENGTH)); +} + +/** + * Get the indexer's actual processing progress by reading Ponder's + * internal `_ponder_checkpoint` table. + * + * This is more accurate than MAX(block_number) on event tables, which only + * reflects the block of the last *emitted event*, not the last *processed block*. + */ +export async function getIndexerProgress(): Promise<{ + indexedBlock: number; + chainHead: number; + behindBlocks: number; +}> { + const client = getPublicClient(); + + const [chainHeadBlock, checkpointRows] = await Promise.all([ + client.getBlockNumber(), + db.execute(sql`SELECT "latestCheckpoint" FROM "_ponder_checkpoint" LIMIT 1`), + ]); + + const chainHead = Number(chainHeadBlock); + + // drizzle execute() returns { rows: [...] } for node-postgres + const rows = (checkpointRows as { rows: { latestCheckpoint: string }[] }).rows; + const latestCheckpoint = rows?.[0]?.latestCheckpoint; + + const indexedBlock = latestCheckpoint ? 
decodeBlockNumber(latestCheckpoint) : 0; + const behindBlocks = chainHead - indexedBlock; + + return { indexedBlock, chainHead, behindBlocks }; +} From 797f61b4b83473b7ccfa7b491a768b82d4a4ad32 Mon Sep 17 00:00:00 2001 From: Koen Date: Sun, 22 Feb 2026 17:21:37 +0200 Subject: [PATCH 16/25] :bug: fix utils --- atp-indexer/src/api/handlers/sync-status.ts | 2 +- atp-indexer/src/api/middleware/sync-guard.ts | 2 +- atp-indexer/src/{ => api}/utils/indexer-progress.ts | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) rename atp-indexer/src/{ => api}/utils/indexer-progress.ts (96%) diff --git a/atp-indexer/src/api/handlers/sync-status.ts b/atp-indexer/src/api/handlers/sync-status.ts index a02c50230..02d4b8323 100644 --- a/atp-indexer/src/api/handlers/sync-status.ts +++ b/atp-indexer/src/api/handlers/sync-status.ts @@ -2,7 +2,7 @@ import type { Context } from 'hono'; import { db } from 'ponder:api'; import { count } from 'drizzle-orm'; import { provider } from 'ponder:schema'; -import { getIndexerProgress } from '../../utils/indexer-progress'; +import { getIndexerProgress } from '../utils/indexer-progress'; interface SyncStatusResponse { synced: boolean; diff --git a/atp-indexer/src/api/middleware/sync-guard.ts b/atp-indexer/src/api/middleware/sync-guard.ts index 7c08aa4e5..5d6df20f9 100644 --- a/atp-indexer/src/api/middleware/sync-guard.ts +++ b/atp-indexer/src/api/middleware/sync-guard.ts @@ -2,7 +2,7 @@ import type { Context, Next } from 'hono'; import { db } from 'ponder:api'; import { count } from 'drizzle-orm'; import { provider } from 'ponder:schema'; -import { getIndexerProgress } from '../../utils/indexer-progress'; +import { getIndexerProgress } from '../utils/indexer-progress'; /** * Sync guard middleware — returns 503 when the indexer is significantly behind diff --git a/atp-indexer/src/utils/indexer-progress.ts b/atp-indexer/src/api/utils/indexer-progress.ts similarity index 96% rename from atp-indexer/src/utils/indexer-progress.ts rename to 
atp-indexer/src/api/utils/indexer-progress.ts index 8126f17a7..ad7cfca10 100644 --- a/atp-indexer/src/utils/indexer-progress.ts +++ b/atp-indexer/src/api/utils/indexer-progress.ts @@ -1,6 +1,6 @@ import { sql } from 'drizzle-orm'; import { db } from 'ponder:api'; -import { getPublicClient } from './viem-client'; +import { getPublicClient } from '../../utils/viem-client'; /** * Block number is encoded at positions 26–41 (16 digits) in Ponder's From abb7a0c3de814ca65f12753ae3037b2fc9a9e628 Mon Sep 17 00:00:00 2001 From: Koen Date: Sun, 22 Feb 2026 17:28:48 +0200 Subject: [PATCH 17/25] :bug: fix latest column casing --- atp-indexer/src/api/utils/indexer-progress.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/atp-indexer/src/api/utils/indexer-progress.ts b/atp-indexer/src/api/utils/indexer-progress.ts index ad7cfca10..2ff2efa91 100644 --- a/atp-indexer/src/api/utils/indexer-progress.ts +++ b/atp-indexer/src/api/utils/indexer-progress.ts @@ -29,14 +29,14 @@ export async function getIndexerProgress(): Promise<{ const [chainHeadBlock, checkpointRows] = await Promise.all([ client.getBlockNumber(), - db.execute(sql`SELECT "latestCheckpoint" FROM "_ponder_checkpoint" LIMIT 1`), + db.execute(sql`SELECT "latest_checkpoint" FROM "_ponder_checkpoint" LIMIT 1`), ]); const chainHead = Number(chainHeadBlock); // drizzle execute() returns { rows: [...] } for node-postgres - const rows = (checkpointRows as { rows: { latestCheckpoint: string }[] }).rows; - const latestCheckpoint = rows?.[0]?.latestCheckpoint; + const rows = (checkpointRows as unknown as { rows: { latest_checkpoint: string }[] }).rows; + const latestCheckpoint = rows?.[0]?.latest_checkpoint; const indexedBlock = latestCheckpoint ? 
decodeBlockNumber(latestCheckpoint) : 0; const behindBlocks = chainHead - indexedBlock; From 5f96c7efeaf9e6d7d752ebbd7c011e9646bd3c3f Mon Sep 17 00:00:00 2001 From: Koen Date: Mon, 23 Feb 2026 09:41:36 +0200 Subject: [PATCH 18/25] :rocket: bump dev db schema --- db-schemas.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db-schemas.json b/db-schemas.json index 717518f41..894b592e3 100644 --- a/db-schemas.json +++ b/db-schemas.json @@ -1,6 +1,6 @@ { "atp-indexer": { - "dev": "atp-indexer-dev-v02", + "dev": "atp-indexer-dev-v03", "staging": "atp-indexer-staging-v01", "testnet": "atp-indexer-testnet-v03", "prod": "atp-indexer-prod-v14" From f2e0aa326f10675d424c411c74cb4eac3c6617f7 Mon Sep 17 00:00:00 2001 From: Koen Date: Mon, 23 Feb 2026 10:07:18 +0200 Subject: [PATCH 19/25] :rocket: deploy to dev --- .../workflows/deploy-indexer-bluegreen.yaml | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/.github/workflows/deploy-indexer-bluegreen.yaml b/.github/workflows/deploy-indexer-bluegreen.yaml index b657671ec..91b0b49e7 100644 --- a/.github/workflows/deploy-indexer-bluegreen.yaml +++ b/.github/workflows/deploy-indexer-bluegreen.yaml @@ -7,6 +7,9 @@ name: Deploy Indexer (Blue-Green) # once the backup finishes re-indexing. 
on: + push: + branches: + - km/automted-deployment workflow_dispatch: inputs: environment: @@ -38,11 +41,11 @@ permissions: jobs: deploy-to-backup: runs-on: ubuntu-latest - environment: ${{ inputs.environment }} + environment: ${{ inputs.environment || 'dev' }} env: - ENV: ${{ inputs.environment }} - DRY_RUN: ${{ inputs.dry_run }} - FORCE: ${{ inputs.force }} + ENV: ${{ inputs.environment || 'dev' }} + DRY_RUN: ${{ inputs.dry_run || false }} + FORCE: ${{ inputs.force || false }} # AWS Configuration AWS_ACCOUNT: ${{ secrets.AWS_ACCOUNT }} @@ -95,7 +98,7 @@ jobs: - name: Read deployment state from S3 id: state run: | - STATE_KEY="deployment-state/${{ inputs.environment }}.json" + STATE_KEY="deployment-state/${ENV}.json" STATE_BUCKET="aztec-token-sale-terraform-state" if aws s3 cp "s3://${STATE_BUCKET}/${STATE_KEY}" /tmp/deploy-state.json 2>/dev/null; then @@ -121,7 +124,7 @@ jobs: fi - name: Check for pending switchover - if: steps.state.outputs.has_pending == 'true' && inputs.force == false + if: steps.state.outputs.has_pending == 'true' && env.FORCE == 'false' run: | echo "::error::A switchover is already pending. Use force=true to override." 
echo "Current state:" @@ -132,7 +135,6 @@ jobs: working-directory: atp-indexer run: | BACKUP="${{ steps.state.outputs.backup_color }}" - ENV="${{ inputs.environment }}" echo "Deploying to ${ENV} ($BACKUP)" if [ "$BACKUP" = "green" ]; then @@ -142,7 +144,7 @@ jobs: fi - name: Update deployment state with pending switchover - if: inputs.dry_run == false + if: env.DRY_RUN == 'false' run: | BACKUP="${{ steps.state.outputs.backup_color }}" NOW=$(date -u +"%Y-%m-%dT%H:%M:%SZ") @@ -158,12 +160,12 @@ jobs: cat /tmp/deploy-state-updated.json aws s3 cp /tmp/deploy-state-updated.json \ - "s3://aztec-token-sale-terraform-state/deployment-state/${{ inputs.environment }}.json" \ + "s3://aztec-token-sale-terraform-state/deployment-state/${ENV}.json" \ --content-type "application/json" echo "### Deployment Summary" >> $GITHUB_STEP_SUMMARY echo "" >> $GITHUB_STEP_SUMMARY - echo "- **Environment:** ${{ inputs.environment }}" >> $GITHUB_STEP_SUMMARY + echo "- **Environment:** ${ENV}" >> $GITHUB_STEP_SUMMARY echo "- **Deployed to:** $BACKUP (backup)" >> $GITHUB_STEP_SUMMARY echo "- **Live:** ${{ steps.state.outputs.live_color }}" >> $GITHUB_STEP_SUMMARY echo "- **Commit:** ${{ github.sha }}" >> $GITHUB_STEP_SUMMARY From 8ab8276a181ae8ef5e7df6c966cc00487a74ae5d Mon Sep 17 00:00:00 2001 From: Koen Date: Mon, 23 Feb 2026 10:26:36 +0200 Subject: [PATCH 20/25] :rocket: test indexer check --- .github/workflows/check-indexer-sync.yaml | 3 +++ .github/workflows/deploy-indexer-bluegreen.yaml | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/check-indexer-sync.yaml b/.github/workflows/check-indexer-sync.yaml index 197ac71da..6601555be 100644 --- a/.github/workflows/check-indexer-sync.yaml +++ b/.github/workflows/check-indexer-sync.yaml @@ -8,6 +8,9 @@ name: Check Indexer Sync & Switchover # 3. 
Triggers a deploy of the old live backend (so both end up updated) on: + push: + branches: + - km/automted-deployment schedule: - cron: '*/30 * * * *' workflow_dispatch: diff --git a/.github/workflows/deploy-indexer-bluegreen.yaml b/.github/workflows/deploy-indexer-bluegreen.yaml index 91b0b49e7..d537f6a3b 100644 --- a/.github/workflows/deploy-indexer-bluegreen.yaml +++ b/.github/workflows/deploy-indexer-bluegreen.yaml @@ -7,9 +7,6 @@ name: Deploy Indexer (Blue-Green) # once the backup finishes re-indexing. on: - push: - branches: - - km/automted-deployment workflow_dispatch: inputs: environment: From 8604cb0f8ff8464ecfa6c9ee23edd9094f03db3f Mon Sep 17 00:00:00 2001 From: Koen Date: Mon, 23 Feb 2026 10:28:42 +0200 Subject: [PATCH 21/25] :rocket: only update dev on push --- .github/workflows/check-indexer-sync.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/check-indexer-sync.yaml b/.github/workflows/check-indexer-sync.yaml index 6601555be..404703c41 100644 --- a/.github/workflows/check-indexer-sync.yaml +++ b/.github/workflows/check-indexer-sync.yaml @@ -51,6 +51,12 @@ jobs: TARGET="${{ inputs.environment }}" CURRENT="${{ matrix.environment }}" + if [ "$EVENT" = "push" ] && [ "$CURRENT" != "dev" ]; then + echo "Skipping $CURRENT (push only runs dev)" + echo "run=false" >> $GITHUB_OUTPUT + exit 0 + fi + if [ "$EVENT" = "schedule" ] || [ -z "$TARGET" ] || [ "$TARGET" = "$CURRENT" ]; then echo "run=true" >> $GITHUB_OUTPUT else From 5b8aa3e518cade2e78bda75768d028b2b4965915 Mon Sep 17 00:00:00 2001 From: Koen Date: Mon, 23 Feb 2026 10:29:40 +0200 Subject: [PATCH 22/25] :rocket: fail fast false --- .github/workflows/check-indexer-sync.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/check-indexer-sync.yaml b/.github/workflows/check-indexer-sync.yaml index 404703c41..0c8cf5e47 100644 --- a/.github/workflows/check-indexer-sync.yaml +++ b/.github/workflows/check-indexer-sync.yaml @@ -35,6 +35,7 @@ jobs: 
check-and-switch: runs-on: ubuntu-latest strategy: + fail-fast: false matrix: environment: [dev, staging, testnet, prod] environment: ${{ matrix.environment }} From b79027111cc422a9b72ab505741065e40ce2537b Mon Sep 17 00:00:00 2001 From: Koen Date: Mon, 23 Feb 2026 10:32:08 +0200 Subject: [PATCH 23/25] :rocket: start workflow from current branch --- .github/workflows/check-indexer-sync.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/check-indexer-sync.yaml b/.github/workflows/check-indexer-sync.yaml index 0c8cf5e47..cdde381b6 100644 --- a/.github/workflows/check-indexer-sync.yaml +++ b/.github/workflows/check-indexer-sync.yaml @@ -236,7 +236,7 @@ jobs: owner: context.repo.owner, repo: context.repo.repo, workflow_id: 'deploy-indexer.yaml', - ref: 'main', + ref: context.ref, inputs: { environment: env, green: oldLive === 'green' ? 'true' : 'false', From c46dbe051d563124f1ccef86257efa2c474b8d90 Mon Sep 17 00:00:00 2001 From: Koen Date: Mon, 23 Feb 2026 10:34:20 +0200 Subject: [PATCH 24/25] :rocket: remove push rules --- .github/workflows/check-indexer-sync.yaml | 9 --------- 1 file changed, 9 deletions(-) diff --git a/.github/workflows/check-indexer-sync.yaml b/.github/workflows/check-indexer-sync.yaml index cdde381b6..7b39e10e0 100644 --- a/.github/workflows/check-indexer-sync.yaml +++ b/.github/workflows/check-indexer-sync.yaml @@ -8,9 +8,6 @@ name: Check Indexer Sync & Switchover # 3. 
Triggers a deploy of the old live backend (so both end up updated) on: - push: - branches: - - km/automted-deployment schedule: - cron: '*/30 * * * *' workflow_dispatch: @@ -52,12 +49,6 @@ jobs: TARGET="${{ inputs.environment }}" CURRENT="${{ matrix.environment }}" - if [ "$EVENT" = "push" ] && [ "$CURRENT" != "dev" ]; then - echo "Skipping $CURRENT (push only runs dev)" - echo "run=false" >> $GITHUB_OUTPUT - exit 0 - fi - if [ "$EVENT" = "schedule" ] || [ -z "$TARGET" ] || [ "$TARGET" = "$CURRENT" ]; then echo "run=true" >> $GITHUB_OUTPUT else From 4a1e1bafeefca75d12e7347f19536295de0fd193 Mon Sep 17 00:00:00 2001 From: Koen Date: Mon, 23 Feb 2026 13:57:26 +0200 Subject: [PATCH 25/25] :bug: Process review --- atp-indexer/src/api/handlers/sync-status.ts | 2 +- atp-indexer/src/api/middleware/sync-guard.ts | 2 +- staking-dashboard/terraform/main.tf | 10 +++------- 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/atp-indexer/src/api/handlers/sync-status.ts b/atp-indexer/src/api/handlers/sync-status.ts index 02d4b8323..b0a5cb72a 100644 --- a/atp-indexer/src/api/handlers/sync-status.ts +++ b/atp-indexer/src/api/handlers/sync-status.ts @@ -13,7 +13,7 @@ interface SyncStatusResponse { timestamp: string; } -const SYNC_THRESHOLD_BLOCKS = 50; +const SYNC_THRESHOLD_BLOCKS = 10; /** * Handle GET /api/sync-status diff --git a/atp-indexer/src/api/middleware/sync-guard.ts b/atp-indexer/src/api/middleware/sync-guard.ts index 5d6df20f9..209787c82 100644 --- a/atp-indexer/src/api/middleware/sync-guard.ts +++ b/atp-indexer/src/api/middleware/sync-guard.ts @@ -13,7 +13,7 @@ import { getIndexerProgress } from '../utils/indexer-progress'; * always pass through so the blue-green cron can still query sync status. 
*/ -const BEHIND_THRESHOLD = 200; +const BEHIND_THRESHOLD = 50; const CHECK_INTERVAL_MS = 30_000; const INITIAL_DELAY_MS = 5_000; diff --git a/staking-dashboard/terraform/main.tf b/staking-dashboard/terraform/main.tf index 424cce64a..d163d40a7 100644 --- a/staking-dashboard/terraform/main.tf +++ b/staking-dashboard/terraform/main.tf @@ -378,13 +378,9 @@ resource "aws_cloudfront_distribution" "staking_dashboard_distribution" { # to control which indexer is primary. Ignore origin_group so Terraform # doesn't revert the switchover. Origins themselves have fixed domains # (red/green CF) and are fully managed by Terraform. - # - # MIGRATION from single indexerOrigin: temporarily comment out the - # lifecycle block, apply, then uncomment. This lets Terraform replace - # the old single origin with the red/green origins + origin group. - # lifecycle { - # ignore_changes = [origin_group] - # } + lifecycle { + ignore_changes = [origin_group] + } } #