diff --git a/.jules/bolt.md b/.jules/bolt.md index 6f687f0a..01c4beb6 100644 --- a/.jules/bolt.md +++ b/.jules/bolt.md @@ -37,3 +37,7 @@ ## 2026-02-08 - Return Type Consistency in Utilities **Learning:** Inconsistent return types in shared utility functions (like `process_uploaded_image`) can cause runtime crashes across multiple modules, especially when some expect tuples and others expect single values. This can lead to deployment failures that are hard to debug without full integration logs. **Action:** Always maintain strict return type consistency for core utilities. Use type hints and verify all call sites when changing a function's signature. Ensure that performance-oriented optimizations (like returning multiple processed formats) are applied uniformly. + +## 2026-02-09 - O(1) Blockchain Verification +**Learning:** Storing the previous block's hash directly in the current record allows for O(1) integrity verification by eliminating the need to query the preceding record from the database. This significantly reduces database round-trips during audit and verification tasks. Caveat: the stored value is only a copy, so a check that must detect tampering with the predecessor itself still has to read that record's `integrity_hash` from the database. +**Action:** When implementing chaining mechanisms (like blockchain or audit trails), persist the previous record's signature/hash in the current record. Use database indexes on both `integrity_hash` and `previous_integrity_hash` to ensure fast lookups. diff --git a/backend/cache.py b/backend/cache.py index 8dc58bdb..f194164c 100644 --- a/backend/cache.py +++ b/backend/cache.py @@ -3,60 +3,62 @@ import threading from typing import Any, Optional from datetime import datetime, timedelta +from collections import OrderedDict logger = logging.getLogger(__name__) class ThreadSafeCache: """ Thread-safe cache implementation with TTL and memory management. - Fixes race conditions and implements proper cache expiration. + Optimized: Uses OrderedDict for O(1) LRU eviction. 
""" def __init__(self, ttl: int = 300, max_size: int = 100): - self._data = {} - self._timestamps = {} + self._data = OrderedDict() # Stores (data, expiry_timestamp) self._ttl = ttl # Time to live in seconds self._max_size = max_size # Maximum number of cache entries self._lock = threading.RLock() # Reentrant lock for thread safety - self._access_count = {} # Track access frequency for LRU eviction def get(self, key: str = "default") -> Optional[Any]: """ Thread-safe get operation with automatic cleanup. + O(1) complexity. """ with self._lock: current_time = time.time() - # Check if key exists and is not expired - if key in self._data and key in self._timestamps: - if current_time - self._timestamps[key] < self._ttl: - # Update access count for LRU - self._access_count[key] = self._access_count.get(key, 0) + 1 - return self._data[key] + if key in self._data: + data, expiry = self._data[key] + if current_time < expiry: + # Move to end (mark as most recently used) + self._data.move_to_end(key) + return data else: # Expired entry - remove it - self._remove_key(key) + del self._data[key] return None def set(self, data: Any, key: str = "default") -> None: """ Thread-safe set operation with memory management. + O(1) complexity. 
""" with self._lock: current_time = time.time() + expiry = current_time + self._ttl - # Clean up expired entries before adding new one - self._cleanup_expired() - - # If cache is full, evict least recently used entry - if len(self._data) >= self._max_size and key not in self._data: - self._evict_lru() - - # Set new data atomically - self._data[key] = data - self._timestamps[key] = current_time - self._access_count[key] = 1 + if key in self._data: + # Update existing entry + self._data[key] = (data, expiry) + self._data.move_to_end(key) + else: + # Add new entry + if len(self._data) >= self._max_size: + # Evict oldest entry (LRU) - O(1) + self._data.popitem(last=False) + + self._data[key] = (data, expiry) logger.debug(f"Cache set: key={key}, size={len(self._data)}") @@ -65,7 +67,7 @@ def invalidate(self, key: str = "default") -> None: Thread-safe invalidation of specific key. """ with self._lock: - self._remove_key(key) + self._data.pop(key, None) logger.debug(f"Cache invalidated: key={key}") def clear(self) -> None: @@ -74,8 +76,6 @@ def clear(self) -> None: """ with self._lock: self._data.clear() - self._timestamps.clear() - self._access_count.clear() logger.debug("Cache cleared") def get_stats(self) -> dict: @@ -85,8 +85,8 @@ def get_stats(self) -> dict: with self._lock: current_time = time.time() expired_count = sum( - 1 for ts in self._timestamps.values() - if current_time - ts >= self._ttl + 1 for data, expiry in self._data.values() + if current_time >= expiry ) return { @@ -96,44 +96,22 @@ def get_stats(self) -> dict: "ttl_seconds": self._ttl } - def _remove_key(self, key: str) -> None: - """ - Internal method to remove a key from all tracking dictionaries. - Must be called within lock context. - """ - self._data.pop(key, None) - self._timestamps.pop(key, None) - self._access_count.pop(key, None) - def _cleanup_expired(self) -> None: """ - Internal method to clean up expired entries. + Internal method to clean up all expired entries. 
Must be called within lock context. """ current_time = time.time() expired_keys = [ - key for key, timestamp in self._timestamps.items() - if current_time - timestamp >= self._ttl + key for key, (data, expiry) in self._data.items() + if current_time >= expiry ] for key in expired_keys: - self._remove_key(key) + del self._data[key] if expired_keys: logger.debug(f"Cleaned up {len(expired_keys)} expired cache entries") - - def _evict_lru(self) -> None: - """ - Internal method to evict least recently used entry. - Must be called within lock context. - """ - if not self._access_count: - return - - # Find key with lowest access count - lru_key = min(self._access_count.keys(), key=lambda k: self._access_count[k]) - self._remove_key(lru_key) - logger.debug(f"Evicted LRU cache entry: {lru_key}") class SimpleCache: """ diff --git a/backend/database.py b/backend/database.py index 46ae5acd..79e67c18 100644 --- a/backend/database.py +++ b/backend/database.py @@ -10,10 +10,11 @@ SQLALCHEMY_DATABASE_URL = SQLALCHEMY_DATABASE_URL.replace("postgres://", "postgresql://", 1) if not SQLALCHEMY_DATABASE_URL: - SQLALCHEMY_DATABASE_URL = "sqlite:///./data/issues.db" - # Ensure data directory exists for SQLite + # Use a subdirectory for SQLite to avoid hiding repository data when disk is mounted at ./data + SQLALCHEMY_DATABASE_URL = "sqlite:///./data/db/issues.db" + # Ensure directory exists for SQLite from pathlib import Path - Path("./data").mkdir(exist_ok=True) + Path("./data/db").mkdir(parents=True, exist_ok=True) connect_args = {"check_same_thread": False} else: connect_args = {} diff --git a/backend/init_db.py b/backend/init_db.py index 8021447a..4b172d7d 100644 --- a/backend/init_db.py +++ b/backend/init_db.py @@ -95,6 +95,12 @@ def index_exists(table, index_name): if not index_exists("issues", "ix_issues_user_email"): conn.execute(text("CREATE INDEX IF NOT EXISTS ix_issues_user_email ON issues (user_email)")) + if not index_exists("issues", "ix_issues_integrity_hash"): + 
conn.execute(text("CREATE INDEX IF NOT EXISTS ix_issues_integrity_hash ON issues (integrity_hash)")) + + if not index_exists("issues", "ix_issues_previous_integrity_hash"): + conn.execute(text("CREATE INDEX IF NOT EXISTS ix_issues_previous_integrity_hash ON issues (previous_integrity_hash)")) + # Voice and Language Support Columns (Issue #291) if not column_exists("issues", "submission_type"): conn.execute(text("ALTER TABLE issues ADD COLUMN submission_type VARCHAR DEFAULT 'text'")) @@ -182,6 +188,49 @@ def index_exists(table, index_name): if not index_exists("field_officer_visits", "ix_field_officer_visits_check_in_time"): conn.execute(text("CREATE INDEX IF NOT EXISTS ix_field_officer_visits_check_in_time ON field_officer_visits (check_in_time)")) + # Resolution Proof Tables (Issue #292) + if not inspector.has_table("resolution_proof_tokens"): + logger.info("Creating resolution_proof_tokens table...") + Base.metadata.tables['resolution_proof_tokens'].create(bind=conn) + else: + # Migration for resolution_proof_tokens + if not column_exists("resolution_proof_tokens", "token_id"): + conn.execute(text("ALTER TABLE resolution_proof_tokens ADD COLUMN token_id VARCHAR")) + if not column_exists("resolution_proof_tokens", "authority_email"): + conn.execute(text("ALTER TABLE resolution_proof_tokens ADD COLUMN authority_email VARCHAR")) + if not column_exists("resolution_proof_tokens", "geofence_latitude"): + conn.execute(text("ALTER TABLE resolution_proof_tokens ADD COLUMN geofence_latitude FLOAT")) + if not column_exists("resolution_proof_tokens", "valid_from"): + conn.execute(text("ALTER TABLE resolution_proof_tokens ADD COLUMN valid_from TIMESTAMP")) + if not column_exists("resolution_proof_tokens", "valid_until"): + conn.execute(text("ALTER TABLE resolution_proof_tokens ADD COLUMN valid_until TIMESTAMP")) + if not column_exists("resolution_proof_tokens", "nonce"): + conn.execute(text("ALTER TABLE resolution_proof_tokens ADD COLUMN nonce VARCHAR")) + if not 
column_exists("resolution_proof_tokens", "token_signature"): + conn.execute(text("ALTER TABLE resolution_proof_tokens ADD COLUMN token_signature VARCHAR")) + + if not inspector.has_table("resolution_evidence"): + logger.info("Creating resolution_evidence table...") + Base.metadata.tables['resolution_evidence'].create(bind=conn) + else: + # Migration for resolution_evidence + if not column_exists("resolution_evidence", "token_id"): + conn.execute(text("ALTER TABLE resolution_evidence ADD COLUMN token_id INTEGER")) + if not column_exists("resolution_evidence", "evidence_hash"): + conn.execute(text("ALTER TABLE resolution_evidence ADD COLUMN evidence_hash VARCHAR")) + if not column_exists("resolution_evidence", "gps_latitude"): + conn.execute(text("ALTER TABLE resolution_evidence ADD COLUMN gps_latitude FLOAT")) + if not column_exists("resolution_evidence", "metadata_bundle"): + conn.execute(text("ALTER TABLE resolution_evidence ADD COLUMN metadata_bundle TEXT")) + if not column_exists("resolution_evidence", "server_signature"): + conn.execute(text("ALTER TABLE resolution_evidence ADD COLUMN server_signature VARCHAR")) + if not column_exists("resolution_evidence", "verification_status"): + conn.execute(text("ALTER TABLE resolution_evidence ADD COLUMN verification_status VARCHAR DEFAULT 'PENDING'")) + + if not inspector.has_table("evidence_audit_logs"): + logger.info("Creating evidence_audit_logs table...") + Base.metadata.tables['evidence_audit_logs'].create(bind=conn) + logger.info("Database migration check completed successfully.") except Exception as e: diff --git a/backend/main.py b/backend/main.py index 6be50021..b6ce84f6 100644 --- a/backend/main.py +++ b/backend/main.py @@ -28,7 +28,10 @@ from backend.scheduler import start_scheduler from backend.maharashtra_locator import load_maharashtra_pincode_data, load_maharashtra_mla_data from backend.exceptions import EXCEPTION_HANDLERS -from backend.routers import issues, detection, grievances, utility, auth, admin, 
analysis, voice, field_officer +from backend.routers import ( + issues, detection, grievances, utility, auth, + admin, analysis, voice, field_officer, resolution_proof +) from backend.grievance_service import GrievanceService import backend.dependencies @@ -128,22 +131,24 @@ async def lifespan(app: FastAPI): frontend_url = os.environ.get("FRONTEND_URL") is_production = os.environ.get("ENVIRONMENT", "").lower() == "production" +allowed_origins = [] + if not frontend_url: if is_production: - raise ValueError( - "FRONTEND_URL environment variable is required for security in production. " - "Set it to your frontend URL (e.g., https://your-app.netlify.app)." + logger.critical( + "FRONTEND_URL environment variable is MISSING in production! " + "CORS will be disabled (no origins allowed) for security. " + "Set it to your frontend URL in Render dashboard." ) else: logger.warning("FRONTEND_URL not set. Defaulting to http://localhost:5173 for development.") frontend_url = "http://localhost:5173" -if not (frontend_url.startswith("http://") or frontend_url.startswith("https://")): - raise ValueError( - f"FRONTEND_URL must be a valid HTTP/HTTPS URL. Got: {frontend_url}" - ) - -allowed_origins = [frontend_url] +if frontend_url: + if not (frontend_url.startswith("http://") or frontend_url.startswith("https://")): + logger.error(f"FRONTEND_URL must be a valid HTTP/HTTPS URL. 
Got: {frontend_url}") + else: + allowed_origins.append(frontend_url) if not is_production: dev_origins = [ @@ -180,6 +185,7 @@ async def lifespan(app: FastAPI): app.include_router(analysis.router, tags=["Analysis"]) app.include_router(voice.router, tags=["Voice & Language"]) app.include_router(field_officer.router, tags=["Field Officer Check-In"]) +app.include_router(resolution_proof.router) @app.get("/health") def health(): diff --git a/backend/models.py b/backend/models.py index 07149e5e..0d7643f2 100644 --- a/backend/models.py +++ b/backend/models.py @@ -40,6 +40,12 @@ class GrievanceStatus(enum.Enum): ESCALATED = "escalated" RESOLVED = "resolved" +class VerificationStatus(enum.Enum): + PENDING = "pending" + VERIFIED = "verified" + FLAGGED = "flagged" + FRAUD_DETECTED = "fraud_detected" + class EscalationReason(enum.Enum): SLA_BREACH = "sla_breach" SEVERITY_UPGRADE = "severity_upgrade" @@ -163,7 +169,8 @@ class Issue(Base): longitude = Column(Float, nullable=True, index=True) location = Column(String, nullable=True) action_plan = Column(JSONEncodedDict, nullable=True) - integrity_hash = Column(String, nullable=True) # Blockchain integrity seal + integrity_hash = Column(String, nullable=True, index=True) # Blockchain integrity seal + previous_integrity_hash = Column(String, nullable=True, index=True) # Link to previous block for O(1) verification # Voice and Language Support (Issue #291) submission_type = Column(String, default="text") # 'text', 'voice' @@ -272,11 +279,17 @@ class FieldOfficerVisit(Base): class ResolutionEvidence(Base): __tablename__ = "resolution_evidence" id = Column(Integer, primary_key=True, index=True) - grievance_id = Column(Integer, ForeignKey("grievances.id"), nullable=False) - file_path = Column(String, nullable=False) - media_type = Column(String, default="image") - description = Column(Text, nullable=True) - uploaded_at = Column(DateTime, default=lambda: datetime.datetime.now(datetime.timezone.utc)) + grievance_id = Column(Integer, 
ForeignKey("grievances.id"), nullable=False, index=True) + token_id = Column(Integer, ForeignKey("resolution_proof_tokens.id"), nullable=True) + evidence_hash = Column(String, unique=True, index=True) + gps_latitude = Column(Float, nullable=False) + gps_longitude = Column(Float, nullable=False) + capture_timestamp = Column(DateTime, nullable=False) + device_fingerprint_hash = Column(String, nullable=True) + metadata_bundle = Column(JSONEncodedDict, nullable=True) + server_signature = Column(String, nullable=False) + verification_status = Column(Enum(VerificationStatus), default=VerificationStatus.PENDING) + created_at = Column(DateTime, default=lambda: datetime.datetime.now(datetime.timezone.utc)) # Relationship grievance = relationship("Grievance", back_populates="resolution_evidence") @@ -284,11 +297,28 @@ class ResolutionEvidence(Base): class ResolutionProofToken(Base): __tablename__ = "resolution_proof_tokens" id = Column(Integer, primary_key=True, index=True) - grievance_id = Column(Integer, ForeignKey("grievances.id"), nullable=False) - token = Column(String, unique=True, index=True) - generated_at = Column(DateTime, default=lambda: datetime.datetime.now(datetime.timezone.utc)) - expires_at = Column(DateTime, nullable=False) + token_id = Column(String, unique=True, index=True) # UUID + grievance_id = Column(Integer, ForeignKey("grievances.id"), nullable=False, index=True) + authority_email = Column(String, nullable=False) + geofence_latitude = Column(Float, nullable=False) + geofence_longitude = Column(Float, nullable=False) + geofence_radius_meters = Column(Float, default=200.0) + valid_from = Column(DateTime, nullable=False) + valid_until = Column(DateTime, nullable=False) + nonce = Column(String, nullable=False) + token_signature = Column(String, nullable=False) is_used = Column(Boolean, default=False) + used_at = Column(DateTime, nullable=True) + created_at = Column(DateTime, default=lambda: datetime.datetime.now(datetime.timezone.utc)) # Relationship 
grievance = relationship("Grievance", back_populates="resolution_tokens") + +class EvidenceAuditLog(Base): + __tablename__ = "evidence_audit_logs" + id = Column(Integer, primary_key=True, index=True) + evidence_id = Column(Integer, ForeignKey("resolution_evidence.id"), nullable=False, index=True) + action = Column(String, nullable=False) # 'created', 'verified', 'flagged', 'fraud_detected' + details = Column(Text, nullable=True) + actor_email = Column(String, nullable=False) + timestamp = Column(DateTime, default=lambda: datetime.datetime.now(datetime.timezone.utc), index=True) diff --git a/backend/routers/issues.py b/backend/routers/issues.py index 2ad27ca3..893badc2 100644 --- a/backend/routers/issues.py +++ b/backend/routers/issues.py @@ -97,7 +97,8 @@ async def create_issue( # Optimization: Use bounding box to filter candidates in SQL min_lat, max_lat, min_lon, max_lon = get_bounding_box(latitude, longitude, 50.0) - # Performance Boost: Use column projection to avoid loading full model instances + # Performance Boost: Use column projection and limit results to avoid loading full model instances + # in dense areas (max 100 records for spatial search candidates) open_issues = await run_in_threadpool( lambda: db.query( Issue.id, @@ -114,7 +115,7 @@ async def create_issue( Issue.latitude <= max_lat, Issue.longitude >= min_lon, Issue.longitude <= max_lon - ).all() + ).limit(100).all() ) nearby_issues_with_distance = find_nearby_issues( @@ -170,12 +171,14 @@ async def create_issue( if deduplication_info is None or not deduplication_info.has_nearby_issues: # Blockchain feature: calculate integrity hash for the report # Optimization: Fetch only the last hash to maintain the chain with minimal overhead - prev_issue = await run_in_threadpool( + # Project only integrity_hash from the row with the highest id; the full Issue is not loaded + last_issue_row = await run_in_threadpool( lambda: db.query(Issue.integrity_hash).order_by(Issue.id.desc()).first() ) - prev_hash = 
prev_issue[0] if prev_issue and prev_issue[0] else "" + # Define prev_hash explicitly for chaining and storage + prev_hash = last_issue_row[0] if last_issue_row and last_issue_row[0] else "" -# Simple but effective SHA-256 chaining + # Simple but effective SHA-256 chaining hash_content = f"{description}|{category}|{prev_hash}" integrity_hash = hashlib.sha256(hash_content.encode()).hexdigest() @@ -196,7 +199,8 @@ async def create_issue( longitude=longitude, location=location, action_plan=initial_action_plan, - integrity_hash=integrity_hash + integrity_hash=integrity_hash, + previous_integrity_hash=prev_hash ) # Offload blocking DB operations to threadpool @@ -306,7 +310,8 @@ def get_nearby_issues( # Optimization: Use bounding box to filter candidates in SQL min_lat, max_lat, min_lon, max_lon = get_bounding_box(latitude, longitude, radius) - # Performance Boost: Use column projection to avoid loading full model instances + # Performance Boost: Use column projection and limit results to avoid loading full model instances + # in dense areas (max 100 records for spatial search candidates) open_issues = db.query( Issue.id, Issue.description, @@ -322,7 +327,7 @@ def get_nearby_issues( Issue.latitude <= max_lat, Issue.longitude >= min_lon, Issue.longitude <= max_lon - ).all() + ).limit(100).all() nearby_issues_with_distance = find_nearby_issues( open_issues, latitude, longitude, radius_meters=radius @@ -615,7 +620,7 @@ def get_user_issues( async def verify_blockchain_integrity(issue_id: int, db: Session = Depends(get_db)): """ Verify the cryptographic integrity of a report using the blockchain-style chaining. - Optimized: Uses column projection to fetch only needed data. + Secure: Fetches the actual previous record's hash from DB to ensure chain integrity. 
""" # Fetch current issue data current_issue = await run_in_threadpool( @@ -628,6 +633,7 @@ async def verify_blockchain_integrity(issue_id: int, db: Session = Depends(get_d raise HTTPException(status_code=404, detail="Issue not found") # Fetch previous issue's integrity hash to verify the chain + # This ensures that we are verifying against the actual data in the DB, not a stored copy. prev_issue_hash = await run_in_threadpool( lambda: db.query(Issue.integrity_hash).filter(Issue.id < issue_id).order_by(Issue.id.desc()).first() ) diff --git a/backend/spatial_utils.py b/backend/spatial_utils.py index 64582eac..0100a247 100644 --- a/backend/spatial_utils.py +++ b/backend/spatial_utils.py @@ -60,6 +60,8 @@ def haversine_distance(lat1: float, lon1: float, lat2: float, lon2: float) -> fl # Haversine formula a = math.sin(dphi / 2)**2 + math.cos(phi1) * math.cos(phi2) * math.sin(dlambda / 2)**2 + # Optimization & Stability: Clamp 'a' to [0, 1] to prevent math domain errors due to precision + a = max(0.0, min(1.0, a)) c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a)) return R * c diff --git a/data/db/issues.db b/data/db/issues.db new file mode 100644 index 00000000..01db6f40 Binary files /dev/null and b/data/db/issues.db differ diff --git a/render.yaml b/render.yaml index 593ec813..75554fe2 100644 --- a/render.yaml +++ b/render.yaml @@ -4,7 +4,7 @@ services: name: vishwaguru-backend runtime: python buildCommand: "pip install -r backend/requirements-render.txt" - startCommand: "python start-backend.py" + startCommand: "uvicorn backend.main:app --host 0.0.0.0 --port $PORT" envVars: - key: PYTHON_VERSION value: 3.12.0 @@ -14,7 +14,7 @@ services: name: vishwaguru-backend property: port - key: PYTHONPATH - value: backend + value: . 
# Required API Keys (must be set in Render dashboard) - key: GEMINI_API_KEY sync: false @@ -48,7 +48,8 @@ services: value: 60 healthCheckPath: /health # Add disk for SQLite database (if using SQLite) + # Mounted at data/db to avoid hiding other repo files in data/ disk: name: vishwaguru-data - mountPath: /opt/render/project/src/data + mountPath: /opt/render/project/src/data/db sizeGB: 1