Add input sanitization middleware and enhance health check endpoint

JavaDogWebDesign · claude · JavaDogWebDesign · commit e547296fabce · 2025-12-10T12:56:41.000-05:00
- Add InputSanitizer middleware to detect/log dangerous input patterns (XSS, injection) - Enhance /health endpoint with DB table verification, storage write checks, uptime, and version info - Returns 503 when any health check fails for better monitoring integration 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
diff --git a/app/app.py b/app/app.py
@@ -3,9 +3,16 @@
 import os
 import secrets
 import logging
+import time
+import tempfile
 from database import init_database, get_db_connection
-from utils.config import MAX_CONTENT_LENGTH
+from utils.config import MAX_CONTENT_LENGTH, USER_DATA_BASE_FOLDER
 from utils.helpers import get_currency_symbol, get_user_currency, format_currency, format_date, get_user_date_format, DATE_FORMAT_MAP, DEFAULT_DATE_FORMAT
+from utils.sanitization import InputSanitizer
+
+# Track application start time for uptime reporting.
+# Captured once when this module is imported; get_uptime() subtracts it from
+# the current time to report how long the process has been running.
+APP_START_TIME = time.time()
+APP_VERSION = '2.0.0'  # Reported by the /health endpoint; update this with your versioning scheme
 
 # Configure logging
 logging.basicConfig(
@@ -86,6 +93,11 @@
     limiter = None
     app.limiter = None
 
+# Input Sanitization Middleware
+# Passing `app` makes InputSanitizer register a before_request hook that
+# automatically detects and logs potentially dangerous input (XSS, injection
+# attempts). The hook only flags and logs; the request data itself is left
+# unmodified.
+sanitizer = InputSanitizer(app, logger)
+logger.info("Input sanitization middleware enabled")
+
 # Register Jinja template filters
 app.template_filter('currency')(format_currency)
 
@@ -212,24 +224,104 @@ def set_security_headers(response):
 # Health Check Endpoint - For monitoring and container orchestration
 # ============================================================================
 
-@app.route('/health')
-def health_check():
-    """Health check endpoint for monitoring"""
+def check_database_health():
+    """Check database connectivity and that the core schema is present.
+
+    Returns:
+        dict: ``{'status': 'ok', 'tables_exist': True}`` when the connection
+        works and the ``users`` table exists. When the table is missing the
+        status is ``'error'`` with ``tables_exist`` set to ``False`` and an
+        ``error`` message. When connecting or querying raises, the status is
+        ``'error'`` with the exception text under ``error``.
+    """
     try:
-        # Check database connection
         conn = get_db_connection()
-        conn.execute('SELECT 1')
-        conn.close()
-        return jsonify({
-            'status': 'healthy',
-            'database': 'connected'
-        }), 200
+        try:
+            # Cheap round-trip proving the connection is actually usable
+            conn.execute('SELECT 1')
+            # Check if tables exist
+            tables = conn.execute(
+                "SELECT name FROM sqlite_master WHERE type='table' AND name='users'"
+            ).fetchone()
+        finally:
+            # Release the connection even when one of the queries raises,
+            # otherwise every failing probe would leak a connection.
+            conn.close()
+    except Exception as e:
+        return {
+            'status': 'error',
+            'error': str(e)
+        }
+
+    if tables is None:
+        # A reachable database without its schema cannot serve the app, so
+        # the table verification must be able to fail the health check.
+        return {
+            'status': 'error',
+            'tables_exist': False,
+            'error': "Required table 'users' is missing"
+        }
+
+    return {
+        'status': 'ok',
+        'tables_exist': True
+    }
+
+
+def check_storage_health():
+    """Check file storage is accessible and writable.
+
+    Creates the user-data directory if needed and writes a short-lived probe
+    file into it.
+
+    Returns:
+        dict: ``{'status': 'ok', 'writable': True, 'path': <data dir>}`` on
+        success, otherwise ``{'status': 'error', 'error': <message>}``.
+    """
+    try:
+        data_dir = os.path.join(os.path.dirname(__file__), USER_DATA_BASE_FOLDER)
+        # exist_ok makes this a no-op when the directory is already there
+        os.makedirs(data_dir, exist_ok=True)
+
+        # Test write capability with a uniquely named probe file. A fixed
+        # file name lets two concurrent health checks delete each other's
+        # probe and report a spurious failure. The context manager removes
+        # the file on exit, even if the write itself fails.
+        with tempfile.NamedTemporaryFile(
+            mode='w', dir=data_dir, prefix='.health_check_'
+        ) as probe:
+            probe.write('health_check')
+            probe.flush()
+
+        return {
+            'status': 'ok',
+            'writable': True,
+            'path': data_dir
+        }
+    except PermissionError:
+        return {
+            'status': 'error',
+            'error': 'Permission denied - cannot write to data directory'
+        }
     except Exception as e:
-        return jsonify({
-            'status': 'unhealthy',
-            'database': 'disconnected',
+        return {
+            'status': 'error',
             'error': str(e)
-        }), 503
+        }
+
+
+def get_uptime():
+    """Return the time since APP_START_TIME as a compact string.
+
+    Only the most significant units are shown: ``"Xd Yh Zm"`` once at least
+    a day has passed, then ``"Yh Zm Ws"``, ``"Zm Ws"`` or just ``"Ws"``.
+    """
+    elapsed = int(time.time() - APP_START_TIME)
+
+    # Peel off the units from smallest to largest
+    total_minutes, seconds = divmod(elapsed, 60)
+    total_hours, minutes = divmod(total_minutes, 60)
+    days, hours = divmod(total_hours, 24)
+
+    if days > 0:
+        return f"{days}d {hours}h {minutes}m"
+    if hours > 0:
+        return f"{hours}h {minutes}m {seconds}s"
+    if minutes > 0:
+        return f"{minutes}m {seconds}s"
+    return f"{seconds}s"
+
+
+@app.route('/health')
+def health_check():
+    """
+    Aggregate the individual health checks into one monitoring response.
+
+    Runs the database and storage checks and reports them together with the
+    application version, uptime and the FLASK_ENV value.
+
+    Returns:
+        - 200: every check reported status 'ok'
+        - 503: at least one check reported a different status
+    """
+    checks = {}
+    checks['database'] = check_database_health()
+    checks['storage'] = check_storage_health()
+
+    # Healthy only if no check reports anything other than 'ok'
+    failing = [
+        name for name, result in checks.items()
+        if result.get('status') != 'ok'
+    ]
+    if failing:
+        overall, http_code = 'unhealthy', 503
+    else:
+        overall, http_code = 'healthy', 200
+
+    payload = {
+        'status': overall,
+        'version': APP_VERSION,
+        'uptime': get_uptime(),
+        'environment': os.environ.get('FLASK_ENV', 'development'),
+        'checks': checks
+    }
+
+    return jsonify(payload), http_code
 
 if __name__ == '__main__':
     # Initialize database and run migrations
diff --git a/app/utils/sanitization.py b/app/utils/sanitization.py
@@ -0,0 +1,181 @@
+"""Input sanitization utilities for XSS and injection prevention"""
+import re
+import html
+from functools import wraps
+from flask import request, g
+
+# Patterns that flag likely XSS / script-injection payloads in user input:
+# script-capable tags, javascript: URIs and inline event-handler attributes.
+DANGEROUS_PATTERNS = [
+    # The opening tag alone is enough: an unclosed <script src=...> is still
+    # an injection attempt, so no closing tag is required.
+    re.compile(r'<script[^>]*>', re.IGNORECASE),
+    re.compile(r'javascript:', re.IGNORECASE),
+    # \b anchors the match at the start of a word so ordinary text such as
+    # "money=" or "confirm=" is not reported as an event handler.
+    re.compile(r'\bon\w+\s*=', re.IGNORECASE),  # onclick=, onerror=, etc.
+    re.compile(r'<iframe[^>]*>', re.IGNORECASE),
+    re.compile(r'<object[^>]*>', re.IGNORECASE),
+    re.compile(r'<embed[^>]*>', re.IGNORECASE),
+]
+
+# Fields that should NOT be sanitized (e.g., passwords, file contents)
+SKIP_SANITIZATION_FIELDS = {'password', 'password_hash', 'current_password', 'new_password', 'confirm_password'}
+
+
+def sanitize_string(value):
+    """
+    HTML-escape a string so it is safe to embed in markup.
+
+    Args:
+        value: Value to sanitize; anything that is not a ``str`` is
+            returned unchanged.
+
+    Returns:
+        The string with ``&``, ``<``, ``>`` and both quote characters
+        replaced by HTML entities, or the original non-string value.
+    """
+    if isinstance(value, str):
+        # quote=True also escapes " and ' so the result is attribute-safe
+        return html.escape(value, quote=True)
+
+    return value
+
+
+def contains_dangerous_pattern(value):
+    """
+    Report whether a value matches any entry in DANGEROUS_PATTERNS.
+
+    Args:
+        value: Value to inspect; non-string values never match
+
+    Returns:
+        bool: True if at least one dangerous pattern is found
+    """
+    return isinstance(value, str) and any(
+        pattern.search(value) is not None
+        for pattern in DANGEROUS_PATTERNS
+    )
+
+
+def sanitize_dict(data, skip_fields=None):
+    """
+    Walk nested dicts/lists and HTML-escape every string found.
+
+    Args:
+        data: Dictionary, list, string or any other value to sanitize
+        skip_fields: Set of dictionary keys whose values are returned
+            untouched (defaults to SKIP_SANITIZATION_FIELDS)
+
+    Returns:
+        A sanitized copy with the same structure; values that are not
+        dicts, lists or strings are returned as-is
+    """
+    fields_to_skip = SKIP_SANITIZATION_FIELDS if skip_fields is None else skip_fields
+
+    if isinstance(data, str):
+        return sanitize_string(data)
+
+    if isinstance(data, list):
+        return [sanitize_dict(element, fields_to_skip) for element in data]
+
+    if not isinstance(data, dict):
+        return data
+
+    cleaned = {}
+    for name, item in data.items():
+        if name in fields_to_skip:
+            # Skipped fields (e.g. passwords) keep their raw value
+            cleaned[name] = item
+        else:
+            cleaned[name] = sanitize_dict(item, fields_to_skip)
+    return cleaned
+
+
+def get_sanitized_input():
+    """
+    Build HTML-escaped copies of the current request's form, args and JSON.
+
+    The result is cached on Flask's ``g`` object, so repeated calls within
+    the same request return the same dictionary.
+
+    Returns:
+        dict with 'form', 'args', and 'json' keys containing sanitized data
+    """
+    try:
+        # Already computed earlier in this request
+        return g._sanitized_input
+    except AttributeError:
+        pass
+
+    if request.form:
+        form_data = sanitize_dict(request.form.to_dict())
+    else:
+        form_data = {}
+
+    if request.args:
+        args_data = sanitize_dict(request.args.to_dict())
+    else:
+        args_data = {}
+
+    if request.is_json:
+        # silent=True yields None for an unparsable body; fall back to {}
+        json_data = sanitize_dict(request.get_json(silent=True) or {})
+    else:
+        json_data = {}
+
+    result = {'form': form_data, 'args': args_data, 'json': json_data}
+    g._sanitized_input = result
+    return result
+
+
+def log_dangerous_input(field_name, value, logger=None):
+    """
+    Log when dangerous input is detected (for security monitoring).
+
+    Both the field name and the value come from the client, so control
+    characters (CR, LF, ...) are escaped before logging; otherwise a crafted
+    request could forge extra log lines.
+
+    Args:
+        field_name: Name of the field containing dangerous input
+        value: The dangerous value (truncated to 100 characters for logging);
+            non-string values are converted with ``str()``
+        logger: Logger instance to use; nothing is logged when it is falsy
+    """
+    if not logger:
+        return
+
+    def _neutralize(text):
+        # Keep printable characters, replace everything else with its
+        # backslash escape (e.g. a newline becomes the two characters \n).
+        return ''.join(
+            ch if ch.isprintable() else ch.encode('unicode_escape').decode('ascii')
+            for ch in text
+        )
+
+    text = value if isinstance(value, str) else str(value)
+    # Truncate before escaping so the limit applies to the raw input
+    truncated = text[:100] + '...' if len(text) > 100 else text
+    logger.warning(
+        "Potentially dangerous input detected in field '%s': %s",
+        _neutralize(str(field_name)),
+        _neutralize(truncated),
+    )
+
+
+class InputSanitizer:
+    """
+    Middleware class that flags dangerous request input.
+
+    Once attached to a Flask app it registers a ``before_request`` hook that
+    scans form data, query args and the JSON body for dangerous patterns,
+    logs every hit and sets ``g._has_dangerous_input``. It then pre-computes
+    the sanitized input via ``get_sanitized_input()``. The request itself is
+    not modified.
+    """
+
+    def __init__(self, app=None, logger=None):
+        """
+        Args:
+            app: Flask app to attach to immediately; when None, call
+                ``init_app`` later
+            logger: Logger used to report dangerous input (may be None)
+        """
+        self.app = app
+        self.logger = logger
+        # Set to False to turn the before_request hook into a no-op
+        self.enabled = True
+
+        if app is not None:
+            self.init_app(app)
+
+    def init_app(self, app):
+        """Initialize the sanitizer with a Flask app."""
+        self.app = app
+
+        @app.before_request
+        def sanitize_request_input():
+            """Pre-process and flag dangerous input before route handlers."""
+            if not self.enabled:
+                return
+
+            # Check form data and query args
+            self._check_multidict(request.form)
+            self._check_multidict(request.args)
+
+            # Check JSON body
+            if request.is_json:
+                json_data = request.get_json(silent=True)
+                if json_data:
+                    self._check_json_recursive(json_data)
+
+            # Pre-compute sanitized input for use in routes
+            get_sanitized_input()
+
+    def _check_multidict(self, values):
+        """Flag dangerous values in a form/args multi-dict.
+
+        ``multi=True`` yields every value of a repeated key; the default
+        only yields the first one, which would let a payload hide behind a
+        harmless duplicate. Fields in SKIP_SANITIZATION_FIELDS are skipped
+        for both form data and query args, so password-like values are
+        never written to the log.
+        """
+        for key, value in values.items(multi=True):
+            if key in SKIP_SANITIZATION_FIELDS:
+                continue
+            if contains_dangerous_pattern(value):
+                log_dangerous_input(key, value, self.logger)
+                g._has_dangerous_input = True
+
+    def _check_json_recursive(self, data, path=''):
+        """Recursively check JSON data for dangerous patterns.
+
+        Args:
+            data: Parsed JSON value (dict, list, string or scalar)
+            path: Dotted/indexed location of ``data`` within the body,
+                used as the field name when logging
+        """
+        if isinstance(data, dict):
+            for key, value in data.items():
+                current_path = f"{path}.{key}" if path else key
+                if key not in SKIP_SANITIZATION_FIELDS:
+                    self._check_json_recursive(value, current_path)
+        elif isinstance(data, list):
+            for i, item in enumerate(data):
+                self._check_json_recursive(item, f"{path}[{i}]")
+        elif isinstance(data, str):
+            if contains_dangerous_pattern(data):
+                log_dangerous_input(path, data, self.logger)
+                g._has_dangerous_input = True