diff --git a/evalbench/generators/models/claude_code.py b/evalbench/generators/models/claude_code.py index 4be102c2..5d9a7cbb 100644 --- a/evalbench/generators/models/claude_code.py +++ b/evalbench/generators/models/claude_code.py @@ -3,6 +3,7 @@ import os import json import logging +import shlex import sys @@ -39,6 +40,10 @@ def __init__(self, querygenerator_config): os.makedirs(self.fake_home, exist_ok=True) os.makedirs(self.claude_config_dir, exist_ok=True) + # When running as root, chown fake_home so the non-root claudeuser + # (used to run Claude Code) can write to it. + self._chown_for_claudeuser = os.getuid() == 0 + self.env = querygenerator_config.get("env", {}) self.env["HOME"] = self.fake_home @@ -274,6 +279,30 @@ def _run_claude_code(self, cli_cmd: CLICommand): for tool in allowed_tools: command.extend(["--allowedTools", tool]) + # Claude Code refuses --dangerously-skip-permissions when running as + # root. Wrap with `su` to drop privileges to a non-root user. + # Recursively chown the fake_home so claudeuser can write to it + # (covers .claude dir, gcloud creds, MCP config copied during init). + if self._chown_for_claudeuser: + subprocess.run( + ["chown", "-R", "claudeuser", self.fake_home], + check=False, + ) + # Build env var exports for critical authentication/config vars. + # su doesn't inherit env by default, so we explicitly export them. + env_exports = [] + for key in [ + "HOME", "PATH", "GOOGLE_APPLICATION_CREDENTIALS", + "ANTHROPIC_VERTEX_PROJECT_ID", "ANTHROPIC_API_KEY", + "CLOUD_ML_REGION", "GOOGLE_CLOUD_PROJECT" + ]: + if key in env and env[key]: + env_exports.append(f"export {key}={shlex.quote(env[key])}") + + cli_cmd = " ".join(shlex.quote(c) for c in command) + full_cmd = "; ".join(env_exports + [cli_cmd]) if env_exports else cli_cmd + command = ["su", "-s", "/bin/bash", "claudeuser", "-c", full_cmd] + logging.info(f"Running Claude Code CLI: {' '.join(command)}") result = self._execute_cli_command(command, env=env) diff --git a/evalbench_service/Dockerfile b/evalbench_service/Dockerfile index 87aa0f48..2a22ba5e 100644 --- a/evalbench_service/Dockerfile +++ b/evalbench_service/Dockerfile @@ -26,6 +26,10 @@ RUN uv pip install --system --break-system-packages -r requirements.txt RUN uv pip install --system --break-system-packages ./viewer RUN uv pip install --system --break-system-packages . +# Create a non-root user for Claude Code. It refuses +# --dangerously-skip-permissions when running as root. +RUN useradd -m -s /bin/bash claudeuser + RUN ln -s /usr/bin/python3 /usr/bin/python RUN make proto -f ./Makefile RUN mkdir /tmp_session_files /tmp_sessions