From a310d4b3cf6b48b7571d5c9b52d5a38dc1b548b1 Mon Sep 17 00:00:00 2001 From: David Blanc Brioir Date: Sun, 14 Dec 2025 00:45:40 +0100 Subject: [PATCH] Initial commit: Linear-integrated autonomous coding agent with Initializer Bis support --- .claude/settings.local.json | 9 + .gitignore | 9 + LICENSE | 21 + README.md | 280 ++++++++++ agent.py | 231 ++++++++ autonomous_agent_demo.py | 138 +++++ client.py | 169 ++++++ linear_config.py | 38 ++ progress.py | 86 +++ prompts.py | 63 +++ prompts/app_spec.txt | 681 +++++++++++++++++++++++ prompts/app_spec_mistral_extensible.txt | 448 +++++++++++++++ prompts/app_spec_theme_customization.txt | 403 ++++++++++++++ prompts/coding_prompt.md | 304 ++++++++++ prompts/initializer_bis_prompt.md | 187 +++++++ prompts/initializer_prompt.md | 202 +++++++ requirements.txt | 1 + security.py | 359 ++++++++++++ test_security.py | 290 ++++++++++ 19 files changed, 3919 insertions(+) create mode 100644 .claude/settings.local.json create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 README.md create mode 100644 agent.py create mode 100644 autonomous_agent_demo.py create mode 100644 client.py create mode 100644 linear_config.py create mode 100644 progress.py create mode 100644 prompts.py create mode 100644 prompts/app_spec.txt create mode 100644 prompts/app_spec_mistral_extensible.txt create mode 100644 prompts/app_spec_theme_customization.txt create mode 100644 prompts/coding_prompt.md create mode 100644 prompts/initializer_bis_prompt.md create mode 100644 prompts/initializer_prompt.md create mode 100644 requirements.txt create mode 100644 security.py create mode 100644 test_security.py diff --git a/.claude/settings.local.json b/.claude/settings.local.json new file mode 100644 index 0000000..c85f664 --- /dev/null +++ b/.claude/settings.local.json @@ -0,0 +1,9 @@ +{ + "permissions": { + "allow": [ + "Bash(test:*)", + "Bash(cat:*)", + "Bash(netstat:*)" + ] + } +} diff --git a/.gitignore b/.gitignore new file mode 100644 
index 0000000..cc88114 --- /dev/null +++ b/.gitignore @@ -0,0 +1,9 @@ +# Agent-generated output directories +generations/ + +# Log files +logs/ + +.env +venv +__pycache__ \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..44b90e6 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 Cole Medin + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..21b7326 --- /dev/null +++ b/README.md @@ -0,0 +1,280 @@ +# Autonomous Coding Agent Demo (Linear-Integrated) + +A minimal harness demonstrating long-running autonomous coding with the Claude Agent SDK. This demo implements a two-agent pattern (initializer + coding agent) with **Linear as the core project management system** for tracking all work. 
+ +## Key Features + +- **Linear Integration**: All work is tracked as Linear issues, not local files +- **Real-time Visibility**: Watch agent progress directly in your Linear workspace +- **Session Handoff**: Agents communicate via Linear comments, not text files +- **Two-Agent Pattern**: Initializer creates Linear project & issues, coding agents implement them +- **Initializer Bis**: Add new features to existing projects without re-initializing +- **Browser Testing**: Puppeteer MCP for UI verification +- **Claude Opus 4.5**: Uses Claude's most capable model by default + +## Prerequisites + +### 1. Install Claude Code CLI and Python SDK + +```bash +# Install Claude Code CLI (latest version required) +npm install -g @anthropic-ai/claude-code + +# Install Python dependencies +pip install -r requirements.txt +``` + +### 2. Set Up Authentication + +You need two authentication tokens: + +**Claude Code OAuth Token:** +```bash +# Generate the token using Claude Code CLI +claude setup-token + +# Set the environment variable +export CLAUDE_CODE_OAUTH_TOKEN='your-oauth-token-here' +``` + +**Linear API Key:** +```bash +# Get your API key from: https://linear.app/YOUR-TEAM/settings/api +export LINEAR_API_KEY='lin_api_xxxxxxxxxxxxx' +``` + +### 3. 
Verify Installation + +```bash +claude --version # Should be latest version +pip show claude-code-sdk # Check SDK is installed +``` + +## Quick Start + +```bash +# Initialize a new project +python autonomous_agent_demo.py --project-dir ./my_project + +# Add new features to an existing project +python autonomous_agent_demo.py --project-dir ./my_project --new-spec app_spec_theme_customization.txt +``` + +For testing with limited iterations: +```bash +python autonomous_agent_demo.py --project-dir ./my_project --max-iterations 3 +``` + +## How It Works + +### Linear-Centric Workflow + +``` +┌─────────────────────────────────────────────────────────────┐ +│ LINEAR-INTEGRATED WORKFLOW │ +├─────────────────────────────────────────────────────────────┤ +│ app_spec.txt ──► Initializer Agent ──► Linear Issues (50) │ +│ │ │ +│ ┌─────────────────────────▼──────────┐ │ +│ │ LINEAR WORKSPACE │ │ +│ │ ┌────────────────────────────┐ │ │ +│ │ │ Issue: Auth - Login flow │ │ │ +│ │ │ Status: Todo → In Progress │ │ │ +│ │ │ Comments: [session notes] │ │ │ +│ │ └────────────────────────────┘ │ │ +│ └────────────────────────────────────┘ │ +│ │ │ +│ Coding Agent queries Linear │ +│ ├── Search for Todo issues │ +│ ├── Update status to In Progress │ +│ ├── Implement & test with Puppeteer │ +│ ├── Add comment with implementation notes│ +│ └── Update status to Done │ +└─────────────────────────────────────────────────────────────┘ +``` + +### Two-Agent Pattern + +1. **Initializer Agent (Session 1):** + - Reads `app_spec.txt` + - Lists teams and creates a new Linear project + - Creates 50 Linear issues with detailed test steps + - Creates a META issue for session tracking + - Sets up project structure, `init.sh`, and git + +2. 
**Coding Agent (Sessions 2+):** + - Queries Linear for highest-priority Todo issue + - Runs verification tests on previously completed features + - Claims issue (status → In Progress) + - Implements the feature + - Tests via Puppeteer browser automation + - Adds implementation comment to issue + - Marks complete (status → Done) + - Updates META issue with session summary + +### Initializer Bis: Adding New Features + +The **Initializer Bis** agent allows you to add new features to an existing project without re-initializing it. This is useful when you want to extend your application with additional functionality. + +**How it works:** +1. Create a new specification file (e.g., `app_spec_theme_customization.txt`) in the `prompts/` directory +2. Run the agent with the `--new-spec` flag pointing to your new spec file +3. The Initializer Bis agent will: + - Read the existing project state from `.linear_project.json` + - Read the new specification file + - Create new Linear issues for each feature tag in the spec + - Add these issues to the existing Linear project + - Update the META issue with information about the new features + - Copy the new spec file to the project directory + +**Example:** +```bash +# Add theme customization features to an existing project +python autonomous_agent_demo.py --project-dir ./my_project --new-spec app_spec_theme_customization.txt +``` + +This will create multiple Linear issues (one per feature tag) that will be worked on by subsequent coding agent sessions. 
+ +### Session Handoff via Linear + +Instead of local text files, agents communicate through: +- **Issue Comments**: Implementation details, blockers, context +- **META Issue**: Session summaries and handoff notes +- **Issue Status**: Todo / In Progress / Done workflow + +## Environment Variables + +| Variable | Description | Required | +|----------|-------------|----------| +| `CLAUDE_CODE_OAUTH_TOKEN` | Claude Code OAuth token (from `claude setup-token`) | Yes | +| `LINEAR_API_KEY` | Linear API key for MCP access | Yes | + +## Command Line Options + +| Option | Description | Default | +|--------|-------------|---------| +| `--project-dir` | Directory for the project (relative paths are created under `generations/`) | `./autonomous_demo_project` | +| `--max-iterations` | Max agent iterations | Unlimited | +| `--model` | Claude model to use | `claude-opus-4-5-20251101` | +| `--new-spec` | Name of a new specification file to add (e.g., 'app_spec_new1.txt'). Use this to add new features to an existing project. | None | + +## Project Structure + +``` +linear-agent-harness/ +├── autonomous_agent_demo.py # Main entry point +├── agent.py # Agent session logic +├── client.py # Claude SDK + MCP client configuration +├── security.py # Bash command allowlist and validation +├── progress.py # Progress tracking utilities +├── prompts.py # Prompt loading utilities +├── linear_config.py # Linear configuration constants +├── prompts/ +│ ├── app_spec.txt # Application specification +│ ├── app_spec_theme_customization.txt # Example: Theme customization spec +│ ├── app_spec_mistral_extensible.txt # Example: Mistral provider spec +│ ├── initializer_prompt.md # First session prompt (creates Linear issues) +│ ├── initializer_bis_prompt.md # Prompt for adding new features +│ └── coding_prompt.md # Continuation session prompt (works issues) +└── requirements.txt # Python dependencies +``` + +## Generated Project Structure + +After running, your project directory will contain: + +``` +my_project/ +├── .linear_project.json # Linear project 
state (marker file) +├── app_spec.txt # Copied specification +├── app_spec_theme_customization.txt # New spec file (if using --new-spec) +├── init.sh # Environment setup script +├── .claude_settings.json # Security settings +└── [application files] # Generated application code +``` + +## MCP Servers Used + +| Server | Transport | Purpose | +|--------|-----------|---------| +| **Linear** | HTTP (Streamable HTTP) | Project management - issues, status, comments | +| **Puppeteer** | stdio | Browser automation for UI testing | + +## Security Model + +This demo uses defense-in-depth security (see `security.py` and `client.py`): + +1. **OS-level Sandbox:** Bash commands run in an isolated environment +2. **Filesystem Restrictions:** File operations restricted to project directory +3. **Bash Allowlist:** Only specific commands permitted (npm, node, git, etc.) +4. **MCP Permissions:** Tools explicitly allowed in security settings + +## Linear Setup + +Before running, ensure you have: + +1. A Linear workspace with at least one team +2. An API key with read/write permissions (from Settings > API) +3. The agent will automatically detect your team and create a project + +The initializer agent will create: +- A new Linear project named after your app +- 50 feature issues based on `app_spec.txt` +- 1 META issue for session tracking and handoff + +All subsequent coding agents will work from this Linear project. + +## Customization + +### Changing the Application + +Edit `prompts/app_spec.txt` to specify a different application to build. + +### Adding New Features to Existing Projects + +1. Create a new specification file in the `prompts/` directory (e.g., `app_spec_new_feature.txt`) +2. Format it with feature tags following the same structure as `app_spec.txt` +3. Run with the `--new-spec` flag: + ```bash + python autonomous_agent_demo.py --project-dir ./my_project --new-spec app_spec_new_feature.txt + ``` +4. 
The Initializer Bis agent will create new Linear issues for each feature in the spec file + +### Adjusting Issue Count + +Edit `prompts/initializer_prompt.md` and change "50 issues" to your desired count. + +### Modifying Allowed Commands + +Edit `security.py` to add or remove commands from `ALLOWED_COMMANDS`. + +## Troubleshooting + +**"CLAUDE_CODE_OAUTH_TOKEN not set"** +Run `claude setup-token` to generate a token, then export it. + +**"LINEAR_API_KEY not set"** +Get your API key from `https://linear.app/YOUR-TEAM/settings/api` + +**"Appears to hang on first run"** +Normal behavior. The initializer is creating a Linear project and 50 issues with detailed descriptions. Watch for `[Tool: mcp__linear__create_issue]` output. + +**"Command blocked by security hook"** +The agent tried to run a disallowed command. Add it to `ALLOWED_COMMANDS` in `security.py` if needed. + +**"MCP server connection failed"** +Verify your `LINEAR_API_KEY` is valid and has appropriate permissions. The Linear MCP server uses HTTP transport at `https://mcp.linear.app/mcp`. + +## Viewing Progress + +Open your Linear workspace to see: +- The project created by the initializer agent +- All 50 issues organized under the project +- Real-time status changes (Todo → In Progress → Done) +- Implementation comments on each issue +- Session summaries on the META issue +- New issues added by Initializer Bis when using `--new-spec` + +## License + +MIT License - see [LICENSE](LICENSE) for details. diff --git a/agent.py b/agent.py new file mode 100644 index 0000000..76a6c08 --- /dev/null +++ b/agent.py @@ -0,0 +1,231 @@ +""" +Agent Session Logic +=================== + +Core agent interaction functions for running autonomous coding sessions. 
+""" + +import asyncio +from pathlib import Path +from typing import Optional + +from claude_code_sdk import ClaudeSDKClient + +from client import create_client +from progress import print_session_header, print_progress_summary, is_linear_initialized +from prompts import ( + get_initializer_prompt, + get_initializer_bis_prompt, + get_coding_prompt, + copy_spec_to_project, + copy_new_spec_to_project, +) + + +# Configuration +AUTO_CONTINUE_DELAY_SECONDS = 3 + + +async def run_agent_session( + client: ClaudeSDKClient, + message: str, + project_dir: Path, +) -> tuple[str, str]: + """ + Run a single agent session using Claude Agent SDK. + + Args: + client: Claude SDK client + message: The prompt to send + project_dir: Project directory path + + Returns: + (status, response_text) where status is: + - "continue" if agent should continue working + - "error" if an error occurred + """ + print("Sending prompt to Claude Agent SDK...\n") + + try: + # Send the query + await client.query(message) + + # Collect response text and show tool use + response_text = "" + async for msg in client.receive_response(): + msg_type = type(msg).__name__ + + # Handle AssistantMessage (text and tool use) + if msg_type == "AssistantMessage" and hasattr(msg, "content"): + for block in msg.content: + block_type = type(block).__name__ + + if block_type == "TextBlock" and hasattr(block, "text"): + response_text += block.text + print(block.text, end="", flush=True) + elif block_type == "ToolUseBlock" and hasattr(block, "name"): + print(f"\n[Tool: {block.name}]", flush=True) + if hasattr(block, "input"): + input_str = str(block.input) + if len(input_str) > 200: + print(f" Input: {input_str[:200]}...", flush=True) + else: + print(f" Input: {input_str}", flush=True) + + # Handle UserMessage (tool results) + elif msg_type == "UserMessage" and hasattr(msg, "content"): + for block in msg.content: + block_type = type(block).__name__ + + if block_type == "ToolResultBlock": + result_content = getattr(block, 
"content", "") + is_error = getattr(block, "is_error", False) + + # Check if command was blocked by security hook + if "blocked" in str(result_content).lower(): + print(f" [BLOCKED] {result_content}", flush=True) + elif is_error: + # Show errors (truncated) + error_str = str(result_content)[:500] + print(f" [Error] {error_str}", flush=True) + else: + # Tool succeeded - just show brief confirmation + print(" [Done]", flush=True) + + print("\n" + "-" * 70 + "\n") + return "continue", response_text + + except Exception as e: + print(f"Error during agent session: {e}") + return "error", str(e) + + +async def run_autonomous_agent( + project_dir: Path, + model: str, + max_iterations: Optional[int] = None, + new_spec_filename: Optional[str] = None, +) -> None: + """ + Run the autonomous agent loop. + + Args: + project_dir: Directory for the project + model: Claude model to use + max_iterations: Maximum number of iterations (None for unlimited) + """ + print("\n" + "=" * 70) + print(" AUTONOMOUS CODING AGENT DEMO") + print("=" * 70) + print(f"\nProject directory: {project_dir}") + print(f"Model: {model}") + if max_iterations: + print(f"Max iterations: {max_iterations}") + else: + print("Max iterations: Unlimited (will run until completion)") + print() + + # Create project directory + project_dir.mkdir(parents=True, exist_ok=True) + + # Check if this is a fresh start, continuation, or adding new specs + # We use .linear_project.json as the marker for initialization + is_first_run = not is_linear_initialized(project_dir) + use_initializer_bis = new_spec_filename is not None and not is_first_run + + if is_first_run: + print("Fresh start - will use initializer agent") + print() + print("=" * 70) + print(" NOTE: First session takes 10-20+ minutes!") + print(" The agent is creating 50 Linear issues and setting up the project.") + print(" This may appear to hang - it's working. Watch for [Tool: ...] 
output.") + print("=" * 70) + print() + # Copy the app spec into the project directory for the agent to read + copy_spec_to_project(project_dir) + elif use_initializer_bis: + print("Adding new specifications - will use initializer bis agent") + print() + print("=" * 70) + print(f" NOTE: Adding new features from {new_spec_filename}") + print(" The agent will create new Linear issues for the additional features.") + print(" This may take several minutes. Watch for [Tool: ...] output.") + print("=" * 70) + print() + # Copy the new spec file into the project directory + copy_new_spec_to_project(project_dir, new_spec_filename) + print_progress_summary(project_dir) + else: + print("Continuing existing project (Linear initialized)") + print_progress_summary(project_dir) + + # Main loop + iteration = 0 + + while True: + iteration += 1 + + # Check max iterations + if max_iterations and iteration > max_iterations: + print(f"\nReached max iterations ({max_iterations})") + print("To continue, run the script again without --max-iterations") + break + + # Print session header + is_initializer_session = is_first_run or (use_initializer_bis and iteration == 1) + is_bis_session = use_initializer_bis and iteration == 1 + print_session_header(iteration, is_initializer_session, is_bis_session) + + # Create client (fresh context) + client = create_client(project_dir, model) + + # Choose prompt based on session type + if is_first_run: + prompt = get_initializer_prompt() + is_first_run = False # Only use initializer once + elif use_initializer_bis and iteration == 1: + prompt = get_initializer_bis_prompt() + use_initializer_bis = False # Only use initializer bis once + else: + prompt = get_coding_prompt() + + # Run session with async context manager + async with client: + status, response = await run_agent_session(client, prompt, project_dir) + + # Handle status + if status == "continue": + print(f"\nAgent will auto-continue in {AUTO_CONTINUE_DELAY_SECONDS}s...") + 
print_progress_summary(project_dir) + await asyncio.sleep(AUTO_CONTINUE_DELAY_SECONDS) + + elif status == "error": + print("\nSession encountered an error") + print("Will retry with a fresh session...") + await asyncio.sleep(AUTO_CONTINUE_DELAY_SECONDS) + + # Small delay between sessions + if max_iterations is None or iteration < max_iterations: + print("\nPreparing next session...\n") + await asyncio.sleep(1) + + # Final summary + print("\n" + "=" * 70) + print(" SESSION COMPLETE") + print("=" * 70) + print(f"\nProject directory: {project_dir}") + print_progress_summary(project_dir) + + # Print instructions for running the generated application + print("\n" + "-" * 70) + print(" TO RUN THE GENERATED APPLICATION:") + print("-" * 70) + print(f"\n cd {project_dir.resolve()}") + print(" ./init.sh # Run the setup script") + print(" # Or manually:") + print(" npm install && npm run dev") + print("\n Then open http://localhost:3000 (or check init.sh for the URL)") + print("-" * 70) + + print("\nDone!") diff --git a/autonomous_agent_demo.py b/autonomous_agent_demo.py new file mode 100644 index 0000000..7d9b487 --- /dev/null +++ b/autonomous_agent_demo.py @@ -0,0 +1,138 @@ +#!/usr/bin/env python3 +""" +Autonomous Coding Agent Demo +============================ + +A minimal harness demonstrating long-running autonomous coding with Claude. +This script implements the two-agent pattern (initializer + coding agent) and +incorporates all the strategies from the long-running agents guide. 
+ +Example Usage: + python autonomous_agent_demo.py --project-dir ./claude_clone_demo + python autonomous_agent_demo.py --project-dir ./claude_clone_demo --max-iterations 5 +""" + +import argparse +import asyncio +import os +from pathlib import Path + +from agent import run_autonomous_agent + + +# Configuration +# Using Claude Opus 4.5 as default for best coding and agentic performance +# See: https://www.anthropic.com/news/claude-opus-4-5 +DEFAULT_MODEL = "claude-opus-4-5-20251101" + + +def parse_args() -> argparse.Namespace: + """Parse command line arguments.""" + parser = argparse.ArgumentParser( + description="Autonomous Coding Agent Demo - Long-running agent harness", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + # Start fresh project + python autonomous_agent_demo.py --project-dir ./claude_clone + + # Use a specific model + python autonomous_agent_demo.py --project-dir ./claude_clone --model claude-sonnet-4-5-20250929 + + # Limit iterations for testing + python autonomous_agent_demo.py --project-dir ./claude_clone --max-iterations 5 + + # Continue existing project + python autonomous_agent_demo.py --project-dir ./claude_clone + + # Add new specifications to existing project + python autonomous_agent_demo.py --project-dir ./claude_clone --new-spec app_spec_new1.txt + +Environment Variables: + CLAUDE_CODE_OAUTH_TOKEN Claude Code OAuth token (required) + LINEAR_API_KEY Linear API key (required) + """, + ) + + parser.add_argument( + "--project-dir", + type=Path, + default=Path("./autonomous_demo_project"), + help="Directory for the project (default: generations/autonomous_demo_project). 
Relative paths automatically placed in generations/ directory.", + ) + + parser.add_argument( + "--max-iterations", + type=int, + default=None, + help="Maximum number of agent iterations (default: unlimited)", + ) + + parser.add_argument( + "--model", + type=str, + default=DEFAULT_MODEL, + help=f"Claude model to use (default: {DEFAULT_MODEL})", + ) + + parser.add_argument( + "--new-spec", + type=str, + default=None, + help="Name of new specification file to add (e.g., 'app_spec_new1.txt'). Use this to add new features to an existing project.", + ) + + return parser.parse_args() + + +def main() -> None: + """Main entry point.""" + args = parse_args() + + # Check for Claude Code OAuth token + if not os.environ.get("CLAUDE_CODE_OAUTH_TOKEN"): + print("Error: CLAUDE_CODE_OAUTH_TOKEN environment variable not set") + print("\nRun 'claude setup-token' after installing the Claude Code CLI.") + print("\nThen set it:") + print(" export CLAUDE_CODE_OAUTH_TOKEN='your-token-here'") + return + + # Check for Linear API key + if not os.environ.get("LINEAR_API_KEY"): + print("Error: LINEAR_API_KEY environment variable not set") + print("\nGet your API key from: https://linear.app/YOUR-TEAM/settings/api") + print("\nThen set it:") + print(" export LINEAR_API_KEY='lin_api_xxxxxxxxxxxxx'") + return + + # Automatically place projects in generations/ directory unless already specified + project_dir = args.project_dir + if not str(project_dir).startswith("generations/"): + # Convert relative paths to be under generations/ + if project_dir.is_absolute(): + # If absolute path, use as-is + pass + else: + # Prepend generations/ to relative paths + project_dir = Path("generations") / project_dir + + # Run the agent + try: + asyncio.run( + run_autonomous_agent( + project_dir=project_dir, + model=args.model, + max_iterations=args.max_iterations, + new_spec_filename=args.new_spec, + ) + ) + except KeyboardInterrupt: + print("\n\nInterrupted by user") + print("To resume, run the same command 
again") + except Exception as e: + print(f"\nFatal error: {e}") + raise + + +if __name__ == "__main__": + main() diff --git a/client.py b/client.py new file mode 100644 index 0000000..0961566 --- /dev/null +++ b/client.py @@ -0,0 +1,169 @@ +""" +Claude SDK Client Configuration +=============================== + +Functions for creating and configuring the Claude Agent SDK client. +""" + +import json +import os +from pathlib import Path + +from claude_code_sdk import ClaudeCodeOptions, ClaudeSDKClient +from claude_code_sdk.types import HookMatcher + +from security import bash_security_hook + + +# Puppeteer MCP tools for browser automation +PUPPETEER_TOOLS = [ + "mcp__puppeteer__puppeteer_navigate", + "mcp__puppeteer__puppeteer_screenshot", + "mcp__puppeteer__puppeteer_click", + "mcp__puppeteer__puppeteer_fill", + "mcp__puppeteer__puppeteer_select", + "mcp__puppeteer__puppeteer_hover", + "mcp__puppeteer__puppeteer_evaluate", +] + +# Linear MCP tools for project management +# Official Linear MCP server at mcp.linear.app +LINEAR_TOOLS = [ + # Team & Project discovery + "mcp__linear__list_teams", + "mcp__linear__get_team", + "mcp__linear__list_projects", + "mcp__linear__get_project", + "mcp__linear__create_project", + "mcp__linear__update_project", + # Issue management + "mcp__linear__list_issues", + "mcp__linear__get_issue", + "mcp__linear__create_issue", + "mcp__linear__update_issue", + "mcp__linear__list_my_issues", + # Comments + "mcp__linear__list_comments", + "mcp__linear__create_comment", + # Workflow + "mcp__linear__list_issue_statuses", + "mcp__linear__get_issue_status", + "mcp__linear__list_issue_labels", + # Users + "mcp__linear__list_users", + "mcp__linear__get_user", +] + +# Built-in tools +BUILTIN_TOOLS = [ + "Read", + "Write", + "Edit", + "Glob", + "Grep", + "Bash", +] + + +def create_client(project_dir: Path, model: str) -> ClaudeSDKClient: + """ + Create a Claude Agent SDK client with multi-layered security. 
+ + Args: + project_dir: Directory for the project + model: Claude model to use + + Returns: + Configured ClaudeSDKClient + + Security layers (defense in depth): + 1. Sandbox - OS-level bash command isolation prevents filesystem escape + 2. Permissions - File operations restricted to project_dir only + 3. Security hooks - Bash commands validated against an allowlist + (see security.py for ALLOWED_COMMANDS) + """ + api_key = os.environ.get("CLAUDE_CODE_OAUTH_TOKEN") + if not api_key: + raise ValueError( + "CLAUDE_CODE_OAUTH_TOKEN environment variable not set.\n" + "Run 'claude setup-token after installing the Claude Code CLI." + ) + + linear_api_key = os.environ.get("LINEAR_API_KEY") + if not linear_api_key: + raise ValueError( + "LINEAR_API_KEY environment variable not set.\n" + "Get your API key from: https://linear.app/YOUR-TEAM/settings/api" + ) + + # Create comprehensive security settings + # Note: Using relative paths ("./**") restricts access to project directory + # since cwd is set to project_dir + security_settings = { + "sandbox": {"enabled": True, "autoAllowBashIfSandboxed": True}, + "permissions": { + "defaultMode": "acceptEdits", # Auto-approve edits within allowed directories + "allow": [ + # Allow all file operations within the project directory + "Read(./**)", + "Write(./**)", + "Edit(./**)", + "Glob(./**)", + "Grep(./**)", + # Bash permission granted here, but actual commands are validated + # by the bash_security_hook (see security.py for allowed commands) + "Bash(*)", + # Allow Puppeteer MCP tools for browser automation + *PUPPETEER_TOOLS, + # Allow Linear MCP tools for project management + *LINEAR_TOOLS, + ], + }, + } + + # Ensure project directory exists before creating settings file + project_dir.mkdir(parents=True, exist_ok=True) + + # Write settings to a file in the project directory + settings_file = project_dir / ".claude_settings.json" + with open(settings_file, "w") as f: + json.dump(security_settings, f, indent=2) + + print(f"Created 
security settings at {settings_file}") + print(" - Sandbox enabled (OS-level bash isolation)") + print(f" - Filesystem restricted to: {project_dir.resolve()}") + print(" - Bash commands restricted to allowlist (see security.py)") + print(" - MCP servers: puppeteer (browser automation), linear (project management)") + print() + + return ClaudeSDKClient( + options=ClaudeCodeOptions( + model=model, + system_prompt="You are an expert full-stack developer building a production-quality web application. You use Linear for project management and tracking all your work.", + allowed_tools=[ + *BUILTIN_TOOLS, + *PUPPETEER_TOOLS, + *LINEAR_TOOLS, + ], + mcp_servers={ + "puppeteer": {"command": "npx", "args": ["puppeteer-mcp-server"]}, + # Linear MCP with Streamable HTTP transport (recommended over SSE) + # See: https://linear.app/docs/mcp + "linear": { + "type": "http", + "url": "https://mcp.linear.app/mcp", + "headers": { + "Authorization": f"Bearer {linear_api_key}" + } + } + }, + hooks={ + "PreToolUse": [ + HookMatcher(matcher="Bash", hooks=[bash_security_hook]), + ], + }, + max_turns=1000, + cwd=str(project_dir.resolve()), + settings=str(settings_file.resolve()), # Use absolute path + ) + ) diff --git a/linear_config.py b/linear_config.py new file mode 100644 index 0000000..434d1d2 --- /dev/null +++ b/linear_config.py @@ -0,0 +1,38 @@ +""" +Linear Configuration +==================== + +Configuration constants for Linear integration. +These values are used in prompts and for project state management. 
+""" + +import os + +# Environment variables (must be set before running) +LINEAR_API_KEY = os.environ.get("LINEAR_API_KEY") +LINEAR_TEAM_ID = os.environ.get("LINEAR_TEAM_ID") + +# Default number of issues to create (can be overridden via command line) +DEFAULT_ISSUE_COUNT = 50 + +# Issue status workflow (Linear default states) +STATUS_TODO = "Todo" +STATUS_IN_PROGRESS = "In Progress" +STATUS_DONE = "Done" + +# Label categories (map to feature types) +LABEL_FUNCTIONAL = "functional" +LABEL_STYLE = "style" +LABEL_INFRASTRUCTURE = "infrastructure" + +# Priority mapping (Linear uses 0-4 where 1=Urgent, 4=Low, 0=No priority) +PRIORITY_URGENT = 1 +PRIORITY_HIGH = 2 +PRIORITY_MEDIUM = 3 +PRIORITY_LOW = 4 + +# Local marker file to track Linear project initialization +LINEAR_PROJECT_MARKER = ".linear_project.json" + +# Meta issue title for project tracking and session handoff +META_ISSUE_TITLE = "[META] Project Progress Tracker" diff --git a/progress.py b/progress.py new file mode 100644 index 0000000..862d18b --- /dev/null +++ b/progress.py @@ -0,0 +1,86 @@ +""" +Progress Tracking Utilities +=========================== + +Functions for tracking and displaying progress of the autonomous coding agent. +Progress is tracked via Linear issues, with local state cached in .linear_project.json. +""" + +import json +from pathlib import Path + +from linear_config import LINEAR_PROJECT_MARKER + + +def load_linear_project_state(project_dir: Path) -> dict | None: + """ + Load the Linear project state from the marker file. + + Args: + project_dir: Directory containing .linear_project.json + + Returns: + Project state dict or None if not initialized + """ + marker_file = project_dir / LINEAR_PROJECT_MARKER + + if not marker_file.exists(): + return None + + try: + with open(marker_file, "r") as f: + return json.load(f) + except (json.JSONDecodeError, IOError): + return None + + +def is_linear_initialized(project_dir: Path) -> bool: + """ + Check if Linear project has been initialized. 
+ + Args: + project_dir: Directory to check + + Returns: + True if .linear_project.json exists and is valid + """ + state = load_linear_project_state(project_dir) + return state is not None and state.get("initialized", False) + + +def print_session_header(session_num: int, is_initializer: bool, is_initializer_bis: bool = False) -> None: + """Print a formatted header for the session.""" + if is_initializer_bis: + session_type = "INITIALIZER BIS" + elif is_initializer: + session_type = "INITIALIZER" + else: + session_type = "CODING AGENT" + + print("\n" + "=" * 70) + print(f" SESSION {session_num}: {session_type}") + print("=" * 70) + print() + + +def print_progress_summary(project_dir: Path) -> None: + """ + Print a summary of current progress. + + Since actual progress is tracked in Linear, this reads the local + state file for cached information. The agent updates Linear directly + and reports progress in session comments. + """ + state = load_linear_project_state(project_dir) + + if state is None: + print("\nProgress: Linear project not yet initialized") + return + + total = state.get("total_issues", 0) + meta_issue = state.get("meta_issue_id", "unknown") + + print(f"\nLinear Project Status:") + print(f" Total issues created: {total}") + print(f" META issue ID: {meta_issue}") + print(f" (Check Linear for current Done/In Progress/Todo counts)") diff --git a/prompts.py b/prompts.py new file mode 100644 index 0000000..f4c8a8b --- /dev/null +++ b/prompts.py @@ -0,0 +1,63 @@ +""" +Prompt Loading Utilities +======================== + +Functions for loading prompt templates from the prompts directory. 
+""" + +import shutil +from pathlib import Path + + +PROMPTS_DIR = Path(__file__).parent / "prompts" + + +def load_prompt(name: str) -> str: + """Load a prompt template from the prompts directory.""" + prompt_path = PROMPTS_DIR / f"{name}.md" + return prompt_path.read_text() + + +def get_initializer_prompt() -> str: + """Load the initializer prompt.""" + return load_prompt("initializer_prompt") + + +def get_coding_prompt() -> str: + """Load the coding agent prompt.""" + return load_prompt("coding_prompt") + + +def copy_spec_to_project(project_dir: Path) -> None: + """Copy the app spec file into the project directory for the agent to read.""" + spec_source = PROMPTS_DIR / "app_spec.txt" + spec_dest = project_dir / "app_spec.txt" + if not spec_dest.exists(): + shutil.copy(spec_source, spec_dest) + print("Copied app_spec.txt to project directory") + + +############################################################################################ +# New specifications added by davebb +############################################################################################ + +def get_initializer_bis_prompt() -> str: + """Load the initializer bis prompt for adding new specifications.""" + return load_prompt("initializer_bis_prompt") + + +def copy_new_spec_to_project(project_dir: Path, new_spec_filename: str) -> None: + """ + Copy a new specification file into the project directory for the agent to read. 
+ + Args: + project_dir: Project directory path + new_spec_filename: Name of the new spec file (e.g., "app_spec_new1.txt") + """ + spec_source = PROMPTS_DIR / new_spec_filename + if not spec_source.exists(): + raise FileNotFoundError(f"New specification file not found: {spec_source}") + + spec_dest = project_dir / new_spec_filename + shutil.copy(spec_source, spec_dest) + print(f"Copied {new_spec_filename} to project directory") diff --git a/prompts/app_spec.txt b/prompts/app_spec.txt new file mode 100644 index 0000000..1e35f6d --- /dev/null +++ b/prompts/app_spec.txt @@ -0,0 +1,681 @@ + + Claude.ai Clone - AI Chat Interface + + + Build a fully functional clone of claude.ai, Anthropic's conversational AI interface. The application should + provide a clean, modern chat interface for interacting with Claude via the API, including features like + conversation management, artifact rendering, project organization, multiple model selection, and advanced + settings. The UI should closely match claude.ai's design using Tailwind CSS with a focus on excellent + user experience and responsive design. + + + + + You can use an API key located at /tmp/api-key for testing. You will not be allowed to read this file, but you can reference it in code. 
+ + + React with Vite + Tailwind CSS (via CDN) + React hooks and context + React Router for navigation + React Markdown for message rendering + Syntax highlighting for code blocks + Only launch on port {frontend_port} + + + Node.js with Express + SQLite with better-sqlite3 + Claude API for chat completions + Server-Sent Events for streaming responses + + + RESTful endpoints + SSE for real-time message streaming + Integration with Claude API using Anthropic SDK + + + + + + - Repository includes .env with VITE_ANTHROPIC_API_KEY configured + - Frontend dependencies pre-installed via pnpm + - Backend code goes in /server directory + - Install backend dependencies as needed + + + + + + - Clean, centered chat layout with message bubbles + - Streaming message responses with typing indicator + - Markdown rendering with proper formatting + - Code blocks with syntax highlighting and copy button + - LaTeX/math equation rendering + - Image upload and display in messages + - Multi-turn conversations with context + - Message editing and regeneration + - Stop generation button during streaming + - Input field with auto-resize textarea + - Character count and token estimation + - Keyboard shortcuts (Enter to send, Shift+Enter for newline) + + + + - Artifact detection and rendering in side panel + - Code artifact viewer with syntax highlighting + - HTML/SVG preview with live rendering + - React component preview + - Mermaid diagram rendering + - Text document artifacts + - Artifact editing and re-prompting + - Full-screen artifact view + - Download artifact content + - Artifact versioning and history + + + + - Create new conversations + - Conversation list in sidebar + - Rename conversations + - Delete conversations + - Search conversations by title/content + - Pin important conversations + - Archive conversations + - Conversation folders/organization + - Duplicate conversation + - Export conversation (JSON, Markdown, PDF) + - Conversation timestamps (created, last updated) + - 
Unread message indicators + + + + - Create projects to group related conversations + - Project knowledge base (upload documents) + - Project-specific custom instructions + - Share projects with team (mock feature) + - Project settings and configuration + - Move conversations between projects + - Project templates + - Project analytics (usage stats) + + + + - Model selector dropdown with the following models: + - Claude Sonnet 4.5 (claude-sonnet-4-5-20250929) - default + - Claude Haiku 4.5 (claude-haiku-4-5-20251001) + - Claude Opus 4.1 (claude-opus-4-1-20250805) + - Model capabilities display + - Context window indicator + - Model-specific pricing info (display only) + - Switch models mid-conversation + - Model comparison view + + + + - Global custom instructions + - Project-specific custom instructions + - Conversation-specific system prompts + - Custom instruction templates + - Preview how instructions affect responses + + + + - Theme selection (Light, Dark, Auto) + - Font size adjustment + - Message density (compact, comfortable, spacious) + - Code theme selection + - Language preferences + - Accessibility options + - Keyboard shortcuts reference + - Data export options + - Privacy settings + - API key management + + + + - Temperature control slider + - Max tokens adjustment + - Top-p (nucleus sampling) control + - System prompt override + - Thinking/reasoning mode toggle + - Multi-modal input (text + images) + - Voice input (optional, mock UI) + - Response suggestions + - Related prompts + - Conversation branching + + + + - Share conversation via link (read-only) + - Export conversation formats + - Conversation templates + - Prompt library + - Share artifacts + - Team workspaces (mock UI) + + + + - Search across all conversations + - Filter by project, date, model + - Prompt library with categories + - Example conversations + - Quick actions menu + - Command palette (Cmd/Ctrl+K) + + + + - Token usage display per message + - Conversation cost estimation + - 
Daily/monthly usage dashboard + - Usage limits and warnings + - API quota tracking + + + + - Welcome screen for new users + - Feature tour highlights + - Example prompts to get started + - Quick tips and best practices + - Keyboard shortcuts tutorial + + + + - Full keyboard navigation + - Screen reader support + - ARIA labels and roles + - High contrast mode + - Focus management + - Reduced motion support + + + + - Mobile-first responsive layout + - Touch-optimized interface + - Collapsible sidebar on mobile + - Swipe gestures for navigation + - Adaptive artifact display + - Progressive Web App (PWA) support + + + + + + + - id, email, name, avatar_url + - created_at, last_login + - preferences (JSON: theme, font_size, etc.) + - custom_instructions + + + + - id, user_id, name, description, color + - custom_instructions, knowledge_base_path + - created_at, updated_at + - is_archived, is_pinned + + + + - id, user_id, project_id, title + - model, created_at, updated_at, last_message_at + - is_archived, is_pinned, is_deleted + - settings (JSON: temperature, max_tokens, etc.) 
+ - token_count, message_count + + + + - id, conversation_id, role (user/assistant/system) + - content, created_at, edited_at + - tokens, finish_reason + - images (JSON array of image data) + - parent_message_id (for branching) + + + + - id, message_id, conversation_id + - type (code/html/svg/react/mermaid/text) + - title, identifier, language + - content, version + - created_at, updated_at + + + + - id, conversation_id, share_token + - created_at, expires_at, view_count + - is_public + + + + - id, user_id, title, description + - prompt_template, category, tags (JSON) + - is_public, usage_count + - created_at, updated_at + + + + - id, user_id, project_id, name, parent_folder_id + - created_at, position + + + + - id, folder_id, conversation_id + + + + - id, user_id, conversation_id, message_id + - model, input_tokens, output_tokens + - cost_estimate, created_at + + + + - id, user_id, key_name, api_key_hash + - created_at, last_used_at + - is_active + + + + + + + - POST /api/auth/login + - POST /api/auth/logout + - GET /api/auth/me + - PUT /api/auth/profile + + + + - GET /api/conversations + - POST /api/conversations + - GET /api/conversations/:id + - PUT /api/conversations/:id + - DELETE /api/conversations/:id + - POST /api/conversations/:id/duplicate + - POST /api/conversations/:id/export + - PUT /api/conversations/:id/archive + - PUT /api/conversations/:id/pin + - POST /api/conversations/:id/branch + + + + - GET /api/conversations/:id/messages + - POST /api/conversations/:id/messages + - PUT /api/messages/:id + - DELETE /api/messages/:id + - POST /api/messages/:id/regenerate + - GET /api/messages/stream (SSE endpoint) + + + + - GET /api/conversations/:id/artifacts + - GET /api/artifacts/:id + - PUT /api/artifacts/:id + - DELETE /api/artifacts/:id + - POST /api/artifacts/:id/fork + - GET /api/artifacts/:id/versions + + + + - GET /api/projects + - POST /api/projects + - GET /api/projects/:id + - PUT /api/projects/:id + - DELETE /api/projects/:id + - POST 
/api/projects/:id/knowledge + - GET /api/projects/:id/conversations + - PUT /api/projects/:id/settings + + + + - POST /api/conversations/:id/share + - GET /api/share/:token + - DELETE /api/share/:token + - PUT /api/share/:token/settings + + + + - GET /api/prompts/library + - POST /api/prompts/library + - GET /api/prompts/:id + - PUT /api/prompts/:id + - DELETE /api/prompts/:id + - GET /api/prompts/categories + - GET /api/prompts/examples + + + + - GET /api/search/conversations?q=query + - GET /api/search/messages?q=query + - GET /api/search/artifacts?q=query + - GET /api/search/prompts?q=query + + + + - GET /api/folders + - POST /api/folders + - PUT /api/folders/:id + - DELETE /api/folders/:id + - POST /api/folders/:id/items + - DELETE /api/folders/:id/items/:conversationId + + + + - GET /api/usage/daily + - GET /api/usage/monthly + - GET /api/usage/by-model + - GET /api/usage/conversations/:id + + + + - GET /api/settings + - PUT /api/settings + - GET /api/settings/custom-instructions + - PUT /api/settings/custom-instructions + + + + - POST /api/claude/chat (proxy to Claude API) + - POST /api/claude/chat/stream (streaming proxy) + - GET /api/claude/models + - POST /api/claude/images/upload + + + + + + - Three-column layout: sidebar (conversations), main (chat), panel (artifacts) + - Collapsible sidebar with resize handle + - Responsive breakpoints: mobile (single column), tablet (two column), desktop (three column) + - Persistent header with project/model selector + - Bottom input area with send button and options + + + + - New chat button (prominent) + - Project selector dropdown + - Search conversations input + - Conversations list (grouped by date: Today, Yesterday, Previous 7 days, etc.) 
+ - Folder tree view (collapsible) + - Settings gear icon at bottom + - User profile at bottom + + + + - Conversation title (editable inline) + - Model selector badge + - Message history (scrollable) + - Welcome screen for new conversations + - Suggested prompts (empty state) + - Input area with formatting toolbar + - Attachment button for images + - Send button with loading state + - Stop generation button + + + + - Artifact header with title and type badge + - Code editor or preview pane + - Tabs for multiple artifacts + - Full-screen toggle + - Download button + - Edit/Re-prompt button + - Version selector + - Close panel button + + + + - Settings modal (tabbed interface) + - Share conversation modal + - Export options modal + - Project settings modal + - Prompt library modal + - Command palette overlay + - Keyboard shortcuts reference + + + + + + - Primary: Orange/amber accent (#CC785C claude-style) + - Background: White (light mode), Dark gray (#1A1A1A dark mode) + - Surface: Light gray (#F5F5F5 light), Darker gray (#2A2A2A dark) + - Text: Near black (#1A1A1A light), Off-white (#E5E5E5 dark) + - Borders: Light gray (#E5E5E5 light), Dark gray (#404040 dark) + - Code blocks: Monaco editor theme + + + + - Sans-serif system font stack (Inter, SF Pro, Roboto, system-ui) + - Headings: font-semibold + - Body: font-normal, leading-relaxed + - Code: Monospace (JetBrains Mono, Consolas, Monaco) + - Message text: text-base (16px), comfortable line-height + + + + + - User messages: Right-aligned, subtle background + - Assistant messages: Left-aligned, no background + - Markdown formatting with proper spacing + - Inline code with bg-gray-100 background + - Code blocks with syntax highlighting + - Copy button on code blocks + + + + - Primary: Orange/amber background, white text, rounded + - Secondary: Border style with hover fill + - Icon buttons: Square with hover background + - Disabled state: Reduced opacity, no pointer events + + + + - Rounded borders with focus ring + 
- Textarea auto-resize + - Placeholder text in gray + - Error states in red + - Character counter + + + + - Subtle border or shadow + - Rounded corners (8px) + - Padding: p-4 to p-6 + - Hover state: slight shadow increase + + + + + - Smooth transitions (150-300ms) + - Fade in for new messages + - Slide in for sidebar + - Typing indicator animation + - Loading spinner for generation + - Skeleton loaders for content + + + + + + 1. User types message in input field + 2. Optional: Attach images via button + 3. Click send or press Enter + 4. Message appears in chat immediately + 5. Typing indicator shows while waiting + 6. Response streams in word by word + 7. Code blocks render with syntax highlighting + 8. Artifacts detected and rendered in side panel + 9. Message complete, enable regenerate option + + + + 1. Assistant generates artifact in response + 2. Artifact panel slides in from right + 3. Content renders (code with highlighting or live preview) + 4. User can edit artifact inline + 5. "Re-prompt" button to iterate with Claude + 6. Download or copy artifact content + 7. Full-screen mode for detailed work + 8. Close panel to return to chat focus + + + + 1. Click "New Chat" to start fresh conversation + 2. Conversations auto-save with first message + 3. Auto-generate title from first exchange + 4. Click title to rename inline + 5. Drag conversations into folders + 6. Right-click for context menu (pin, archive, delete, export) + 7. Search filters conversations in real-time + 8. 
Click conversation to switch context + + + + + + Setup Project Foundation and Database + + - Initialize Express server with SQLite database + - Set up Claude API client with streaming support + - Create database schema with migrations + - Implement authentication endpoints + - Set up basic CORS and middleware + - Create health check endpoint + + + + + Build Core Chat Interface + + - Create main layout with sidebar and chat area + - Implement message display with markdown rendering + - Add streaming message support with SSE + - Build input area with auto-resize textarea + - Add code block syntax highlighting + - Implement stop generation functionality + - Add typing indicators and loading states + + + + + Conversation Management + + - Create conversation list in sidebar + - Implement new conversation creation + - Add conversation switching + - Build conversation rename functionality + - Implement delete with confirmation + - Add conversation search + - Create conversation grouping by date + + + + + Artifacts System + + - Build artifact detection from Claude responses + - Create artifact rendering panel + - Implement code artifact viewer + - Add HTML/SVG live preview + - Build artifact editing interface + - Add artifact versioning + - Implement full-screen artifact view + + + + + Projects and Organization + + - Create projects CRUD endpoints + - Build project selector UI + - Implement project-specific custom instructions + - Add folder system for conversations + - Create drag-and-drop organization + - Build project settings panel + + + + + Advanced Features + + - Add model selection dropdown + - Implement temperature and parameter controls + - Build image upload functionality + - Create message editing and regeneration + - Add conversation branching + - Implement export functionality + + + + + Settings and Customization + + - Build settings modal with tabs + - Implement theme switching (light/dark) + - Add custom instructions management + - Create keyboard shortcuts 
+ - Build prompt library + - Add usage tracking dashboard + + + + + Sharing and Collaboration + + - Implement conversation sharing with tokens + - Create public share view + - Add export to multiple formats + - Build prompt templates + - Create example conversations + + + + + Polish and Optimization + + - Optimize for mobile responsiveness + - Add command palette (Cmd+K) + - Implement comprehensive keyboard navigation + - Add onboarding flow + - Create accessibility improvements + - Performance optimization and caching + + + + + + + - Streaming chat responses work smoothly + - Artifact detection and rendering accurate + - Conversation management intuitive and reliable + - Project organization clear and useful + - Image upload and display working + - All CRUD operations functional + + + + - Interface matches claude.ai design language + - Responsive on all device sizes + - Smooth animations and transitions + - Fast response times and minimal lag + - Intuitive navigation and workflows + - Clear feedback for all actions + + + + - Clean, maintainable code structure + - Proper error handling throughout + - Secure API key management + - Optimized database queries + - Efficient streaming implementation + - Comprehensive testing coverage + + + + - Consistent with claude.ai visual design + - Beautiful typography and spacing + - Smooth animations and micro-interactions + - Excellent contrast and accessibility + - Professional, polished appearance + - Dark mode fully implemented + + + diff --git a/prompts/app_spec_mistral_extensible.txt b/prompts/app_spec_mistral_extensible.txt new file mode 100644 index 0000000..0abcc86 --- /dev/null +++ b/prompts/app_spec_mistral_extensible.txt @@ -0,0 +1,448 @@ + + Claude.ai Clone - Multi-Provider Support (Mistral + Extensible) + + + This specification adds Mistral AI model support AND creates an extensible provider architecture + that makes it easy to add additional AI providers (OpenAI, Gemini, etc.) in the future. 
+ This uses the "Open/Closed Principle" - open for extension, closed for modification. + + All changes are additive and backward-compatible. Existing Claude functionality remains unchanged. + + + + + - DO NOT modify existing Claude API integration code directly + - DO NOT change existing model selection logic for Claude models + - DO NOT modify existing database schema without safe migrations + - DO NOT break existing conversations or messages + - All new code must be in separate files/modules when possible + - Test thoroughly before marking issues as complete + - Maintain backward compatibility at all times + - Refactor Claude code to use BaseProvider WITHOUT changing functionality + + + + + + Create an abstract provider interface that all AI providers implement: + - BaseProvider (abstract class/interface) - defines common interface + - ClaudeProvider (existing code refactored to extend BaseProvider) + - MistralProvider (new, extends BaseProvider) + - OpenAIProvider (future, extends BaseProvider - easy to add) + - GeminiProvider (future, extends BaseProvider - easy to add) + + + + - Easy to add new providers without modifying existing code + - Consistent interface across all providers + - Isolated error handling per provider + - Unified model selection UI + - Shared functionality (streaming, error handling, logging) + - Future-proof architecture + + + + + + Extensible Provider Architecture (Foundation) + + Create a provider abstraction layer that allows easy addition of multiple AI providers. + This is the foundation that makes adding OpenAI, Gemini, etc. trivial in the future. 
+ + BaseProvider abstract class should define: + - sendMessage(messages, options) -> Promise<response> + - streamMessage(messages, options) -> AsyncGenerator<chunk> + - getModels() -> Promise<array> of available models + - validateApiKey(key) -> Promise<boolean> + - getCapabilities() -> object with provider capabilities + - getName() -> string (provider name: 'claude', 'mistral', 'openai', etc.) + - getDefaultModel() -> string (default model ID for this provider) + + ProviderRegistry should: + - Register all available providers + - Provide list of all providers + - Check which providers are configured (have API keys) + - Enable/disable providers + + ProviderFactory should: + - Create provider instances based on model ID or provider name + - Handle provider selection logic + - Route requests to correct provider + + 1 + functional + + - Create server/providers/BaseProvider.js (abstract base class) + - Refactor existing Claude code to server/providers/ClaudeProvider.js (extends BaseProvider) + - Create server/providers/ProviderRegistry.js (manages all providers) + - Create server/providers/ProviderFactory.js (creates provider instances) + - Update existing routes to use ProviderFactory instead of direct Claude calls + - Keep all provider code in server/providers/ directory + + + 1. Verify Claude still works after refactoring to use BaseProvider + 2. Test that ProviderFactory creates ClaudeProvider correctly + 3. Test that ProviderRegistry lists Claude provider + 4. Verify error handling works correctly + 5. Test that adding a mock provider is straightforward + 6. Verify no regression in existing Claude functionality + + + + + Mistral Provider Implementation + + Implement MistralProvider extending BaseProvider. 
This should: + - Implement all BaseProvider abstract methods + - Handle Mistral-specific API calls (https://api.mistral.ai/v1/chat/completions) + - Support Mistral streaming (Server-Sent Events) + - Handle Mistral-specific error codes and messages + - Provide Mistral model list: + * mistral-large-latest (default) + * mistral-medium-latest + * mistral-small-latest + * mistral-7b-instruct + - Manage Mistral API authentication + - Return responses in unified format (same as Claude) + + 2 + functional + + - Create server/providers/MistralProvider.js + - Extend BaseProvider class + - Implement Mistral API integration using fetch or axios + - Register in ProviderRegistry + - Use same response format as ClaudeProvider for consistency + + + 1. Test MistralProvider.sendMessage() works with valid API key + 2. Test MistralProvider.streamMessage() works + 3. Test MistralProvider.getModels() returns correct models + 4. Test error handling for invalid API key + 5. Test error handling for API rate limits + 6. Verify it integrates with ProviderFactory + 7. Verify responses match expected format + + + + + Unified Model Selector (All Providers) + + Update model selector to dynamically load models from all registered providers. + The selector should: + - Query all providers for available models via GET /api/models + - Group models by provider (Claude, Mistral, etc.) + - Display provider badges/icons next to model names + - Show which provider each model belongs to + - Filter models by provider (optional toggle) + - Show provider-specific capabilities (streaming, images, etc.) 
+ - Only show models from providers with configured API keys + - Handle providers gracefully (show "Configure API key" if not set) + + 2 + functional + + - Create API endpoint: GET /api/models (returns all models from all providers) + - Update frontend ModelSelector component to handle multiple providers + - Add provider grouping/filtering in UI + - Show provider badges/icons next to model names + - Group models by provider with collapsible sections + - Show provider status (configured/not configured) + + + 1. Verify model selector shows Claude models (existing functionality) + 2. Verify model selector shows Mistral models (if key configured) + 3. Test grouping by provider works + 4. Test filtering by provider works + 5. Verify provider badges display correctly + 6. Test that providers without API keys show "Configure" message + 7. Verify selecting a model works for both providers + + + + + Multi-Provider API Key Management + + Create unified API key management that supports multiple providers. Users should be able to: + - Manage API keys for each provider separately (Claude, Mistral, OpenAI, etc.) + - See which providers are available + - See which providers are configured (have API keys) + - Test each provider's API key independently + - Enable/disable providers (hide models if key not configured) + - See provider status indicators (configured/not configured/error) + - Update or remove API keys for any provider + - See usage statistics per provider + + 2 + functional + + - Create server/routes/providers.js with unified provider management + - Update settings UI to show provider cards (one per provider) + - Each provider card has: + * Provider name and logo/icon + * API key input field (masked) + * "Test Connection" button + * Status indicator (green/yellow/red) + * Enable/disable toggle + - Store keys in api_keys table with key_name = 'claude_api_key', 'mistral_api_key', etc. + - Use same encryption method for all providers + + + 1. 
Configure Claude API key (verify existing functionality still works) + 2. Configure Mistral API key + 3. Verify both keys are stored separately + 4. Test each provider's "Test Connection" button + 5. Remove one key and verify only that provider's models are hidden + 6. Verify provider status indicators update correctly + 7. Test that disabling a provider hides its models + + + + + Database Support for Multiple Providers (Future-Proof) + + Update database schema to support multiple providers in a future-proof way. + This should: + - Add provider field to conversations table (TEXT, default: 'claude') + - Add provider field to messages/usage_tracking (TEXT, default: 'claude') + - Use TEXT field (not ENUM) to allow easy addition of new providers without schema changes + - Migration should be safe, idempotent, and backward compatible + - All existing records default to 'claude' provider + - Add indexes for performance on provider queries + + 1 + functional + + - Create migration: server/migrations/add_provider_support.sql + - Use TEXT field (not ENUM) for provider name (allows 'claude', 'mistral', 'openai', etc.) + - Default all existing records to 'claude' + - Add indexes on provider columns for performance + - Make migration idempotent (can run multiple times safely) + - Create rollback script if needed + + + 1. Backup existing database + 2. Run migration script + 3. Verify all existing conversations have provider='claude' + 4. Verify all existing messages have provider='claude' (via usage_tracking) + 5. Create new conversation with Mistral provider + 6. Verify provider='mistral' is saved correctly + 7. Query conversations by provider (test index performance) + 8. Verify existing Claude conversations still work + 9. Test rollback script if needed + + + + + Unified Chat Endpoint (Works with Any Provider) + + Update chat endpoints to use ProviderFactory, making them work with any provider. 
+ The endpoint should: + - Accept provider or model ID in request + - Use ProviderFactory to get correct provider + - Route request to appropriate provider + - Return unified response format + - Handle provider-specific errors gracefully + - Support streaming for all providers that support it + + 1 + functional + + - Update POST /api/chat to use ProviderFactory + - Update POST /api/chat/stream to use ProviderFactory + - Extract provider from model ID or accept provider parameter + - Route to correct provider instance + - Return unified response format + + + 1. Test POST /api/chat with Claude model (verify no regression) + 2. Test POST /api/chat with Mistral model + 3. Test POST /api/chat/stream with Claude (verify streaming still works) + 4. Test POST /api/chat/stream with Mistral + 5. Test error handling for invalid provider + 6. Test error handling for missing API key + + + + + + + How to Add OpenAI in the Future + + To add OpenAI support later, simply follow these steps (NO changes to existing code needed): + + 1. Create server/providers/OpenAIProvider.js extending BaseProvider + 2. Implement OpenAI API calls (https://api.openai.com/v1/chat/completions) + 3. Register in ProviderRegistry: ProviderRegistry.register('openai', OpenAIProvider) + 4. That's it! OpenAI models will automatically appear in model selector. + + Example OpenAIProvider structure: + - Extends BaseProvider + - Implements sendMessage() using OpenAI API + - Implements streamMessage() for streaming support + - Returns models: gpt-4, gpt-3.5-turbo, etc. + - Handles OpenAI-specific authentication and errors + + + + + + Same pattern works for any AI provider: + - Google Gemini (GeminiProvider) + - Cohere (CohereProvider) + - Any other AI API that follows similar patterns + Just create a new Provider class extending BaseProvider and register it. 
+ + + + + + + server/ + providers/ + BaseProvider.js # Abstract base class (NEW) + ClaudeProvider.js # Refactored Claude (extends BaseProvider) + MistralProvider.js # New Mistral (extends BaseProvider) + ProviderRegistry.js # Manages all providers (NEW) + ProviderFactory.js # Creates provider instances (NEW) + routes/ + providers.js # Unified provider management (NEW) + chat.js # Updated to use ProviderFactory + migrations/ + add_provider_support.sql # Database migration (NEW) + + + + - Refactor Claude code to use BaseProvider WITHOUT changing functionality + - All providers are isolated - errors in one don't affect others + - Database changes are backward compatible (TEXT field, not ENUM) + - Existing conversations default to 'claude' provider + - Test Claude thoroughly after refactoring + - Use feature flags if needed to enable/disable providers + - Log all provider operations separately for debugging + + + + - Each provider handles its own errors + - Provider errors should NOT affect other providers + - Show user-friendly error messages + - Log errors with provider context + - Don't throw unhandled exceptions + + + + + + + Add provider support (TEXT field for extensibility) + + -- Add provider column to conversations (TEXT allows any provider name) + -- Default to 'claude' for backward compatibility + ALTER TABLE conversations + ADD COLUMN provider TEXT DEFAULT 'claude'; + + -- Add provider column to usage_tracking + ALTER TABLE usage_tracking + ADD COLUMN provider TEXT DEFAULT 'claude'; + + -- Add indexes for performance + CREATE INDEX IF NOT EXISTS idx_conversations_provider + ON conversations(provider); + + CREATE INDEX IF NOT EXISTS idx_usage_tracking_provider + ON usage_tracking(provider); + + + -- Rollback script (use with caution - may cause data issues) + DROP INDEX IF EXISTS idx_conversations_provider; + DROP INDEX IF EXISTS idx_usage_tracking_provider; + -- Note: SQLite doesn't support DROP COLUMN easily + -- Would need to recreate table without 
provider column + + + Using TEXT instead of ENUM allows adding new providers (OpenAI, Gemini, etc.) + without database schema changes in the future. This is future-proof. + + + + + + - All existing conversations default to provider='claude' + - All existing usage_tracking rows default to provider='claude' + - Migration must be idempotent (safe to run multiple times): check whether the provider column already exists before running ALTER TABLE ... ADD COLUMN, because SQLite has no ADD COLUMN IF NOT EXISTS + - No data loss during migration + - Existing queries continue to work + + + + + + - GET /api/models - Get all models from all configured providers + - GET /api/providers - Get list of available providers and their status + - POST /api/providers/:provider/key - Set API key for specific provider + - POST /api/providers/:provider/test - Test provider API key + - GET /api/providers/:provider/status - Get provider configuration status + - DELETE /api/providers/:provider/key - Remove provider API key + + + + - POST /api/chat - Updated to use ProviderFactory (works with any provider) + * Accepts: { model: 'model-id', messages: [...], ... } + * Provider is determined from model ID or can be specified + - POST /api/chat/stream - Updated to use ProviderFactory (streaming for any provider) + * Same interface, works with any provider that supports streaming + + + + + + - No new dependencies required (use native fetch for Mistral API) + - Optional: @mistralai/mistralai (only if it provides significant value) + - Keep dependencies minimal to avoid conflicts + + + + + + - Verify all existing Claude functionality still works + - Test that existing conversations load correctly + - Verify Claude model selection still works + - Test Claude API endpoints are unaffected + - Verify database queries for Claude still work + - Test Claude streaming still works + + + + - Test switching between Claude and Mistral models + - Test conversations with different providers + - Test error handling doesn't affect other providers + - Test migration doesn't break existing data + - Test ProviderFactory routes correctly + - Test unified model selector 
with multiple providers + + + + - Verify adding a mock provider is straightforward + - Test that ProviderFactory correctly routes to providers + - Verify provider isolation (errors don't propagate) + - Test that new providers automatically appear in UI + + + + + + - Claude functionality works exactly as before (no regression) + - Mistral models appear in selector and work correctly + - Users can switch between Claude and Mistral seamlessly + - API key management works for both providers + - Database migration is safe and backward compatible + + + + - Adding a new provider (like OpenAI) requires only creating one new file + - No changes needed to existing code when adding providers + - Provider architecture is documented and easy to follow + - Code is organized and maintainable + + + diff --git a/prompts/app_spec_theme_customization.txt b/prompts/app_spec_theme_customization.txt new file mode 100644 index 0000000..f95e5d3 --- /dev/null +++ b/prompts/app_spec_theme_customization.txt @@ -0,0 +1,403 @@ + + Claude.ai Clone - Advanced Theme Customization + + + This specification adds advanced theme customization features to the Claude.ai clone application. + Users will be able to customize accent colors, font sizes, message spacing, and choose from + preset color themes. All changes are additive and backward-compatible with existing theme functionality. + + The existing light/dark mode toggle remains unchanged and functional. + + + + + - DO NOT modify existing light/dark mode functionality + - DO NOT break existing theme persistence + - DO NOT change existing CSS classes without ensuring backward compatibility + - All new theme options must be optional (defaults should match current behavior) + - Test thoroughly to ensure existing themes still work + - Maintain backward compatibility at all times + - New theme preferences should be stored separately from existing theme settings + + + + + + Advanced Theme Customization + + Add advanced theme customization options. 
Users should be able to: + - Customize accent colors (beyond just light/dark mode) + - Choose from preset color themes (blue, green, purple, orange) + - Adjust font size globally (small, medium, large) + - Adjust message spacing (compact, comfortable, spacious) + - Preview theme changes before applying + - Save custom theme preferences + + The customization interface should be intuitive and provide real-time preview + of changes before they are applied. All preferences should persist across sessions. + + 3 + style + + - Create a new "Appearance" or "Theme" section in settings + - Add accent color picker with preset options (blue, green, purple, orange) + - Add font size slider/selector (small, medium, large) + - Add message spacing selector (compact, comfortable, spacious) + - Implement preview functionality that shows changes in real-time + - Store theme preferences in localStorage or backend (user preferences) + - Apply theme using CSS custom properties (CSS variables) + - Ensure theme works with both light and dark modes + + + 1. Open settings menu + 2. Navigate to "Appearance" or "Theme" section + 3. Select a different accent color (e.g., green) + 4. Verify accent color changes are visible in preview + 5. Adjust font size slider to "large" + 6. Verify font size changes in preview + 7. Adjust message spacing option to "spacious" + 8. Verify spacing changes in preview + 9. Click "Preview" to see changes applied temporarily + 10. Click "Apply" to save changes permanently + 11. Verify changes persist after page refresh + 12. Test with both light and dark mode + 13. Test reset to default theme + 14. Verify existing conversations display correctly with new theme + + + + + Accent Color Customization + + Allow users to customize the accent color used throughout the application. 
+ This includes: + - Primary button colors + - Link colors + - Focus states + - Active states + - Selection highlights + - Progress indicators + + Preset options: + - Blue (default, matches Claude.ai) + - Green + - Purple + - Orange + + Users should be able to see a preview of each color before applying. + + 3 + style + + - Define accent colors as CSS custom properties + - Create color palette for each preset (light and dark variants) + - Add color picker UI component in settings + - Update all accent color usages to use CSS variables + - Ensure colors have proper contrast ratios for accessibility + - Store selected accent color in user preferences + + + 1. Open theme settings + 2. Select "Green" accent color + 3. Verify buttons, links, and highlights use green + 4. Switch to dark mode and verify green accent still works + 5. Test all preset colors (blue, green, purple, orange) + 6. Verify color persists after refresh + 7. Test accessibility (contrast ratios) + + + + + Global Font Size Adjustment + + Allow users to adjust the global font size for better readability. + Options: + - Small (12px base) + - Medium (14px base, default) + - Large (16px base) + + Font size should scale proportionally across all text elements: + - Message text + - UI labels + - Input fields + - Buttons + - Sidebar text + + 3 + style + + - Use CSS rem units for all font sizes + - Set base font size on root element + - Create font size presets (small, medium, large) + - Add font size selector in settings + - Store preference in user settings + - Ensure responsive design still works with different font sizes + + + 1. Open theme settings + 2. Select "Small" font size + 3. Verify all text is smaller throughout the app + 4. Select "Large" font size + 5. Verify all text is larger throughout the app + 6. Verify layout doesn't break with different font sizes + 7. Test with long messages to ensure wrapping works + 8. 
Verify preference persists after refresh + + + + + Message Spacing Customization + + Allow users to adjust the spacing between messages and within message bubbles. + Options: + - Compact: Minimal spacing (for users who prefer dense layouts) + - Comfortable: Default spacing (current behavior) + - Spacious: Increased spacing (for better readability) + + This affects: + - Vertical spacing between messages + - Padding within message bubbles + - Spacing between message elements (avatar, text, timestamp) + + 3 + style + + - Define spacing scale using CSS custom properties + - Create spacing presets (compact, comfortable, spacious) + - Apply spacing to message containers and bubbles + - Add spacing selector in settings + - Store preference in user settings + - Ensure spacing works well with different font sizes + + + 1. Open theme settings + 2. Select "Compact" spacing + 3. Verify messages are closer together + 4. Select "Spacious" spacing + 5. Verify messages have more space between them + 6. Test with long conversations to ensure scrolling works + 7. Verify spacing preference persists after refresh + 8. Test with different font sizes to ensure compatibility + + + + + Theme Preview Functionality + + Allow users to preview theme changes before applying them permanently. 
+ The preview should: + - Show a sample conversation with the new theme applied + - Update in real-time as settings are changed + - Allow users to cancel and revert to previous theme + - Show both light and dark mode previews if applicable + + Users should be able to: + - See preview immediately when changing settings + - Click "Apply" to save changes + - Click "Cancel" to discard changes + - Click "Reset" to return to default theme + + 3 + functional + + - Create preview component showing sample conversation + - Apply theme changes temporarily to preview + - Store original theme state for cancel functionality + - Update preview in real-time as settings change + - Only persist changes when "Apply" is clicked + - Show clear visual feedback for preview vs. applied state + + + 1. Open theme settings + 2. Change accent color to green + 3. Verify preview updates immediately + 4. Change font size to large + 5. Verify preview updates with new font size + 6. Click "Cancel" and verify changes are reverted + 7. Make changes again and click "Apply" + 8. Verify changes are saved and applied to actual interface + 9. Test preview with both light and dark mode + + + + + + + frontend/ + components/ + ThemeSettings.jsx # New theme customization UI (NEW) + ThemePreview.jsx # Preview component (NEW) + styles/ + theme-variables.css # CSS custom properties for themes (NEW) + accent-colors.css # Accent color definitions (NEW) + hooks/ + useTheme.js # Updated to handle new theme options + utils/ + themeStorage.js # Theme preference persistence (NEW) + + + + Use CSS custom properties (CSS variables) for all theme values: + - --accent-color-primary + - --accent-color-hover + - --font-size-base + - --message-spacing-vertical + - --message-padding + + This allows easy theme switching without JavaScript manipulation. 
+ + + + Store theme preferences in: + - localStorage for client-side persistence + - Or backend user preferences table if available + + Structure: + { + accentColor: 'blue' | 'green' | 'purple' | 'orange', + fontSize: 'small' | 'medium' | 'large', + messageSpacing: 'compact' | 'comfortable' | 'spacious', + theme: 'light' | 'dark' (existing) + } + + + + - Keep existing theme functionality intact + - Default values should match current behavior + - Use feature detection for new theme features + - Gracefully degrade if CSS custom properties not supported + - Test with existing conversations and UI elements + - Ensure accessibility standards are maintained + + + + + + Settings panel for theme customization + + - Accent Color: Radio buttons or color swatches for preset colors + - Font Size: Slider or dropdown (small, medium, large) + - Message Spacing: Radio buttons (compact, comfortable, spacious) + - Preview: Live preview of theme changes + - Actions: Apply, Cancel, Reset buttons + + + + + Preview component showing sample conversation + + - Sample user message + - Sample AI response + - Shows current accent color + - Shows current font size + - Shows current spacing + - Updates in real-time + + + + + + + Define CSS variables for each accent color preset: + --accent-blue: #2563eb; + --accent-green: #10b981; + --accent-purple: #8b5cf6; + --accent-orange: #f59e0b; + + Each should have hover, active, and focus variants. 
+ + + + Define base font sizes: + --font-size-small: 0.75rem; (12px) + --font-size-medium: 0.875rem; (14px, default) + --font-size-large: 1rem; (16px) + + + + Define spacing scales: + --spacing-compact: 0.5rem; + --spacing-comfortable: 1rem; (default) + --spacing-spacious: 1.5rem; + + + + + + If storing preferences in backend: + - GET /api/user/preferences - Get user theme preferences + - PUT /api/user/preferences - Update user theme preferences + - GET /api/user/preferences/theme - Get theme preferences only + + + + If using localStorage only, no API endpoints needed. + Backend storage is optional but recommended for multi-device sync. + + + + + - All accent colors must meet WCAG AA contrast ratios (4.5:1 for text) + - Font size changes must not break screen reader compatibility + - Theme settings must be keyboard navigable + - Color choices should not be the only way to convey information + - Provide high contrast mode option if possible + + + + + - Verify existing light/dark mode toggle still works + - Verify existing theme persistence still works + - Test that default theme matches current behavior + - Verify existing conversations display correctly + - Test that all UI elements are styled correctly + + + + - Test each accent color preset + - Test each font size option + - Test each spacing option + - Test theme preview functionality + - Test theme persistence (localStorage/backend) + - Test theme reset to defaults + - Test theme with both light and dark modes + - Test theme changes in real-time + + + + - Test with different browsers (Chrome, Firefox, Safari, Edge) + - Test with different screen sizes (responsive design) + - Test with long conversations + - Test with different message types (text, code, artifacts) + - Test accessibility with screen readers + + + + + + - Users can customize accent colors from preset options + - Users can adjust global font size (small, medium, large) + - Users can adjust message spacing (compact, comfortable, spacious) + - Theme 
preview shows changes in real-time + - Theme preferences persist across sessions + - Existing light/dark mode functionality works unchanged + - All theme options work together harmoniously + + + + - Theme customization is intuitive and easy to use + - Preview provides clear feedback before applying changes + - Changes apply smoothly without flickering + - Settings are easy to find and access + - Reset to defaults is easily accessible + + + + - Code is well-organized and maintainable + - CSS custom properties are used consistently + - Theme preferences are stored reliably + - No performance degradation with theme changes + - Backward compatibility is maintained + + + diff --git a/prompts/coding_prompt.md b/prompts/coding_prompt.md new file mode 100644 index 0000000..8b7eaae --- /dev/null +++ b/prompts/coding_prompt.md @@ -0,0 +1,304 @@ +## YOUR ROLE - CODING AGENT + +You are continuing work on a long-running autonomous development task. +This is a FRESH context window - you have no memory of previous sessions. + +You have access to Linear for project management via MCP tools. Linear is your +single source of truth for what needs to be built and what's been completed. + +### STEP 1: GET YOUR BEARINGS (MANDATORY) + +Start by orienting yourself: + +```bash +# 1. See your working directory +pwd + +# 2. List files to understand project structure +ls -la + +# 3. Read the project specification to understand what you're building +cat app_spec.txt + +# 4. Read the Linear project state +cat .linear_project.json + +# 5. Check recent git history +git log --oneline -20 +``` + +Understanding the `app_spec.txt` is critical - it contains the full requirements +for the application you're building. + +### STEP 2: CHECK LINEAR STATUS + +Query Linear to understand current project state. The `.linear_project.json` file +contains the `project_id` and `team_id` you should use for all Linear queries. + +1. 
**Find the META issue** for session context: + Use `mcp__linear__list_issues` with the project ID from `.linear_project.json` + and search for "[META] Project Progress Tracker". + Read the issue description and recent comments for context from previous sessions. + +2. **Count progress:** + Use `mcp__linear__list_issues` with the project ID to get all issues, then count: + - Issues with status "Done" = completed + - Issues with status "Todo" = remaining + - Issues with status "In Progress" = currently being worked on + +3. **Check for in-progress work:** + If any issue is "In Progress", that should be your first priority. + A previous session may have been interrupted. + +### STEP 3: START SERVERS (IF NOT RUNNING) + +If `init.sh` exists, run it: +```bash +chmod +x init.sh +./init.sh +``` + +Otherwise, start servers manually and document the process. + +### STEP 4: VERIFICATION TEST (CRITICAL!) + +**MANDATORY BEFORE NEW WORK:** + +The previous session may have introduced bugs. Before implementing anything +new, you MUST run verification tests. + +Use `mcp__linear__list_issues` with the project ID and status "Done" to find 1-2 +completed features that are core to the app's functionality. 
+ +Test these through the browser using Puppeteer: +- Navigate to the feature +- Verify it still works as expected +- Take screenshots to confirm + +**If you find ANY issues (functional or visual):** +- Use `mcp__linear__update_issue` to set status back to "In Progress" +- Add a comment explaining what broke +- Fix the issue BEFORE moving to new features +- This includes UI bugs like: + * White-on-white text or poor contrast + * Random characters displayed + * Incorrect timestamps + * Layout issues or overflow + * Buttons too close together + * Missing hover states + * Console errors + +### STEP 5: SELECT NEXT ISSUE TO WORK ON + +Use `mcp__linear__list_issues` with the project ID from `.linear_project.json`: +- Filter by `status`: "Todo" +- Sort by priority (1=urgent is highest) +- `limit`: 5 + +Review the highest-priority unstarted issues and select ONE to work on. + +### STEP 6: CLAIM THE ISSUE + +Before starting work, use `mcp__linear__update_issue` to: +- Set the issue's `status` to "In Progress" + +This signals to any other agents (or humans watching) that this issue is being worked on. + +### STEP 7: IMPLEMENT THE FEATURE + +Read the issue description for test steps and implement accordingly: + +1. Write the code (frontend and/or backend as needed) +2. Test manually using browser automation (see Step 8) +3. Fix any issues discovered +4. Verify the feature works end-to-end + +### STEP 8: VERIFY WITH BROWSER AUTOMATION + +**CRITICAL:** You MUST verify features through the actual UI. 
+ +Use browser automation tools: +- `mcp__puppeteer__puppeteer_navigate` - Start browser and go to URL +- `mcp__puppeteer__puppeteer_screenshot` - Capture screenshot +- `mcp__puppeteer__puppeteer_click` - Click elements +- `mcp__puppeteer__puppeteer_fill` - Fill form inputs + +**DO:** +- Test through the UI with clicks and keyboard input +- Take screenshots to verify visual appearance +- Check for console errors in browser +- Verify complete user workflows end-to-end + +**DON'T:** +- Only test with curl commands (backend testing alone is insufficient) +- Use JavaScript evaluation to bypass UI (no shortcuts) +- Skip visual verification +- Mark issues Done without thorough verification + +### STEP 9: UPDATE LINEAR ISSUE (CAREFULLY!) + +After thorough verification: + +1. **Add implementation comment** using `mcp__linear__create_comment`: + ```markdown + ## Implementation Complete + + ### Changes Made + - [List of files changed] + - [Key implementation details] + + ### Verification + - Tested via Puppeteer browser automation + - Screenshots captured + - All test steps from issue description verified + + ### Git Commit + [commit hash and message] + ``` + +2. **Update status** using `mcp__linear__update_issue`: + - Set `status` to "Done" + +**ONLY update status to Done AFTER:** +- All test steps in the issue description pass +- Visual verification via screenshots +- No console errors +- Code committed to git (make the Step 10 commit first, so its hash can go in the implementation comment above) + +### STEP 10: COMMIT YOUR PROGRESS + +Make a descriptive git commit: +```bash +git add . 
+git commit -m "Implement [feature name] + +- Added [specific changes] +- Tested with browser automation +- Linear issue: [issue identifier] +" +``` + +### STEP 11: UPDATE META ISSUE + +Add a comment to the "[META] Project Progress Tracker" issue with session summary: + +```markdown +## Session Complete - [Brief description] + +### Completed This Session +- [Issue title]: [Brief summary of implementation] + +### Current Progress +- X issues Done +- Y issues In Progress +- Z issues remaining in Todo + +### Verification Status +- Ran verification tests on [feature names] +- All previously completed features still working: [Yes/No] + +### Notes for Next Session +- [Any important context] +- [Recommendations for what to work on next] +- [Any blockers or concerns] +``` + +### STEP 12: END SESSION CLEANLY + +Before context fills up: +1. Commit all working code +2. If working on an issue you can't complete: + - Add a comment explaining progress and what's left + - Keep status as "In Progress" (don't revert to Todo) +3. Update META issue with session summary +4. Ensure no uncommitted changes +5. 
Leave app in working state (no broken features) + +--- + +## LINEAR WORKFLOW RULES + +**Status Transitions:** +- Todo → In Progress (when you start working) +- In Progress → Done (when verified complete) +- Done → In Progress (only if regression found) + +**Comments Are Your Memory:** +- Every implementation gets a detailed comment +- Session handoffs happen via META issue comments +- Comments are permanent - future agents will read them + +**NEVER:** +- Delete or archive issues +- Modify issue descriptions or test steps +- Work on issues already "In Progress" by someone else (an issue left "In Progress" by an interrupted previous session is the exception - resume it first, per Step 2) +- Mark "Done" without verification +- Leave issues "In Progress" when switching to another issue + +--- + +## TESTING REQUIREMENTS + +**ALL testing must use browser automation tools.** + +Available Puppeteer tools: +- `mcp__puppeteer__puppeteer_navigate` - Go to URL +- `mcp__puppeteer__puppeteer_screenshot` - Capture screenshot +- `mcp__puppeteer__puppeteer_click` - Click elements +- `mcp__puppeteer__puppeteer_fill` - Fill form inputs +- `mcp__puppeteer__puppeteer_select` - Select dropdown options +- `mcp__puppeteer__puppeteer_hover` - Hover over elements + +Test like a human user with mouse and keyboard. Don't take shortcuts. + +--- + +## SESSION PACING + +**How many issues should you complete per session?** + +This depends on the project phase: + +**Early phase (< 20% Done):** You may complete multiple issues per session when: +- Setting up infrastructure/scaffolding that unlocks many issues at once +- Fixing build issues that were blocking progress +- Auditing existing code and marking already-implemented features as Done + +**Mid/Late phase (≥ 20% Done):** Slow down to **1-2 issues per session**: +- Each feature now requires focused implementation and testing +- Quality matters more than quantity +- Clean handoffs are critical + +**After completing an issue, ask yourself:** +1. Is the app in a stable, working state right now? +2. Have I been working for a while? 
(You can't measure this precisely, but use judgment) +3. Would this be a good stopping point for handoff? + +If yes to all three → proceed to Step 11 (session summary) and end cleanly. +If no → you may continue to the next issue, but **commit first** and stay aware. + +**Golden rule:** It's always better to end a session cleanly with good handoff notes +than to start another issue and risk running out of context mid-implementation. + +--- + +## IMPORTANT REMINDERS + +**Your Goal:** Production-quality application with all Linear issues Done + +**This Session's Goal:** Make meaningful progress with clean handoff + +**Priority:** Fix regressions before implementing new features + +**Quality Bar:** +- Zero console errors +- Polished UI matching the design in app_spec.txt +- All features work end-to-end through the UI +- Fast, responsive, professional + +**Context is finite.** You cannot monitor your context usage, so err on the side +of ending sessions early with good handoff notes. The next agent will continue. + +--- + +Begin by running Step 1 (Get Your Bearings). diff --git a/prompts/initializer_bis_prompt.md b/prompts/initializer_bis_prompt.md new file mode 100644 index 0000000..428eed3 --- /dev/null +++ b/prompts/initializer_bis_prompt.md @@ -0,0 +1,187 @@ +## YOUR ROLE - INITIALIZER BIS AGENT (Adding New Specifications) + +You are an EXTENSION agent in a long-running autonomous development process. +Your job is to ADD NEW SPECIFICATIONS to an EXISTING project that has already been initialized. + +**IMPORTANT:** This project already exists and has been initialized. You are NOT creating a new project. +You are ADDING new features based on a new specification file. + +You have access to Linear for project management via MCP tools. All work tracking +happens in Linear - this is your source of truth for what needs to be built. + +### FIRST: Understand the Existing Project + +Start by reading the existing project state: + +1. 
**Read `.linear_project.json`:** + ```bash + cat .linear_project.json + ``` + This file contains: + - `project_id`: The Linear project ID (you'll use this for new issues) + - `team_id`: The team ID (you'll use this for new issues) + - `meta_issue_id`: The META issue ID (you'll add a comment here) + - `total_issues`: Current total number of issues + +2. **Read the original `app_spec.txt`** (if it exists) to understand what was already built: + ```bash + cat app_spec.txt + ``` + +3. **Check existing Linear issues** to understand what's already been done: + Use `mcp__linear__list_issues` with the project ID from `.linear_project.json` + to see existing issues and their statuses. + +### SECOND: Read the New Specification File + +Read the NEW specification file that was provided. This file contains the ADDITIONAL +features to be added to the existing project. The filename will be something like +`app_spec_new1.txt` or similar. + +```bash +# List files to find the new spec file +ls -la *.txt + +# Read the new specification file +cat app_spec_new*.txt +# (or whatever the filename is) +``` + +Read it carefully to understand what NEW features need to be added. + +### CRITICAL TASK: Create NEW Linear Issues + +Based on the NEW specification file, create NEW Linear issues for each NEW feature +using the `mcp__linear__create_issue` tool. 
+ +**IMPORTANT:** +- Use the EXISTING `project_id` and `team_id` from `.linear_project.json` +- Do NOT create a new Linear project +- Do NOT modify existing issues +- Only create NEW issues for the NEW features + +**For each NEW feature, create an issue with:** + +``` +title: Brief feature name (e.g., "New Feature - Advanced search") +teamId: [Use the team ID from .linear_project.json] +projectId: [Use the project ID from .linear_project.json] +description: Markdown with feature details and test steps (see template below) +priority: 1-4 based on importance (1=urgent/foundational, 4=low/polish) +``` + +**Issue Description Template:** +```markdown +## Feature Description +[Brief description of what this NEW feature does and why it matters] + +## Category +[functional OR style] + +## Test Steps +1. Navigate to [page/location] +2. [Specific action to perform] +3. [Another action] +4. Verify [expected result] +5. [Additional verification steps as needed] + +## Acceptance Criteria +- [ ] [Specific criterion 1] +- [ ] [Specific criterion 2] +- [ ] [Specific criterion 3] + +## Note +This is a NEW feature added via initializer bis. It extends the existing application. 
+``` + +**Requirements for NEW Linear Issues:** +- Create issues ONLY for NEW features from the new spec file +- Do NOT duplicate features that already exist +- Mix of functional and style features (note category in description) +- Order by priority: foundational features get priority 1-2, polish features get 3-4 +- Include detailed test steps in each issue description +- All issues start in "Todo" status (default) +- Prefix issue titles with something like "[NEW]" if helpful to distinguish from existing issues + +**Priority Guidelines:** +- Priority 1 (Urgent): Core infrastructure additions, critical new features +- Priority 2 (High): Important user-facing new features +- Priority 3 (Medium): Secondary new features, enhancements +- Priority 4 (Low): Polish, nice-to-haves, edge cases + +**CRITICAL INSTRUCTION:** +Once created, issues can ONLY have their status changed (Todo → In Progress → Done). +Never delete issues, never modify descriptions after creation. +This ensures no functionality is missed across sessions. + +### NEXT TASK: Update Linear Project State + +Update the `.linear_project.json` file to reflect the new total number of issues: + +1. Read the current `.linear_project.json` +2. Count how many NEW issues you created +3. Add that number to the existing `total_issues` count +4. Update the file with the new total + +Example update: +```json +{ + "initialized": true, + "created_at": "[original timestamp]", + "team_id": "[existing team ID]", + "project_id": "[existing project ID]", + "project_name": "[existing project name]", + "meta_issue_id": "[existing meta issue ID]", + "total_issues": [original_count + new_issues_count], + "notes": "Project initialized by initializer agent. Extended by initializer bis with [X] new issues." 
+} +``` + +### NEXT TASK: Update META Issue + +Add a comment to the existing "[META] Project Progress Tracker" issue (use the `meta_issue_id` +from `.linear_project.json`) summarizing what you accomplished: + +```markdown +## Initializer Bis Session Complete - New Specifications Added + +### Accomplished +- Read new specification file: [filename] +- Created [X] NEW Linear issues for additional features +- Updated .linear_project.json with new total issue count +- [Any other relevant information] + +### New Issues Created +- Total new issues: [X] +- Priority 1: [X] +- Priority 2: [X] +- Priority 3: [X] +- Priority 4: [X] + +### Updated Linear Status +- Previous total issues: [Y] +- New total issues: [Y + X] +- All new issues start in "Todo" status + +### Notes for Next Session +- [Any important context about the new features] +- [Recommendations for what to work on next] +- [Any dependencies or integration points with existing features] +``` + +### ENDING THIS SESSION + +Before your context fills up: +1. Commit all work with descriptive messages +2. Add a comment to the META issue (as described above) +3. Ensure `.linear_project.json` is updated with the new total +4. Leave the environment in a clean, working state + +The next agent (coding agent) will continue from here with a fresh context window and will +see both the original issues and the new issues you created. + +--- + +**Remember:** You are EXTENDING an existing project, not creating a new one. +Focus on adding the new features cleanly without breaking existing functionality. +Production-ready integration is the goal. diff --git a/prompts/initializer_prompt.md b/prompts/initializer_prompt.md new file mode 100644 index 0000000..12b46da --- /dev/null +++ b/prompts/initializer_prompt.md @@ -0,0 +1,202 @@ +## YOUR ROLE - INITIALIZER AGENT (Session 1 of Many) + +You are the FIRST agent in a long-running autonomous development process. +Your job is to set up the foundation for all future coding agents. 
+ +You have access to Linear for project management via MCP tools. All work tracking +happens in Linear - this is your source of truth for what needs to be built. + +### FIRST: Read the Project Specification + +Start by reading `app_spec.txt` in your working directory. This file contains +the complete specification for what you need to build. Read it carefully +before proceeding. + +### SECOND: Set Up Linear Project + +Before creating issues, you need to set up Linear: + +1. **Get the team ID:** + Use `mcp__linear__list_teams` to see available teams. + Note the team ID (e.g., "TEAM-123") for the team where you'll create issues. + +2. **Create a Linear project:** + Use `mcp__linear__create_project` to create a new project: + - `name`: Use the project name from app_spec.txt (e.g., "Claude.ai Clone") + - `teamIds`: Array with your team ID + - `description`: Brief project overview from app_spec.txt + + Save the returned project ID - you'll use it when creating issues. + +### CRITICAL TASK: Create Linear Issues + +Based on `app_spec.txt`, create Linear issues for each feature using the +`mcp__linear__create_issue` tool. Create 50 detailed issues that +comprehensively cover all features in the spec. + +**For each feature, create an issue with:** + +``` +title: Brief feature name (e.g., "Auth - User login flow") +teamId: [Use the team ID you found earlier] +projectId: [Use the project ID from the project you created] +description: Markdown with feature details and test steps (see template below) +priority: 1-4 based on importance (1=urgent/foundational, 4=low/polish) +``` + +**Issue Description Template:** +```markdown +## Feature Description +[Brief description of what this feature does and why it matters] + +## Category +[functional OR style] + +## Test Steps +1. Navigate to [page/location] +2. [Specific action to perform] +3. [Another action] +4. Verify [expected result] +5. 
[Additional verification steps as needed] + +## Acceptance Criteria +- [ ] [Specific criterion 1] +- [ ] [Specific criterion 2] +- [ ] [Specific criterion 3] +``` + +**Requirements for Linear Issues:** +- Create 50 issues total covering all features in the spec +- Mix of functional and style features (note category in description) +- Order by priority: foundational features get priority 1-2, polish features get 3-4 +- Include detailed test steps in each issue description +- All issues start in "Todo" status (default) + +**Priority Guidelines:** +- Priority 1 (Urgent): Core infrastructure, database, basic UI layout +- Priority 2 (High): Primary user-facing features, authentication +- Priority 3 (Medium): Secondary features, enhancements +- Priority 4 (Low): Polish, nice-to-haves, edge cases + +**CRITICAL INSTRUCTION:** +Once created, issues can ONLY have their status changed (Todo → In Progress → Done). +Never delete issues, never modify descriptions after creation. +This ensures no functionality is missed across sessions. + +### NEXT TASK: Create Meta Issue for Session Tracking + +Create a special issue titled "[META] Project Progress Tracker" with: + +```markdown +## Project Overview +[Copy the project name and brief overview from app_spec.txt] + +## Session Tracking +This issue is used for session handoff between coding agents. +Each agent should add a comment summarizing their session. + +## Key Milestones +- [ ] Project setup complete +- [ ] Core infrastructure working +- [ ] Primary features implemented +- [ ] All features complete +- [ ] Polish and refinement done + +## Notes +[Any important context about the project] +``` + +This META issue will be used by all future agents to: +- Read context from previous sessions (via comments) +- Write session summaries before ending +- Track overall project milestones + +### NEXT TASK: Create init.sh + +Create a script called `init.sh` that future agents can use to quickly +set up and run the development environment. 
The script should: + +1. Install any required dependencies +2. Start any necessary servers or services +3. Print helpful information about how to access the running application + +Base the script on the technology stack specified in `app_spec.txt`. + +### NEXT TASK: Initialize Git + +Create a git repository and make your first commit with: +- init.sh (environment setup script) +- README.md (project overview and setup instructions) +- Any initial project structure files + +Commit message: "Initial setup: project structure and init script" + +### NEXT TASK: Create Project Structure + +Set up the basic project structure based on what's specified in `app_spec.txt`. +This typically includes directories for frontend, backend, and any other +components mentioned in the spec. + +### NEXT TASK: Save Linear Project State + +Create a file called `.linear_project.json` with the following information: +```json +{ + "initialized": true, + "created_at": "[current timestamp]", + "team_id": "[ID of the team you used]", + "project_id": "[ID of the Linear project you created]", + "project_name": "[Name of the project from app_spec.txt]", + "meta_issue_id": "[ID of the META issue you created]", + "total_issues": 50, + "notes": "Project initialized by initializer agent" +} +``` + +This file tells future sessions that Linear has been set up. + +### OPTIONAL: Start Implementation + +If you have time remaining in this session, you may begin implementing +the highest-priority features. Remember: +- Use `mcp__linear__linear_search_issues` to find Todo issues with priority 1 +- Use `mcp__linear__linear_update_issue` to set status to "In Progress" +- Work on ONE feature at a time +- Test thoroughly before marking status as "Done" +- Add a comment to the issue with implementation notes +- Commit your progress before session ends + +### ENDING THIS SESSION + +Before your context fills up: +1. Commit all work with descriptive messages +2. 
Add a comment to the META issue summarizing what you accomplished: + ```markdown + ## Session 1 Complete - Initialization + + ### Accomplished + - Created 50 Linear issues from app_spec.txt + - Set up project structure + - Created init.sh + - Initialized git repository + - [Any features started/completed] + + ### Linear Status + - Total issues: 50 + - Done: X + - In Progress: Y + - Todo: Z + + ### Notes for Next Session + - [Any important context] + - [Recommendations for what to work on next] + ``` +3. Ensure `.linear_project.json` exists +4. Leave the environment in a clean, working state + +The next agent will continue from here with a fresh context window. + +--- + +**Remember:** You have unlimited time across many sessions. Focus on +quality over speed. Production-ready is the goal. diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..0c981f6 --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +claude-code-sdk>=0.0.25 diff --git a/security.py b/security.py new file mode 100644 index 0000000..8605bcf --- /dev/null +++ b/security.py @@ -0,0 +1,359 @@ +""" +Security Hooks for Autonomous Coding Agent +========================================== + +Pre-tool-use hooks that validate bash commands for security. +Uses an allowlist approach - only explicitly permitted commands can run. 
+""" + +import os +import shlex + + +# Allowed commands for development tasks +# Minimal set needed for the autonomous coding demo +ALLOWED_COMMANDS = { + # File inspection + "ls", + "cat", + "head", + "tail", + "wc", + "grep", + # File operations (agent uses SDK tools for most file ops, but cp/mkdir needed occasionally) + "cp", + "mkdir", + "chmod", # For making scripts executable; validated separately + # Directory + "pwd", + # Node.js development + "npm", + "node", + # Version control + "git", + # Process management + "ps", + "lsof", + "sleep", + "pkill", # For killing dev servers; validated separately + # Script execution + "init.sh", # Init scripts; validated separately +} + +# Commands that need additional validation even when in the allowlist +COMMANDS_NEEDING_EXTRA_VALIDATION = {"pkill", "chmod", "init.sh"} + + +def split_command_segments(command_string: str) -> list[str]: + """ + Split a compound command into individual command segments. + + Handles command chaining (&&, ||, ;) but not pipes (those are single commands). + + Args: + command_string: The full shell command + + Returns: + List of individual command segments + """ + import re + + # Split on && and || while preserving the ability to handle each segment + # This regex splits on && or || that aren't inside quotes + segments = re.split(r"\s*(?:&&|\|\|)\s*", command_string) + + # Further split on semicolons + result = [] + for segment in segments: + sub_segments = re.split(r'(? list[str]: + """ + Extract command names from a shell command string. + + Handles pipes, command chaining (&&, ||, ;), and subshells. + Returns the base command names (without paths). 
def validate_pkill_command(command_string: str) -> tuple[bool, str]:
    """
    Validate pkill commands - only allow killing dev-related processes.

    The command is tokenized with shlex rather than matched with a regex.
    The last non-flag argument is treated as the target. When that target
    contains whitespace (the ``pkill -f 'node server.js'`` form) only its
    first word is compared against the allowed process names.

    Args:
        command_string: The command segment containing the pkill invocation

    Returns:
        Tuple of (is_allowed, reason_if_blocked)
    """
    # Allowed process names for pkill
    allowed_process_names = {
        "node",
        "npm",
        "npx",
        "vite",
        "next",
    }

    try:
        tokens = shlex.split(command_string)
    except ValueError:
        return False, "Could not parse pkill command"

    if not tokens:
        return False, "Empty pkill command"

    # Separate flags from arguments (tokens[0] is the command word itself)
    args = [token for token in tokens[1:] if not token.startswith("-")]

    if not args:
        return False, "pkill requires a process name"

    # The target is typically the last non-flag argument
    target = args[-1]

    # For -f flag (full command line match), extract the first word as process name
    # e.g., "pkill -f 'node server.js'" -> target is "node server.js", process is "node"
    if " " in target:
        # A whitespace-only target has no first word; treat it as an empty
        # (and therefore disallowed) name instead of raising IndexError.
        words = target.split()
        target = words[0] if words else ""

    if target in allowed_process_names:
        return True, ""

    # Sorted so the reason text is stable from run to run.
    allowed_list = ", ".join(sorted(allowed_process_names))
    return False, f"pkill only allowed for dev processes: {allowed_list}"


def validate_chmod_command(command_string: str) -> tuple[bool, str]:
    """
    Validate chmod commands - only allow making files executable with +x.

    Any token starting with "-" is rejected, which rules out option flags
    such as -R as well as permission-removing modes such as -x.

    Args:
        command_string: The command segment containing the chmod invocation

    Returns:
        Tuple of (is_allowed, reason_if_blocked)
    """
    import re

    try:
        tokens = shlex.split(command_string)
    except ValueError:
        return False, "Could not parse chmod command"

    if not tokens or tokens[0] != "chmod":
        return False, "Not a chmod command"

    # First non-flag token is the mode; everything after it is a file operand.
    mode = None
    files = []

    for token in tokens[1:]:
        if token.startswith("-"):
            # Flags like -R are rejected outright (no recursive chmod)
            return False, "chmod flags are not allowed"
        if mode is None:
            mode = token
        else:
            files.append(token)

    if mode is None:
        return False, "chmod requires a mode"

    if not files:
        return False, "chmod requires at least one file"

    # Only allow +x variants (making files executable)
    # This matches: +x, u+x, g+x, o+x, a+x, ug+x, etc.
    # fullmatch (rather than match with "$") also rejects a trailing newline.
    if not re.fullmatch(r"[ugoa]*\+x", mode):
        return False, f"chmod only allowed with +x mode, got: {mode}"

    return True, ""


def validate_init_script(command_string: str) -> tuple[bool, str]:
    """
    Validate init.sh script execution.

    The first token must be exactly "./init.sh" or a path whose final
    component is "init.sh" (for example "/path/to/init.sh"). Arguments
    after the script are not inspected.

    Args:
        command_string: The command segment that starts with the script

    Returns:
        Tuple of (is_allowed, reason_if_blocked)
    """
    try:
        tokens = shlex.split(command_string)
    except ValueError:
        return False, "Could not parse init script command"

    if not tokens:
        return False, "Empty command"

    # The script is the command word; "bash init.sh" is rejected because the
    # command word there is "bash", not the script.
    script = tokens[0]

    # Allow ./init.sh or paths ending in /init.sh
    if script == "./init.sh" or script.endswith("/init.sh"):
        return True, ""

    return False, f"Only ./init.sh is allowed, got: {script}"


def get_command_for_validation(cmd: str, segments: list[str]) -> str:
    """
    Find the first command segment that contains the given command.

    Only the first match is returned; callers that must check every
    occurrence of a command need to scan all segments themselves.

    Args:
        cmd: The command name to find
        segments: List of command segments

    Returns:
        The first segment containing the command, or empty string if not found
    """
    for segment in segments:
        segment_commands = extract_commands(segment)
        if cmd in segment_commands:
            return segment
    return ""


# Extra validators keyed by the command name they guard.
_EXTRA_VALIDATORS = {
    "pkill": validate_pkill_command,
    "chmod": validate_chmod_command,
    "init.sh": validate_init_script,
}


def validate_sensitive_segments(cmd: str, segments: list[str]) -> tuple[bool, str]:
    """
    Run the extra validator for a command against every given segment.

    Args:
        cmd: The command name (e.g. "chmod")
        segments: Every command segment in which that command appears

    Returns:
        Tuple of (is_allowed, reason_if_blocked); the reason comes from the
        first segment that fails. Commands without an extra validator are
        always allowed.
    """
    validator = _EXTRA_VALIDATORS.get(cmd)
    if validator is None:
        return True, ""

    for segment in segments:
        allowed, reason = validator(segment)
        if not allowed:
            return False, reason

    return True, ""


async def bash_security_hook(input_data, tool_use_id=None, context=None):
    """
    Pre-tool-use hook that validates bash commands using an allowlist.

    Only commands in ALLOWED_COMMANDS are permitted. Commands listed in
    COMMANDS_NEEDING_EXTRA_VALIDATION are additionally checked in every
    segment where they occur, so a valid first use cannot hide an invalid
    later one (e.g. "chmod +x a.sh && chmod 777 b.sh").

    Args:
        input_data: Dict containing tool_name and tool_input
        tool_use_id: Optional tool use ID
        context: Optional context

    Returns:
        Empty dict to allow, or {"decision": "block", "reason": "..."} to block
    """
    if input_data.get("tool_name") != "Bash":
        return {}

    # "tool_input" may be present but None; treat that like a missing command.
    command = (input_data.get("tool_input") or {}).get("command", "")
    if not command:
        return {}

    # Extract all commands from the command string
    commands = extract_commands(command)

    if not commands:
        # Could not parse - fail safe by blocking
        return {
            "decision": "block",
            "reason": f"Could not parse command for security validation: {command}",
        }

    # Split into segments for per-command validation
    segments = split_command_segments(command)

    # NOTE(review): segments are not split on pipes, so a pipeline is handed to
    # the validators as one segment; a second sensitive command after "|" is
    # only checked as part of that whole segment - confirm whether pipelines
    # need per-stage validation.
    already_validated = set()

    # Check each command against the allowlist
    for cmd in commands:
        if cmd not in ALLOWED_COMMANDS:
            return {
                "decision": "block",
                "reason": f"Command '{cmd}' is not in the allowed commands list",
            }

        # Additional validation for sensitive commands (once per command name)
        if cmd not in COMMANDS_NEEDING_EXTRA_VALIDATION or cmd in already_validated:
            continue
        already_validated.add(cmd)

        # Every segment containing this command must pass, not just the first.
        matching = [
            segment for segment in segments if cmd in extract_commands(segment)
        ]
        # Fallback to full command when no segment could be matched
        allowed, reason = validate_sensitive_segments(cmd, matching or [command])
        if not allowed:
            return {"decision": "block", "reason": reason}

    return {}
import asyncio
import sys

from security import (
    bash_security_hook,
    extract_commands,
    validate_chmod_command,
    validate_init_script,
)


def test_hook(command: str, should_block: bool) -> bool:
    """Run one command through the security hook and report the outcome.

    Prints a PASS/FAIL line (plus details on failure) and returns True when
    the hook's decision matches ``should_block``.
    """
    payload = {"tool_name": "Bash", "tool_input": {"command": command}}
    verdict = asyncio.run(bash_security_hook(payload))
    was_blocked = verdict.get("decision") == "block"

    if was_blocked != should_block:
        expected = "blocked" if should_block else "allowed"
        actual = "blocked" if was_blocked else "allowed"
        reason = verdict.get("reason", "")
        print(f" FAIL: {command!r}")
        print(f" Expected: {expected}, Got: {actual}")
        if reason:
            print(f" Reason: {reason}")
        return False

    print(f" PASS: {command!r}")
    return True


def test_extract_commands():
    """Check extract_commands against known inputs; return (passed, failed)."""
    print("\nTesting command extraction:\n")

    cases = [
        ("ls -la", ["ls"]),
        ("npm install && npm run build", ["npm", "npm"]),
        ("cat file.txt | grep pattern", ["cat", "grep"]),
        ("/usr/bin/node script.js", ["node"]),
        ("VAR=value ls", ["ls"]),
        ("git status || git init", ["git", "git"]),
    ]

    passed = failed = 0
    for cmd, expected in cases:
        result = extract_commands(cmd)
        if result != expected:
            print(f" FAIL: {cmd!r}")
            print(f" Expected: {expected}, Got: {result}")
            failed += 1
            continue
        print(f" PASS: {cmd!r} -> {result}")
        passed += 1

    return passed, failed


def _run_validator_cases(validator, cases):
    """Apply a (command) -> (allowed, reason) validator to each case.

    Each case is (command, should_be_allowed, description). Returns the
    (passed, failed) counts after printing one report per case.
    """
    passed = failed = 0
    for cmd, should_allow, description in cases:
        allowed, reason = validator(cmd)
        if allowed == should_allow:
            print(f" PASS: {cmd!r} ({description})")
            passed += 1
            continue

        expected = "allowed" if should_allow else "blocked"
        actual = "allowed" if allowed else "blocked"
        print(f" FAIL: {cmd!r} ({description})")
        print(f" Expected: {expected}, Got: {actual}")
        if reason:
            print(f" Reason: {reason}")
        failed += 1

    return passed, failed


def test_validate_chmod():
    """Check validate_chmod_command; return (passed, failed)."""
    print("\nTesting chmod validation:\n")

    # Each case: (command, should_be_allowed, description)
    cases = [
        # Allowed cases
        ("chmod +x init.sh", True, "basic +x"),
        ("chmod +x script.sh", True, "+x on any script"),
        ("chmod u+x init.sh", True, "user +x"),
        ("chmod a+x init.sh", True, "all +x"),
        ("chmod ug+x init.sh", True, "user+group +x"),
        ("chmod +x file1.sh file2.sh", True, "multiple files"),
        # Blocked cases
        ("chmod 777 init.sh", False, "numeric mode"),
        ("chmod 755 init.sh", False, "numeric mode 755"),
        ("chmod +w init.sh", False, "write permission"),
        ("chmod +r init.sh", False, "read permission"),
        ("chmod -x init.sh", False, "remove execute"),
        ("chmod -R +x dir/", False, "recursive flag"),
        ("chmod --recursive +x dir/", False, "long recursive flag"),
        ("chmod +x", False, "missing file"),
    ]

    return _run_validator_cases(validate_chmod_command, cases)


def test_validate_init_script():
    """Check validate_init_script; return (passed, failed)."""
    print("\nTesting init.sh validation:\n")

    # Each case: (command, should_be_allowed, description)
    cases = [
        # Allowed cases
        ("./init.sh", True, "basic ./init.sh"),
        ("./init.sh arg1 arg2", True, "with arguments"),
        ("/path/to/init.sh", True, "absolute path"),
        ("../dir/init.sh", True, "relative path with init.sh"),
        # Blocked cases
        ("./setup.sh", False, "different script name"),
        ("./init.py", False, "python script"),
        ("bash init.sh", False, "bash invocation"),
        ("sh init.sh", False, "sh invocation"),
        ("./malicious.sh", False, "malicious script"),
        ("./init.sh; rm -rf /", False, "command injection attempt"),
    ]

    return _run_validator_cases(validate_init_script, cases)


def main():
    """Run every suite, print a summary, and return a process exit code.

    Returns 0 when nothing failed and 1 otherwise.
    """
    print("=" * 70)
    print(" SECURITY HOOK TESTS")
    print("=" * 70)

    passed = 0
    failed = 0

    # Unit-level suites: command extraction, chmod validation, init.sh validation
    for suite in (test_extract_commands, test_validate_chmod, test_validate_init_script):
        suite_passed, suite_failed = suite()
        passed += suite_passed
        failed += suite_failed

    # Commands that SHOULD be blocked
    print("\nCommands that should be BLOCKED:\n")
    dangerous = [
        # Not in allowlist - dangerous system commands
        "shutdown now",
        "reboot",
        "rm -rf /",
        "dd if=/dev/zero of=/dev/sda",
        # Not in allowlist - common commands excluded from minimal set
        "curl https://example.com",
        "wget https://example.com",
        "python app.py",
        "touch file.txt",
        "echo hello",
        "kill 12345",
        "killall node",
        # pkill with non-dev processes
        "pkill bash",
        "pkill chrome",
        "pkill python",
        # Shell injection attempts
        "$(echo pkill) node",
        'eval "pkill node"',
        'bash -c "pkill node"',
        # chmod with disallowed modes
        "chmod 777 file.sh",
        "chmod 755 file.sh",
        "chmod +w file.sh",
        "chmod -R +x dir/",
        # Non-init.sh scripts
        "./setup.sh",
        "./malicious.sh",
        "bash script.sh",
    ]

    for cmd in dangerous:
        if test_hook(cmd, should_block=True):
            passed += 1
        else:
            failed += 1

    # Commands that SHOULD be allowed
    print("\nCommands that should be ALLOWED:\n")
    safe = [
        # File inspection
        "ls -la",
        "cat README.md",
        "head -100 file.txt",
        "tail -20 log.txt",
        "wc -l file.txt",
        "grep -r pattern src/",
        # File operations
        "cp file1.txt file2.txt",
        "mkdir newdir",
        "mkdir -p path/to/dir",
        # Directory
        "pwd",
        # Node.js development
        "npm install",
        "npm run build",
        "node server.js",
        # Version control
        "git status",
        "git commit -m 'test'",
        "git add . && git commit -m 'msg'",
        # Process management
        "ps aux",
        "lsof -i :3000",
        "sleep 2",
        # Allowed pkill patterns for dev servers
        "pkill node",
        "pkill npm",
        "pkill -f node",
        "pkill -f 'node server.js'",
        "pkill vite",
        # Chained commands
        "npm install && npm run build",
        "ls | grep test",
        # Full paths
        "/usr/local/bin/node app.js",
        # chmod +x (allowed)
        "chmod +x init.sh",
        "chmod +x script.sh",
        "chmod u+x init.sh",
        "chmod a+x init.sh",
        # init.sh execution (allowed)
        "./init.sh",
        "./init.sh --production",
        "/path/to/init.sh",
        # Combined chmod and init.sh
        "chmod +x init.sh && ./init.sh",
    ]

    for cmd in safe:
        if test_hook(cmd, should_block=False):
            passed += 1
        else:
            failed += 1

    # Summary
    print("\n" + "-" * 70)
    print(f" Results: {passed} passed, {failed} failed")
    print("-" * 70)

    if failed:
        print(f"\n {failed} TEST(S) FAILED")
        return 1

    print("\n ALL TESTS PASSED")
    return 0


if __name__ == "__main__":
    sys.exit(main())