Initial commit: Linear-integrated autonomous coding agent with Initializer Bis support
This commit is contained in:
9
.claude/settings.local.json
Normal file
9
.claude/settings.local.json
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
{
|
||||||
|
"permissions": {
|
||||||
|
"allow": [
|
||||||
|
"Bash(test:*)",
|
||||||
|
"Bash(cat:*)",
|
||||||
|
"Bash(netstat:*)"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
9
.gitignore
vendored
Normal file
9
.gitignore
vendored
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
# Agent-generated output directories
|
||||||
|
generations/
|
||||||
|
|
||||||
|
# Log files
|
||||||
|
logs/
|
||||||
|
|
||||||
|
.env
|
||||||
|
venv
|
||||||
|
__pycache__
|
||||||
21
LICENSE
Normal file
21
LICENSE
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2025 Cole Medin
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
280
README.md
Normal file
280
README.md
Normal file
@@ -0,0 +1,280 @@
|
|||||||
|
# Autonomous Coding Agent Demo (Linear-Integrated)
|
||||||
|
|
||||||
|
A minimal harness demonstrating long-running autonomous coding with the Claude Agent SDK. This demo implements a two-agent pattern (initializer + coding agent) with **Linear as the core project management system** for tracking all work.
|
||||||
|
|
||||||
|
## Key Features
|
||||||
|
|
||||||
|
- **Linear Integration**: All work is tracked as Linear issues, not local files
|
||||||
|
- **Real-time Visibility**: Watch agent progress directly in your Linear workspace
|
||||||
|
- **Session Handoff**: Agents communicate via Linear comments, not text files
|
||||||
|
- **Two-Agent Pattern**: Initializer creates Linear project & issues, coding agents implement them
|
||||||
|
- **Initializer Bis**: Add new features to existing projects without re-initializing
|
||||||
|
- **Browser Testing**: Puppeteer MCP for UI verification
|
||||||
|
- **Claude Opus 4.5**: Uses Claude's most capable model by default
|
||||||
|
|
||||||
|
## Prerequisites
|
||||||
|
|
||||||
|
### 1. Install Claude Code CLI and Python SDK
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Install Claude Code CLI (latest version required)
|
||||||
|
npm install -g @anthropic-ai/claude-code
|
||||||
|
|
||||||
|
# Install Python dependencies
|
||||||
|
pip install -r requirements.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. Set Up Authentication
|
||||||
|
|
||||||
|
You need two authentication tokens:
|
||||||
|
|
||||||
|
**Claude Code OAuth Token:**
|
||||||
|
```bash
|
||||||
|
# Generate the token using Claude Code CLI
|
||||||
|
claude setup-token
|
||||||
|
|
||||||
|
# Set the environment variable
|
||||||
|
export CLAUDE_CODE_OAUTH_TOKEN='your-oauth-token-here'
|
||||||
|
```
|
||||||
|
|
||||||
|
**Linear API Key:**
|
||||||
|
```bash
|
||||||
|
# Get your API key from: https://linear.app/YOUR-TEAM/settings/api
|
||||||
|
export LINEAR_API_KEY='lin_api_xxxxxxxxxxxxx'
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. Verify Installation
|
||||||
|
|
||||||
|
```bash
|
||||||
|
claude --version # Should be latest version
|
||||||
|
pip show claude-code-sdk # Check SDK is installed
|
||||||
|
```
|
||||||
|
|
||||||
|
## Quick Start
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Initialize a new project
|
||||||
|
python autonomous_agent_demo.py --project-dir ./my_project
|
||||||
|
|
||||||
|
# Add new features to an existing project
|
||||||
|
python autonomous_agent_demo.py --project-dir ./my_project --new-spec app_spec_theme_customization.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
For testing with limited iterations:
|
||||||
|
```bash
|
||||||
|
python autonomous_agent_demo.py --project-dir ./my_project --max-iterations 3
|
||||||
|
```
|
||||||
|
|
||||||
|
## How It Works
|
||||||
|
|
||||||
|
### Linear-Centric Workflow
|
||||||
|
|
||||||
|
```
|
||||||
|
┌─────────────────────────────────────────────────────────────┐
|
||||||
|
│ LINEAR-INTEGRATED WORKFLOW │
|
||||||
|
├─────────────────────────────────────────────────────────────┤
|
||||||
|
│ app_spec.txt ──► Initializer Agent ──► Linear Issues (50) │
|
||||||
|
│ │ │
|
||||||
|
│ ┌─────────────────────────▼──────────┐ │
|
||||||
|
│ │ LINEAR WORKSPACE │ │
|
||||||
|
│ │ ┌────────────────────────────┐ │ │
|
||||||
|
│ │ │ Issue: Auth - Login flow │ │ │
|
||||||
|
│ │ │ Status: Todo → In Progress │ │ │
|
||||||
|
│ │ │ Comments: [session notes] │ │ │
|
||||||
|
│ │ └────────────────────────────┘ │ │
|
||||||
|
│ └────────────────────────────────────┘ │
|
||||||
|
│ │ │
|
||||||
|
│ Coding Agent queries Linear │
|
||||||
|
│ ├── Search for Todo issues │
|
||||||
|
│ ├── Update status to In Progress │
|
||||||
|
│ ├── Implement & test with Puppeteer │
|
||||||
|
│ ├── Add comment with implementation notes│
|
||||||
|
│ └── Update status to Done │
|
||||||
|
└─────────────────────────────────────────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
### Two-Agent Pattern
|
||||||
|
|
||||||
|
1. **Initializer Agent (Session 1):**
|
||||||
|
- Reads `app_spec.txt`
|
||||||
|
- Lists teams and creates a new Linear project
|
||||||
|
- Creates 50 Linear issues with detailed test steps
|
||||||
|
- Creates a META issue for session tracking
|
||||||
|
- Sets up project structure, `init.sh`, and git
|
||||||
|
|
||||||
|
2. **Coding Agent (Sessions 2+):**
|
||||||
|
- Queries Linear for highest-priority Todo issue
|
||||||
|
- Runs verification tests on previously completed features
|
||||||
|
- Claims issue (status → In Progress)
|
||||||
|
- Implements the feature
|
||||||
|
- Tests via Puppeteer browser automation
|
||||||
|
- Adds implementation comment to issue
|
||||||
|
- Marks complete (status → Done)
|
||||||
|
- Updates META issue with session summary
|
||||||
|
|
||||||
|
### Initializer Bis: Adding New Features
|
||||||
|
|
||||||
|
The **Initializer Bis** agent allows you to add new features to an existing project without re-initializing it. This is useful when you want to extend your application with additional functionality.
|
||||||
|
|
||||||
|
**How it works:**
|
||||||
|
1. Create a new specification file (e.g., `app_spec_theme_customization.txt`) in the `prompts/` directory
|
||||||
|
2. Run the agent with `--new-spec` flag pointing to your new spec file
|
||||||
|
3. The Initializer Bis agent will:
|
||||||
|
- Read the existing project state from `.linear_project.json`
|
||||||
|
- Read the new specification file
|
||||||
|
- Create new Linear issues for each `<feature>` tag in the spec
|
||||||
|
- Add these issues to the existing Linear project
|
||||||
|
- Update the META issue with information about the new features
|
||||||
|
- Copy the new spec file to the project directory
|
||||||
|
|
||||||
|
**Example:**
|
||||||
|
```bash
|
||||||
|
# Add theme customization features to an existing project
|
||||||
|
python autonomous_agent_demo.py --project-dir ./my_project --new-spec app_spec_theme_customization.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
This will create multiple Linear issues (one per `<feature>` tag) that will be worked on by subsequent coding agent sessions.
|
||||||
|
|
||||||
|
### Session Handoff via Linear
|
||||||
|
|
||||||
|
Instead of local text files, agents communicate through:
|
||||||
|
- **Issue Comments**: Implementation details, blockers, context
|
||||||
|
- **META Issue**: Session summaries and handoff notes
|
||||||
|
- **Issue Status**: Todo / In Progress / Done workflow
|
||||||
|
|
||||||
|
## Environment Variables
|
||||||
|
|
||||||
|
| Variable | Description | Required |
|
||||||
|
|----------|-------------|----------|
|
||||||
|
| `CLAUDE_CODE_OAUTH_TOKEN` | Claude Code OAuth token (from `claude setup-token`) | Yes |
|
||||||
|
| `LINEAR_API_KEY` | Linear API key for MCP access | Yes |
|
||||||
|
|
||||||
|
## Command Line Options
|
||||||
|
|
||||||
|
| Option | Description | Default |
|
||||||
|
|--------|-------------|---------|
|
||||||
|
| `--project-dir` | Directory for the project (relative paths are automatically placed under `generations/`) | `./autonomous_demo_project` |
|
||||||
|
| `--max-iterations` | Max agent iterations | Unlimited |
|
||||||
|
| `--model` | Claude model to use | `claude-opus-4-5-20251101` |
|
||||||
|
| `--new-spec` | Name of new specification file to add (e.g., 'app_spec_new1.txt'). Use this to add new features to an existing project. | None |
|
||||||
|
|
||||||
|
## Project Structure
|
||||||
|
|
||||||
|
```
|
||||||
|
linear-agent-harness/
|
||||||
|
├── autonomous_agent_demo.py # Main entry point
|
||||||
|
├── agent.py # Agent session logic
|
||||||
|
├── client.py # Claude SDK + MCP client configuration
|
||||||
|
├── security.py # Bash command allowlist and validation
|
||||||
|
├── progress.py # Progress tracking utilities
|
||||||
|
├── prompts.py # Prompt loading utilities
|
||||||
|
├── linear_config.py # Linear configuration constants
|
||||||
|
├── prompts/
|
||||||
|
│ ├── app_spec.txt # Application specification
|
||||||
|
│ ├── app_spec_theme_customization.txt # Example: Theme customization spec
|
||||||
|
│ ├── app_spec_mistral_extensible.txt # Example: Mistral provider spec
|
||||||
|
│ ├── initializer_prompt.md # First session prompt (creates Linear issues)
|
||||||
|
│ ├── initializer_bis_prompt.md # Prompt for adding new features
|
||||||
|
│ └── coding_prompt.md # Continuation session prompt (works issues)
|
||||||
|
└── requirements.txt # Python dependencies
|
||||||
|
```
|
||||||
|
|
||||||
|
## Generated Project Structure
|
||||||
|
|
||||||
|
After running, your project directory (created under `generations/` when `--project-dir` is a relative path) will contain:
|
||||||
|
|
||||||
|
```
|
||||||
|
my_project/
|
||||||
|
├── .linear_project.json # Linear project state (marker file)
|
||||||
|
├── app_spec.txt # Copied specification
|
||||||
|
├── app_spec_theme_customization.txt # New spec file (if using --new-spec)
|
||||||
|
├── init.sh # Environment setup script
|
||||||
|
├── .claude_settings.json # Security settings
|
||||||
|
└── [application files] # Generated application code
|
||||||
|
```
|
||||||
|
|
||||||
|
## MCP Servers Used
|
||||||
|
|
||||||
|
| Server | Transport | Purpose |
|
||||||
|
|--------|-----------|---------|
|
||||||
|
| **Linear** | HTTP (Streamable HTTP) | Project management - issues, status, comments |
|
||||||
|
| **Puppeteer** | stdio | Browser automation for UI testing |
|
||||||
|
|
||||||
|
## Security Model
|
||||||
|
|
||||||
|
This demo uses defense-in-depth security (see `security.py` and `client.py`):
|
||||||
|
|
||||||
|
1. **OS-level Sandbox:** Bash commands run in an isolated environment
|
||||||
|
2. **Filesystem Restrictions:** File operations restricted to project directory
|
||||||
|
3. **Bash Allowlist:** Only specific commands permitted (npm, node, git, etc.)
|
||||||
|
4. **MCP Permissions:** Tools explicitly allowed in security settings
|
||||||
|
|
||||||
|
## Linear Setup
|
||||||
|
|
||||||
|
Before running, ensure you have:
|
||||||
|
|
||||||
|
1. A Linear workspace with at least one team
|
||||||
|
2. An API key with read/write permissions (from Settings > API)
|
||||||
|
3. The agent will automatically detect your team and create a project
|
||||||
|
|
||||||
|
The initializer agent will create:
|
||||||
|
- A new Linear project named after your app
|
||||||
|
- 50 feature issues based on `app_spec.txt`
|
||||||
|
- 1 META issue for session tracking and handoff
|
||||||
|
|
||||||
|
All subsequent coding agents will work from this Linear project.
|
||||||
|
|
||||||
|
## Customization
|
||||||
|
|
||||||
|
### Changing the Application
|
||||||
|
|
||||||
|
Edit `prompts/app_spec.txt` to specify a different application to build.
|
||||||
|
|
||||||
|
### Adding New Features to Existing Projects
|
||||||
|
|
||||||
|
1. Create a new specification file in `prompts/` directory (e.g., `app_spec_new_feature.txt`)
|
||||||
|
2. Format it with `<feature>` tags following the same structure as `app_spec.txt`
|
||||||
|
3. Run with `--new-spec` flag:
|
||||||
|
```bash
|
||||||
|
python autonomous_agent_demo.py --project-dir ./my_project --new-spec app_spec_new_feature.txt
|
||||||
|
```
|
||||||
|
4. The Initializer Bis agent will create new Linear issues for each feature in the spec file
|
||||||
|
|
||||||
|
### Adjusting Issue Count
|
||||||
|
|
||||||
|
Edit `prompts/initializer_prompt.md` and change "50 issues" to your desired count.
|
||||||
|
|
||||||
|
### Modifying Allowed Commands
|
||||||
|
|
||||||
|
Edit `security.py` to add or remove commands from `ALLOWED_COMMANDS`.
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
**"CLAUDE_CODE_OAUTH_TOKEN not set"**
|
||||||
|
Run `claude setup-token` to generate a token, then export it.
|
||||||
|
|
||||||
|
**"LINEAR_API_KEY not set"**
|
||||||
|
Get your API key from `https://linear.app/YOUR-TEAM/settings/api`
|
||||||
|
|
||||||
|
**"Appears to hang on first run"**
|
||||||
|
Normal behavior. The initializer is creating a Linear project and 50 issues with detailed descriptions. Watch for `[Tool: mcp__linear__create_issue]` output.
|
||||||
|
|
||||||
|
**"Command blocked by security hook"**
|
||||||
|
The agent tried to run a disallowed command. Add it to `ALLOWED_COMMANDS` in `security.py` if needed.
|
||||||
|
|
||||||
|
**"MCP server connection failed"**
|
||||||
|
Verify your `LINEAR_API_KEY` is valid and has appropriate permissions. The Linear MCP server uses HTTP transport at `https://mcp.linear.app/mcp`.
|
||||||
|
|
||||||
|
## Viewing Progress
|
||||||
|
|
||||||
|
Open your Linear workspace to see:
|
||||||
|
- The project created by the initializer agent
|
||||||
|
- All 50 issues organized under the project
|
||||||
|
- Real-time status changes (Todo → In Progress → Done)
|
||||||
|
- Implementation comments on each issue
|
||||||
|
- Session summaries on the META issue
|
||||||
|
- New issues added by Initializer Bis when using `--new-spec`
|
||||||
|
|
||||||
|
## License
|
||||||
|
|
||||||
|
MIT License - see [LICENSE](LICENSE) for details.
|
||||||
231
agent.py
Normal file
231
agent.py
Normal file
@@ -0,0 +1,231 @@
|
|||||||
|
"""
|
||||||
|
Agent Session Logic
|
||||||
|
===================
|
||||||
|
|
||||||
|
Core agent interaction functions for running autonomous coding sessions.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from claude_code_sdk import ClaudeSDKClient
|
||||||
|
|
||||||
|
from client import create_client
|
||||||
|
from progress import print_session_header, print_progress_summary, is_linear_initialized
|
||||||
|
from prompts import (
|
||||||
|
get_initializer_prompt,
|
||||||
|
get_initializer_bis_prompt,
|
||||||
|
get_coding_prompt,
|
||||||
|
copy_spec_to_project,
|
||||||
|
copy_new_spec_to_project,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# Configuration
# Pause (in seconds) applied after each session before the loop continues or retries.
AUTO_CONTINUE_DELAY_SECONDS = 3
|
||||||
|
|
||||||
|
|
||||||
|
async def run_agent_session(
    client: ClaudeSDKClient,
    message: str,
    project_dir: Path,
) -> tuple[str, str]:
    """
    Send one prompt to the agent and stream its reply to stdout.

    Assistant text is echoed as it arrives and accumulated; tool invocations
    and tool results are summarised on their own lines. Messages and blocks
    are recognised by their class name, so any object with the matching type
    name and attributes is handled.

    Args:
        client: Claude SDK client
        message: The prompt to send
        project_dir: Project directory path (not read by this function)

    Returns:
        (status, response_text) where status is:
        - "continue" with the concatenated assistant text on success
        - "error" with the exception message if anything raised
    """
    print("Sending prompt to Claude Agent SDK...\n")

    try:
        await client.query(message)

        collected = ""
        async for incoming in client.receive_response():
            sender = type(incoming).__name__
            from_assistant = sender == "AssistantMessage"
            from_user = sender == "UserMessage"

            # Only assistant/user messages that actually carry content are shown.
            if not (from_assistant or from_user):
                continue
            if not hasattr(incoming, "content"):
                continue

            for part in incoming.content:
                part_kind = type(part).__name__

                if from_assistant:
                    if part_kind == "TextBlock" and hasattr(part, "text"):
                        collected += part.text
                        print(part.text, end="", flush=True)
                    elif part_kind == "ToolUseBlock" and hasattr(part, "name"):
                        print(f"\n[Tool: {part.name}]", flush=True)
                        if hasattr(part, "input"):
                            # Long tool inputs are cut to 200 chars plus an ellipsis.
                            rendered = str(part.input)
                            ellipsis = "..." if len(rendered) > 200 else ""
                            print(f" Input: {rendered[:200]}{ellipsis}", flush=True)
                    continue

                # User messages: only tool results are reported.
                if part_kind != "ToolResultBlock":
                    continue

                payload = getattr(part, "content", "")
                failed = getattr(part, "is_error", False)

                if "blocked" in str(payload).lower():
                    # Result text mentioning "blocked" is shown in full
                    print(f" [BLOCKED] {payload}", flush=True)
                elif failed:
                    # Errors are shown truncated to 500 chars
                    print(f" [Error] {str(payload)[:500]}", flush=True)
                else:
                    # Success needs only a brief confirmation
                    print(" [Done]", flush=True)

        print("\n" + "-" * 70 + "\n")
        return "continue", collected

    except Exception as e:
        print(f"Error during agent session: {e}")
        return "error", str(e)
|
||||||
|
|
||||||
|
|
||||||
|
async def run_autonomous_agent(
    project_dir: Path,
    model: str,
    max_iterations: Optional[int] = None,
    new_spec_filename: Optional[str] = None,
) -> None:
    """
    Run the autonomous agent loop.

    Each iteration creates a fresh client, picks a prompt (initializer,
    initializer bis, or coding) and runs one session. The loop only ends
    when the iteration limit is reached.

    Args:
        project_dir: Directory for the project (created if it does not exist)
        model: Claude model to use
        max_iterations: Maximum number of iterations (None for unlimited;
            0 runs no sessions)
        new_spec_filename: Name of an additional spec file. When the project
            is already initialized, the file is copied into the project and
            the first session uses the initializer bis prompt. Ignored on a
            fresh start.
    """
    print("\n" + "=" * 70)
    print(" AUTONOMOUS CODING AGENT DEMO")
    print("=" * 70)
    print(f"\nProject directory: {project_dir}")
    print(f"Model: {model}")
    # Compare against None so an explicit 0 is reported (and enforced) as a
    # limit rather than silently meaning "unlimited".
    if max_iterations is not None:
        print(f"Max iterations: {max_iterations}")
    else:
        print("Max iterations: Unlimited (will run until completion)")
    print()

    # Create project directory
    project_dir.mkdir(parents=True, exist_ok=True)

    # Check if this is a fresh start, continuation, or adding new specs
    # We use .linear_project.json as the marker for initialization
    is_first_run = not is_linear_initialized(project_dir)
    use_initializer_bis = new_spec_filename is not None and not is_first_run

    if is_first_run:
        print("Fresh start - will use initializer agent")
        print()
        print("=" * 70)
        print(" NOTE: First session takes 10-20+ minutes!")
        print(" The agent is creating 50 Linear issues and setting up the project.")
        print(" This may appear to hang - it's working. Watch for [Tool: ...] output.")
        print("=" * 70)
        print()
        # Copy the app spec into the project directory for the agent to read
        copy_spec_to_project(project_dir)
    elif use_initializer_bis:
        print("Adding new specifications - will use initializer bis agent")
        print()
        print("=" * 70)
        print(f" NOTE: Adding new features from {new_spec_filename}")
        print(" The agent will create new Linear issues for the additional features.")
        print(" This may take several minutes. Watch for [Tool: ...] output.")
        print("=" * 70)
        print()
        # Copy the new spec file into the project directory
        copy_new_spec_to_project(project_dir, new_spec_filename)
        print_progress_summary(project_dir)
    else:
        print("Continuing existing project (Linear initialized)")
        print_progress_summary(project_dir)

    # Main loop
    iteration = 0

    while True:
        iteration += 1

        # Check max iterations
        if max_iterations is not None and iteration > max_iterations:
            print(f"\nReached max iterations ({max_iterations})")
            print("To continue, run the script again without --max-iterations")
            break

        # Print session header
        is_bis_session = use_initializer_bis and iteration == 1
        is_initializer_session = is_first_run or is_bis_session
        print_session_header(iteration, is_initializer_session, is_bis_session)

        # Create client (fresh context)
        client = create_client(project_dir, model)

        # Choose prompt based on session type
        ran_initializer = is_first_run
        if is_first_run:
            prompt = get_initializer_prompt()
        elif is_bis_session:
            prompt = get_initializer_bis_prompt()
            use_initializer_bis = False  # Only use initializer bis once
        else:
            prompt = get_coding_prompt()

        # Run session with async context manager
        async with client:
            status, _ = await run_agent_session(client, prompt, project_dir)

        if ran_initializer:
            # The initializer normally runs once. If its session errored and
            # the initialization marker is still absent, keep the flag set so
            # the retry uses the initializer prompt again -- the same choice a
            # restart of the script would make -- instead of the coding prompt.
            is_first_run = status == "error" and not is_linear_initialized(project_dir)

        # Handle status
        if status == "continue":
            print(f"\nAgent will auto-continue in {AUTO_CONTINUE_DELAY_SECONDS}s...")
            print_progress_summary(project_dir)
            await asyncio.sleep(AUTO_CONTINUE_DELAY_SECONDS)

        elif status == "error":
            print("\nSession encountered an error")
            print("Will retry with a fresh session...")
            await asyncio.sleep(AUTO_CONTINUE_DELAY_SECONDS)

        # Small delay between sessions
        if max_iterations is None or iteration < max_iterations:
            print("\nPreparing next session...\n")
            await asyncio.sleep(1)

    # Final summary
    print("\n" + "=" * 70)
    print(" SESSION COMPLETE")
    print("=" * 70)
    print(f"\nProject directory: {project_dir}")
    print_progress_summary(project_dir)

    # Print instructions for running the generated application
    print("\n" + "-" * 70)
    print(" TO RUN THE GENERATED APPLICATION:")
    print("-" * 70)
    print(f"\n cd {project_dir.resolve()}")
    print(" ./init.sh # Run the setup script")
    print(" # Or manually:")
    print(" npm install && npm run dev")
    print("\n Then open http://localhost:3000 (or check init.sh for the URL)")
    print("-" * 70)

    print("\nDone!")
|
||||||
138
autonomous_agent_demo.py
Normal file
138
autonomous_agent_demo.py
Normal file
@@ -0,0 +1,138 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Autonomous Coding Agent Demo
|
||||||
|
============================
|
||||||
|
|
||||||
|
A minimal harness demonstrating long-running autonomous coding with Claude.
|
||||||
|
This script implements the two-agent pattern (initializer + coding agent) and
|
||||||
|
incorporates all the strategies from the long-running agents guide.
|
||||||
|
|
||||||
|
Example Usage:
|
||||||
|
python autonomous_agent_demo.py --project-dir ./claude_clone_demo
|
||||||
|
python autonomous_agent_demo.py --project-dir ./claude_clone_demo --max-iterations 5
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import asyncio
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from agent import run_autonomous_agent
|
||||||
|
|
||||||
|
|
||||||
|
# Configuration
# Using Claude Opus 4.5 as default for best coding and agentic performance
# See: https://www.anthropic.com/news/claude-opus-4-5
# This value is the default of the --model command line option.
DEFAULT_MODEL = "claude-opus-4-5-20251101"
|
||||||
|
|
||||||
|
|
||||||
|
def parse_args(argv: "list[str] | None" = None) -> argparse.Namespace:
    """
    Parse command line arguments.

    Args:
        argv: Argument strings to parse (without the program name). When
            None (the default), argparse reads them from ``sys.argv``, which
            is the behavior existing callers rely on.

    Returns:
        Namespace with ``project_dir`` (Path), ``max_iterations`` (int or
        None), ``model`` (str) and ``new_spec`` (str or None).
    """
    parser = argparse.ArgumentParser(
        description="Autonomous Coding Agent Demo - Long-running agent harness",
        # Raw formatter keeps the line breaks of the epilog below as written.
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
# Start fresh project
python autonomous_agent_demo.py --project-dir ./claude_clone

# Use a specific model
python autonomous_agent_demo.py --project-dir ./claude_clone --model claude-sonnet-4-5-20250929

# Limit iterations for testing
python autonomous_agent_demo.py --project-dir ./claude_clone --max-iterations 5

# Continue existing project
python autonomous_agent_demo.py --project-dir ./claude_clone

# Add new specifications to existing project
python autonomous_agent_demo.py --project-dir ./claude_clone --new-spec app_spec_new1.txt

Environment Variables:
CLAUDE_CODE_OAUTH_TOKEN Claude Code OAuth token (required)
LINEAR_API_KEY Linear API key (required)
""",
    )

    parser.add_argument(
        "--project-dir",
        type=Path,
        default=Path("./autonomous_demo_project"),
        help="Directory for the project (default: generations/autonomous_demo_project). Relative paths automatically placed in generations/ directory.",
    )

    parser.add_argument(
        "--max-iterations",
        type=int,
        default=None,
        help="Maximum number of agent iterations (default: unlimited)",
    )

    parser.add_argument(
        "--model",
        type=str,
        default=DEFAULT_MODEL,
        help=f"Claude model to use (default: {DEFAULT_MODEL})",
    )

    parser.add_argument(
        "--new-spec",
        type=str,
        default=None,
        help="Name of new specification file to add (e.g., 'app_spec_new1.txt'). Use this to add new features to an existing project.",
    )

    return parser.parse_args(argv)
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """
    Main entry point.

    Parses the command line, verifies that both required environment
    variables are set (printing setup instructions and returning early if
    not), places relative project directories under ``generations/``, and
    runs the autonomous agent loop until it finishes or is interrupted.
    """
    args = parse_args()

    # Check for Claude Code OAuth token
    if not os.environ.get("CLAUDE_CODE_OAUTH_TOKEN"):
        print("Error: CLAUDE_CODE_OAUTH_TOKEN environment variable not set")
        print("\nRun 'claude setup-token' after installing the Claude Code CLI.")
        print("\nThen set it:")
        print(" export CLAUDE_CODE_OAUTH_TOKEN='your-token-here'")
        return

    # Check for Linear API key
    if not os.environ.get("LINEAR_API_KEY"):
        print("Error: LINEAR_API_KEY environment variable not set")
        print("\nGet your API key from: https://linear.app/YOUR-TEAM/settings/api")
        print("\nThen set it:")
        print(" export LINEAR_API_KEY='lin_api_xxxxxxxxxxxxx'")
        return

    # Automatically place projects in generations/ directory unless already specified
    project_dir = args.project_dir
    # Compare path components instead of the string form: str(path) uses the
    # platform separator, so a "generations/" prefix test never matches where
    # the separator is not "/" and the path would be nested a second time.
    already_under_generations = (
        len(project_dir.parts) > 1 and project_dir.parts[0] == "generations"
    )
    # Absolute paths are used as-is; only relative paths are relocated.
    if not project_dir.is_absolute() and not already_under_generations:
        project_dir = Path("generations") / project_dir

    # Run the agent
    try:
        asyncio.run(
            run_autonomous_agent(
                project_dir=project_dir,
                model=args.model,
                max_iterations=args.max_iterations,
                new_spec_filename=args.new_spec,
            )
        )
    except KeyboardInterrupt:
        print("\n\nInterrupted by user")
        print("To resume, run the same command again")
    except Exception as e:
        # Report the failure, then re-raise so the traceback is not lost.
        print(f"\nFatal error: {e}")
        raise
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
169
client.py
Normal file
169
client.py
Normal file
@@ -0,0 +1,169 @@
|
|||||||
|
"""
|
||||||
|
Claude SDK Client Configuration
|
||||||
|
===============================
|
||||||
|
|
||||||
|
Functions for creating and configuring the Claude Agent SDK client.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from claude_code_sdk import ClaudeCodeOptions, ClaudeSDKClient
|
||||||
|
from claude_code_sdk.types import HookMatcher
|
||||||
|
|
||||||
|
from security import bash_security_hook
|
||||||
|
|
||||||
|
|
||||||
|
# Puppeteer MCP tools for browser automation
# Each entry is a fully qualified tool name of the form mcp__puppeteer__<tool>.
# NOTE(review): presumably consumed by create_client when building the
# allowed-tool configuration -- confirm against the rest of this module.
PUPPETEER_TOOLS: list[str] = [
    "mcp__puppeteer__puppeteer_navigate",
    "mcp__puppeteer__puppeteer_screenshot",
    "mcp__puppeteer__puppeteer_click",
    "mcp__puppeteer__puppeteer_fill",
    "mcp__puppeteer__puppeteer_select",
    "mcp__puppeteer__puppeteer_hover",
    "mcp__puppeteer__puppeteer_evaluate",
]
|
||||||
|
|
||||||
|
# Linear MCP tools for project management
# Official Linear MCP server at mcp.linear.app
# Each entry is a fully qualified tool name of the form mcp__linear__<tool>,
# grouped below by the kind of Linear object it operates on.
LINEAR_TOOLS: list[str] = [
    # Team & Project discovery
    "mcp__linear__list_teams",
    "mcp__linear__get_team",
    "mcp__linear__list_projects",
    "mcp__linear__get_project",
    "mcp__linear__create_project",
    "mcp__linear__update_project",
    # Issue management
    "mcp__linear__list_issues",
    "mcp__linear__get_issue",
    "mcp__linear__create_issue",
    "mcp__linear__update_issue",
    "mcp__linear__list_my_issues",
    # Comments
    "mcp__linear__list_comments",
    "mcp__linear__create_comment",
    # Workflow
    "mcp__linear__list_issue_statuses",
    "mcp__linear__get_issue_status",
    "mcp__linear__list_issue_labels",
    # Users
    "mcp__linear__list_users",
    "mcp__linear__get_user",
]
|
||||||
|
|
||||||
|
# Built-in tools
# Tool names without an mcp__ prefix: file access (Read/Write/Edit),
# search (Glob/Grep) and shell execution (Bash).
BUILTIN_TOOLS: list[str] = [
    "Read",
    "Write",
    "Edit",
    "Glob",
    "Grep",
    "Bash",
]
|
||||||
|
|
||||||
|
|
||||||
|
def _require_env(name: str, hint: str) -> str:
    """Return the value of environment variable ``name`` or raise ValueError ending with ``hint``."""
    value = os.environ.get(name)
    if not value:
        raise ValueError(f"{name} environment variable not set.\n{hint}")
    return value


def create_client(project_dir: Path, model: str) -> ClaudeSDKClient:
    """
    Create a Claude Code SDK client with multi-layered security.

    The project directory is created if needed, a ``.claude_settings.json``
    file holding the sandbox/permission settings is written into it, and a
    summary of the configuration is printed to stdout.

    Args:
        project_dir: Directory for the project (also used as the client's cwd)
        model: Claude model to use

    Returns:
        Configured ClaudeSDKClient

    Raises:
        ValueError: If CLAUDE_CODE_OAUTH_TOKEN or LINEAR_API_KEY is unset or empty

    Security layers (defense in depth):
    1. Sandbox - OS-level bash command isolation prevents filesystem escape
    2. Permissions - File operations restricted to project_dir only
    3. Security hooks - Bash commands validated against an allowlist
       (see security.py for ALLOWED_COMMANDS)
    """
    # The OAuth token is only validated here; this function never passes it on.
    # NOTE(review): presumably the SDK/CLI reads it from the environment - confirm.
    _require_env(
        "CLAUDE_CODE_OAUTH_TOKEN",
        "Run 'claude setup-token' after installing the Claude Code CLI.",
    )
    linear_api_key = _require_env(
        "LINEAR_API_KEY",
        "Get your API key from: https://linear.app/YOUR-TEAM/settings/api",
    )

    # Create comprehensive security settings.
    # Relative patterns ("./**") are used because cwd is set to project_dir
    # below, so they scope file access to the project directory.
    security_settings = {
        "sandbox": {"enabled": True, "autoAllowBashIfSandboxed": True},
        "permissions": {
            "defaultMode": "acceptEdits",  # Auto-approve edits within allowed directories
            "allow": [
                # Allow all file operations within the project directory
                "Read(./**)",
                "Write(./**)",
                "Edit(./**)",
                "Glob(./**)",
                "Grep(./**)",
                # Bash permission granted here, but actual commands are validated
                # by the bash_security_hook (see security.py for allowed commands)
                "Bash(*)",
                # Allow Puppeteer MCP tools for browser automation
                *PUPPETEER_TOOLS,
                # Allow Linear MCP tools for project management
                *LINEAR_TOOLS,
            ],
        },
    }

    # Ensure project directory exists before creating settings file
    project_dir.mkdir(parents=True, exist_ok=True)

    # Write settings to a file in the project directory. The encoding is
    # explicit so the file does not depend on the platform's locale default.
    settings_file = project_dir / ".claude_settings.json"
    with open(settings_file, "w", encoding="utf-8") as f:
        json.dump(security_settings, f, indent=2)

    print(f"Created security settings at {settings_file}")
    print(" - Sandbox enabled (OS-level bash isolation)")
    print(f" - Filesystem restricted to: {project_dir.resolve()}")
    print(" - Bash commands restricted to allowlist (see security.py)")
    print(" - MCP servers: puppeteer (browser automation), linear (project management)")
    print()

    return ClaudeSDKClient(
        options=ClaudeCodeOptions(
            model=model,
            system_prompt=(
                "You are an expert full-stack developer building a production-quality web application. "
                "You use Linear for project management and tracking all your work."
            ),
            allowed_tools=[
                *BUILTIN_TOOLS,
                *PUPPETEER_TOOLS,
                *LINEAR_TOOLS,
            ],
            mcp_servers={
                "puppeteer": {"command": "npx", "args": ["puppeteer-mcp-server"]},
                # Linear MCP with Streamable HTTP transport (recommended over SSE)
                # See: https://linear.app/docs/mcp
                "linear": {
                    "type": "http",
                    "url": "https://mcp.linear.app/mcp",
                    "headers": {
                        "Authorization": f"Bearer {linear_api_key}",
                    },
                },
            },
            hooks={
                # Every Bash tool call is passed through the allowlist hook first.
                "PreToolUse": [
                    HookMatcher(matcher="Bash", hooks=[bash_security_hook]),
                ],
            },
            max_turns=1000,
            cwd=str(project_dir.resolve()),
            settings=str(settings_file.resolve()),  # Use absolute path
        )
    )
|
||||||
38
linear_config.py
Normal file
38
linear_config.py
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
"""
|
||||||
|
Linear Configuration
|
||||||
|
====================
|
||||||
|
|
||||||
|
Configuration constants for Linear integration.
|
||||||
|
These values are used in prompts and for project state management.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
# Credentials / identifiers read from the environment at import time.
# Either value is None when the corresponding variable is not set.
LINEAR_API_KEY = os.getenv("LINEAR_API_KEY")
LINEAR_TEAM_ID = os.getenv("LINEAR_TEAM_ID")

# How many issues to create by default (can be overridden via command line)
DEFAULT_ISSUE_COUNT = 50

# Names of the issue workflow states (Linear default states)
STATUS_TODO = "Todo"
STATUS_IN_PROGRESS = "In Progress"
STATUS_DONE = "Done"

# Label names, one per feature category
LABEL_FUNCTIONAL = "functional"
LABEL_STYLE = "style"
LABEL_INFRASTRUCTURE = "infrastructure"

# Priority values (Linear uses 0-4 where 1=Urgent, 4=Low, 0=No priority)
PRIORITY_URGENT, PRIORITY_HIGH, PRIORITY_MEDIUM, PRIORITY_LOW = range(1, 5)

# File name of the local marker that tracks Linear project initialization
LINEAR_PROJECT_MARKER = ".linear_project.json"

# Title of the meta issue used for project tracking and session handoff
META_ISSUE_TITLE = "[META] Project Progress Tracker"
|
||||||
86
progress.py
Normal file
86
progress.py
Normal file
@@ -0,0 +1,86 @@
|
|||||||
|
"""
|
||||||
|
Progress Tracking Utilities
|
||||||
|
===========================
|
||||||
|
|
||||||
|
Functions for tracking and displaying progress of the autonomous coding agent.
|
||||||
|
Progress is tracked via Linear issues, with local state cached in .linear_project.json.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from linear_config import LINEAR_PROJECT_MARKER
|
||||||
|
|
||||||
|
|
||||||
|
def load_linear_project_state(project_dir: Path) -> dict | None:
    """
    Load the Linear project state from the marker file.

    Args:
        project_dir: Directory containing .linear_project.json

    Returns:
        Project state dict, or None if the marker file is missing,
        unreadable, not valid JSON, or does not contain a JSON object
    """
    marker_file = project_dir / LINEAR_PROJECT_MARKER

    # Open directly instead of checking exists() first: a missing file raises
    # FileNotFoundError (an OSError), so there is no check-then-open race.
    try:
        with open(marker_file, "r", encoding="utf-8") as f:
            state = json.load(f)
    except (OSError, ValueError):
        # OSError: missing or unreadable file.
        # ValueError: json.JSONDecodeError and UnicodeDecodeError are both subclasses.
        return None

    # Callers use dict methods on the result, so any other JSON value
    # (list, string, number, null) is treated as "not initialized".
    return state if isinstance(state, dict) else None
|
||||||
|
|
||||||
|
|
||||||
|
def is_linear_initialized(project_dir: Path) -> bool:
    """
    Check if Linear project has been initialized.

    Args:
        project_dir: Directory to check

    Returns:
        True if the loaded project state is a dict whose "initialized"
        entry is truthy, False otherwise
    """
    state = load_linear_project_state(project_dir)

    # Guard against None and against a marker file holding non-object JSON,
    # which would otherwise raise AttributeError on .get().
    if not isinstance(state, dict):
        return False

    # Coerce so the declared bool return type holds whatever the file stores.
    return bool(state.get("initialized", False))
|
||||||
|
|
||||||
|
|
||||||
|
def print_session_header(session_num: int, is_initializer: bool, is_initializer_bis: bool = False) -> None:
    """Print a banner naming the session number and the kind of session.

    ``is_initializer_bis`` takes precedence over ``is_initializer``; when
    neither flag is set the session is labelled as a coding-agent session.
    """
    label = (
        "INITIALIZER BIS"
        if is_initializer_bis
        else "INITIALIZER"
        if is_initializer
        else "CODING AGENT"
    )
    rule = "=" * 70

    print("\n" + rule)
    print(f" SESSION {session_num}: {label}")
    print(rule)
    print()
|
||||||
|
|
||||||
|
|
||||||
|
def print_progress_summary(project_dir: Path) -> None:
    """
    Print the locally cached Linear project status.

    Only the local state file is consulted (via load_linear_project_state);
    it supplies the total issue count and the META issue ID. Live
    Done/In Progress/Todo counts are not read here - the output points
    the user to Linear for those.
    """
    cached = load_linear_project_state(project_dir)

    if cached is not None:
        # Read both values before printing anything.
        issue_total = cached.get("total_issues", 0)
        meta_id = cached.get("meta_issue_id", "unknown")

        print("\nLinear Project Status:")
        print(f" Total issues created: {issue_total}")
        print(f" META issue ID: {meta_id}")
        print(" (Check Linear for current Done/In Progress/Todo counts)")
    else:
        print("\nProgress: Linear project not yet initialized")
|
||||||
63
prompts.py
Normal file
63
prompts.py
Normal file
@@ -0,0 +1,63 @@
|
|||||||
|
"""
|
||||||
|
Prompt Loading Utilities
|
||||||
|
========================
|
||||||
|
|
||||||
|
Functions for loading prompt templates from the prompts directory.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import shutil
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
# The "prompts" directory that sits next to this module; prompt templates and
# spec files are resolved relative to it.
PROMPTS_DIR = Path(__file__).parent / "prompts"
|
||||||
|
|
||||||
|
|
||||||
|
def load_prompt(name: str) -> str:
    """
    Load a prompt template from the prompts directory.

    Args:
        name: Template name without the ".md" extension

    Returns:
        The full text of PROMPTS_DIR/<name>.md

    Raises:
        OSError: If the template file is missing or cannot be read
    """
    prompt_path = PROMPTS_DIR / f"{name}.md"
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding (e.g. cp1252 on Windows).
    return prompt_path.read_text(encoding="utf-8")
|
||||||
|
|
||||||
|
|
||||||
|
def get_initializer_prompt() -> str:
    """Return the text of the "initializer_prompt" template."""
    template_name = "initializer_prompt"
    return load_prompt(template_name)
|
||||||
|
|
||||||
|
|
||||||
|
def get_coding_prompt() -> str:
    """Return the text of the "coding_prompt" template."""
    template_name = "coding_prompt"
    return load_prompt(template_name)
|
||||||
|
|
||||||
|
|
||||||
|
def copy_spec_to_project(project_dir: Path) -> None:
    """Place a copy of app_spec.txt in the project directory unless one is already there.

    An existing app_spec.txt in ``project_dir`` is left untouched and nothing
    is printed in that case.
    """
    target = project_dir / "app_spec.txt"
    if target.exists():
        return

    shutil.copy(PROMPTS_DIR / "app_spec.txt", target)
    print("Copied app_spec.txt to project directory")
|
||||||
|
|
||||||
|
|
||||||
|
############################################################################################
|
||||||
|
# New specifications added by davebb
|
||||||
|
############################################################################################
|
||||||
|
|
||||||
|
def get_initializer_bis_prompt() -> str:
    """Return the text of the "initializer_bis_prompt" template (used when adding new specifications)."""
    template_name = "initializer_bis_prompt"
    return load_prompt(template_name)
|
||||||
|
|
||||||
|
|
||||||
|
def copy_new_spec_to_project(project_dir: Path, new_spec_filename: str) -> None:
    """
    Copy an additional specification file from the prompts directory into the project.

    Unlike the initial spec copy, the destination is written unconditionally.

    Args:
        project_dir: Project directory path
        new_spec_filename: Name of the new spec file (e.g., "app_spec_new1.txt")

    Raises:
        FileNotFoundError: If the file does not exist in the prompts directory
    """
    source_path = PROMPTS_DIR / new_spec_filename

    if source_path.exists():
        shutil.copy(source_path, project_dir / new_spec_filename)
        print(f"Copied {new_spec_filename} to project directory")
        return

    raise FileNotFoundError(f"New specification file not found: {source_path}")
|
||||||
681
prompts/app_spec.txt
Normal file
681
prompts/app_spec.txt
Normal file
@@ -0,0 +1,681 @@
|
|||||||
|
<project_specification>
|
||||||
|
<project_name>Claude.ai Clone - AI Chat Interface</project_name>
|
||||||
|
|
||||||
|
<overview>
|
||||||
|
Build a fully functional clone of claude.ai, Anthropic's conversational AI interface. The application should
|
||||||
|
provide a clean, modern chat interface for interacting with Claude via the API, including features like
|
||||||
|
conversation management, artifact rendering, project organization, multiple model selection, and advanced
|
||||||
|
settings. The UI should closely match claude.ai's design using Tailwind CSS with a focus on excellent
|
||||||
|
user experience and responsive design.
|
||||||
|
</overview>
|
||||||
|
|
||||||
|
<technology_stack>
|
||||||
|
<api_key>
|
||||||
|
You can use an API key located at /tmp/api-key for testing. You will not be allowed to read this file, but you can reference it in code.
|
||||||
|
</api_key>
|
||||||
|
<frontend>
|
||||||
|
<framework>React with Vite</framework>
|
||||||
|
<styling>Tailwind CSS (via CDN)</styling>
|
||||||
|
<state_management>React hooks and context</state_management>
|
||||||
|
<routing>React Router for navigation</routing>
|
||||||
|
<markdown>React Markdown for message rendering</markdown>
|
||||||
|
<code_highlighting>Syntax highlighting for code blocks</code_highlighting>
|
||||||
|
<port>Only launch on port {frontend_port}</port>
|
||||||
|
</frontend>
|
||||||
|
<backend>
|
||||||
|
<runtime>Node.js with Express</runtime>
|
||||||
|
<database>SQLite with better-sqlite3</database>
|
||||||
|
<api_integration>Claude API for chat completions</api_integration>
|
||||||
|
<streaming>Server-Sent Events for streaming responses</streaming>
|
||||||
|
</backend>
|
||||||
|
<communication>
|
||||||
|
<api>RESTful endpoints</api>
|
||||||
|
<streaming>SSE for real-time message streaming</streaming>
|
||||||
|
<claude_api>Integration with Claude API using Anthropic SDK</claude_api>
|
||||||
|
</communication>
|
||||||
|
</technology_stack>
|
||||||
|
|
||||||
|
<prerequisites>
|
||||||
|
<environment_setup>
|
||||||
|
- Repository includes .env with VITE_ANTHROPIC_API_KEY configured
|
||||||
|
- Frontend dependencies pre-installed via pnpm
|
||||||
|
- Backend code goes in /server directory
|
||||||
|
- Install backend dependencies as needed
|
||||||
|
</environment_setup>
|
||||||
|
</prerequisites>
|
||||||
|
|
||||||
|
<core_features>
|
||||||
|
<chat_interface>
|
||||||
|
- Clean, centered chat layout with message bubbles
|
||||||
|
- Streaming message responses with typing indicator
|
||||||
|
- Markdown rendering with proper formatting
|
||||||
|
- Code blocks with syntax highlighting and copy button
|
||||||
|
- LaTeX/math equation rendering
|
||||||
|
- Image upload and display in messages
|
||||||
|
- Multi-turn conversations with context
|
||||||
|
- Message editing and regeneration
|
||||||
|
- Stop generation button during streaming
|
||||||
|
- Input field with auto-resize textarea
|
||||||
|
- Character count and token estimation
|
||||||
|
- Keyboard shortcuts (Enter to send, Shift+Enter for newline)
|
||||||
|
</chat_interface>
|
||||||
|
|
||||||
|
<artifacts>
|
||||||
|
- Artifact detection and rendering in side panel
|
||||||
|
- Code artifact viewer with syntax highlighting
|
||||||
|
- HTML/SVG preview with live rendering
|
||||||
|
- React component preview
|
||||||
|
- Mermaid diagram rendering
|
||||||
|
- Text document artifacts
|
||||||
|
- Artifact editing and re-prompting
|
||||||
|
- Full-screen artifact view
|
||||||
|
- Download artifact content
|
||||||
|
- Artifact versioning and history
|
||||||
|
</artifacts>
|
||||||
|
|
||||||
|
<conversation_management>
|
||||||
|
- Create new conversations
|
||||||
|
- Conversation list in sidebar
|
||||||
|
- Rename conversations
|
||||||
|
- Delete conversations
|
||||||
|
- Search conversations by title/content
|
||||||
|
- Pin important conversations
|
||||||
|
- Archive conversations
|
||||||
|
- Conversation folders/organization
|
||||||
|
- Duplicate conversation
|
||||||
|
- Export conversation (JSON, Markdown, PDF)
|
||||||
|
- Conversation timestamps (created, last updated)
|
||||||
|
- Unread message indicators
|
||||||
|
</conversation_management>
|
||||||
|
|
||||||
|
<projects>
|
||||||
|
- Create projects to group related conversations
|
||||||
|
- Project knowledge base (upload documents)
|
||||||
|
- Project-specific custom instructions
|
||||||
|
- Share projects with team (mock feature)
|
||||||
|
- Project settings and configuration
|
||||||
|
- Move conversations between projects
|
||||||
|
- Project templates
|
||||||
|
- Project analytics (usage stats)
|
||||||
|
</projects>
|
||||||
|
|
||||||
|
<model_selection>
|
||||||
|
- Model selector dropdown with the following models:
|
||||||
|
- Claude Sonnet 4.5 (claude-sonnet-4-5-20250929) - default
|
||||||
|
- Claude Haiku 4.5 (claude-haiku-4-5-20251001)
|
||||||
|
- Claude Opus 4.1 (claude-opus-4-1-20250805)
|
||||||
|
- Model capabilities display
|
||||||
|
- Context window indicator
|
||||||
|
- Model-specific pricing info (display only)
|
||||||
|
- Switch models mid-conversation
|
||||||
|
- Model comparison view
|
||||||
|
</model_selection>
|
||||||
|
|
||||||
|
<custom_instructions>
|
||||||
|
- Global custom instructions
|
||||||
|
- Project-specific custom instructions
|
||||||
|
- Conversation-specific system prompts
|
||||||
|
- Custom instruction templates
|
||||||
|
- Preview how instructions affect responses
|
||||||
|
</custom_instructions>
|
||||||
|
|
||||||
|
<settings_preferences>
|
||||||
|
- Theme selection (Light, Dark, Auto)
|
||||||
|
- Font size adjustment
|
||||||
|
- Message density (compact, comfortable, spacious)
|
||||||
|
- Code theme selection
|
||||||
|
- Language preferences
|
||||||
|
- Accessibility options
|
||||||
|
- Keyboard shortcuts reference
|
||||||
|
- Data export options
|
||||||
|
- Privacy settings
|
||||||
|
- API key management
|
||||||
|
</settings_preferences>
|
||||||
|
|
||||||
|
<advanced_features>
|
||||||
|
- Temperature control slider
|
||||||
|
- Max tokens adjustment
|
||||||
|
- Top-p (nucleus sampling) control
|
||||||
|
- System prompt override
|
||||||
|
- Thinking/reasoning mode toggle
|
||||||
|
- Multi-modal input (text + images)
|
||||||
|
- Voice input (optional, mock UI)
|
||||||
|
- Response suggestions
|
||||||
|
- Related prompts
|
||||||
|
- Conversation branching
|
||||||
|
</advanced_features>
|
||||||
|
|
||||||
|
<collaboration>
|
||||||
|
- Share conversation via link (read-only)
|
||||||
|
- Export conversation formats
|
||||||
|
- Conversation templates
|
||||||
|
- Prompt library
|
||||||
|
- Share artifacts
|
||||||
|
- Team workspaces (mock UI)
|
||||||
|
</collaboration>
|
||||||
|
|
||||||
|
<search_discovery>
|
||||||
|
- Search across all conversations
|
||||||
|
- Filter by project, date, model
|
||||||
|
- Prompt library with categories
|
||||||
|
- Example conversations
|
||||||
|
- Quick actions menu
|
||||||
|
- Command palette (Cmd/Ctrl+K)
|
||||||
|
</search_discovery>
|
||||||
|
|
||||||
|
<usage_tracking>
|
||||||
|
- Token usage display per message
|
||||||
|
- Conversation cost estimation
|
||||||
|
- Daily/monthly usage dashboard
|
||||||
|
- Usage limits and warnings
|
||||||
|
- API quota tracking
|
||||||
|
</usage_tracking>
|
||||||
|
|
||||||
|
<onboarding>
|
||||||
|
- Welcome screen for new users
|
||||||
|
- Feature tour highlights
|
||||||
|
- Example prompts to get started
|
||||||
|
- Quick tips and best practices
|
||||||
|
- Keyboard shortcuts tutorial
|
||||||
|
</onboarding>
|
||||||
|
|
||||||
|
<accessibility>
|
||||||
|
- Full keyboard navigation
|
||||||
|
- Screen reader support
|
||||||
|
- ARIA labels and roles
|
||||||
|
- High contrast mode
|
||||||
|
- Focus management
|
||||||
|
- Reduced motion support
|
||||||
|
</accessibility>
|
||||||
|
|
||||||
|
<responsive_design>
|
||||||
|
- Mobile-first responsive layout
|
||||||
|
- Touch-optimized interface
|
||||||
|
- Collapsible sidebar on mobile
|
||||||
|
- Swipe gestures for navigation
|
||||||
|
- Adaptive artifact display
|
||||||
|
- Progressive Web App (PWA) support
|
||||||
|
</responsive_design>
|
||||||
|
</core_features>
|
||||||
|
|
||||||
|
<database_schema>
|
||||||
|
<tables>
|
||||||
|
<users>
|
||||||
|
- id, email, name, avatar_url
|
||||||
|
- created_at, last_login
|
||||||
|
- preferences (JSON: theme, font_size, etc.)
|
||||||
|
- custom_instructions
|
||||||
|
</users>
|
||||||
|
|
||||||
|
<projects>
|
||||||
|
- id, user_id, name, description, color
|
||||||
|
- custom_instructions, knowledge_base_path
|
||||||
|
- created_at, updated_at
|
||||||
|
- is_archived, is_pinned
|
||||||
|
</projects>
|
||||||
|
|
||||||
|
<conversations>
|
||||||
|
- id, user_id, project_id, title
|
||||||
|
- model, created_at, updated_at, last_message_at
|
||||||
|
- is_archived, is_pinned, is_deleted
|
||||||
|
- settings (JSON: temperature, max_tokens, etc.)
|
||||||
|
- token_count, message_count
|
||||||
|
</conversations>
|
||||||
|
|
||||||
|
<messages>
|
||||||
|
- id, conversation_id, role (user/assistant/system)
|
||||||
|
- content, created_at, edited_at
|
||||||
|
- tokens, finish_reason
|
||||||
|
- images (JSON array of image data)
|
||||||
|
- parent_message_id (for branching)
|
||||||
|
</messages>
|
||||||
|
|
||||||
|
<artifacts>
|
||||||
|
- id, message_id, conversation_id
|
||||||
|
- type (code/html/svg/react/mermaid/text)
|
||||||
|
- title, identifier, language
|
||||||
|
- content, version
|
||||||
|
- created_at, updated_at
|
||||||
|
</artifacts>
|
||||||
|
|
||||||
|
<shared_conversations>
|
||||||
|
- id, conversation_id, share_token
|
||||||
|
- created_at, expires_at, view_count
|
||||||
|
- is_public
|
||||||
|
</shared_conversations>
|
||||||
|
|
||||||
|
<prompt_library>
|
||||||
|
- id, user_id, title, description
|
||||||
|
- prompt_template, category, tags (JSON)
|
||||||
|
- is_public, usage_count
|
||||||
|
- created_at, updated_at
|
||||||
|
</prompt_library>
|
||||||
|
|
||||||
|
<conversation_folders>
|
||||||
|
- id, user_id, project_id, name, parent_folder_id
|
||||||
|
- created_at, position
|
||||||
|
</conversation_folders>
|
||||||
|
|
||||||
|
<conversation_folder_items>
|
||||||
|
- id, folder_id, conversation_id
|
||||||
|
</conversation_folder_items>
|
||||||
|
|
||||||
|
<usage_tracking>
|
||||||
|
- id, user_id, conversation_id, message_id
|
||||||
|
- model, input_tokens, output_tokens
|
||||||
|
- cost_estimate, created_at
|
||||||
|
</usage_tracking>
|
||||||
|
|
||||||
|
<api_keys>
|
||||||
|
- id, user_id, key_name, api_key_hash
|
||||||
|
- created_at, last_used_at
|
||||||
|
- is_active
|
||||||
|
</api_keys>
|
||||||
|
</tables>
|
||||||
|
</database_schema>
|
||||||
|
|
||||||
|
<api_endpoints_summary>
|
||||||
|
<authentication>
|
||||||
|
- POST /api/auth/login
|
||||||
|
- POST /api/auth/logout
|
||||||
|
- GET /api/auth/me
|
||||||
|
- PUT /api/auth/profile
|
||||||
|
</authentication>
|
||||||
|
|
||||||
|
<conversations>
|
||||||
|
- GET /api/conversations
|
||||||
|
- POST /api/conversations
|
||||||
|
- GET /api/conversations/:id
|
||||||
|
- PUT /api/conversations/:id
|
||||||
|
- DELETE /api/conversations/:id
|
||||||
|
- POST /api/conversations/:id/duplicate
|
||||||
|
- POST /api/conversations/:id/export
|
||||||
|
- PUT /api/conversations/:id/archive
|
||||||
|
- PUT /api/conversations/:id/pin
|
||||||
|
- POST /api/conversations/:id/branch
|
||||||
|
</conversations>
|
||||||
|
|
||||||
|
<messages>
|
||||||
|
- GET /api/conversations/:id/messages
|
||||||
|
- POST /api/conversations/:id/messages
|
||||||
|
- PUT /api/messages/:id
|
||||||
|
- DELETE /api/messages/:id
|
||||||
|
- POST /api/messages/:id/regenerate
|
||||||
|
- GET /api/messages/stream (SSE endpoint)
|
||||||
|
</messages>
|
||||||
|
|
||||||
|
<artifacts>
|
||||||
|
- GET /api/conversations/:id/artifacts
|
||||||
|
- GET /api/artifacts/:id
|
||||||
|
- PUT /api/artifacts/:id
|
||||||
|
- DELETE /api/artifacts/:id
|
||||||
|
- POST /api/artifacts/:id/fork
|
||||||
|
- GET /api/artifacts/:id/versions
|
||||||
|
</artifacts>
|
||||||
|
|
||||||
|
<projects>
|
||||||
|
- GET /api/projects
|
||||||
|
- POST /api/projects
|
||||||
|
- GET /api/projects/:id
|
||||||
|
- PUT /api/projects/:id
|
||||||
|
- DELETE /api/projects/:id
|
||||||
|
- POST /api/projects/:id/knowledge
|
||||||
|
- GET /api/projects/:id/conversations
|
||||||
|
- PUT /api/projects/:id/settings
|
||||||
|
</projects>
|
||||||
|
|
||||||
|
<sharing>
|
||||||
|
- POST /api/conversations/:id/share
|
||||||
|
- GET /api/share/:token
|
||||||
|
- DELETE /api/share/:token
|
||||||
|
- PUT /api/share/:token/settings
|
||||||
|
</sharing>
|
||||||
|
|
||||||
|
<prompts>
|
||||||
|
- GET /api/prompts/library
|
||||||
|
- POST /api/prompts/library
|
||||||
|
- GET /api/prompts/:id
|
||||||
|
- PUT /api/prompts/:id
|
||||||
|
- DELETE /api/prompts/:id
|
||||||
|
- GET /api/prompts/categories
|
||||||
|
- GET /api/prompts/examples
|
||||||
|
</prompts>
|
||||||
|
|
||||||
|
<search>
|
||||||
|
- GET /api/search/conversations?q=query
|
||||||
|
- GET /api/search/messages?q=query
|
||||||
|
- GET /api/search/artifacts?q=query
|
||||||
|
- GET /api/search/prompts?q=query
|
||||||
|
</search>
|
||||||
|
|
||||||
|
<folders>
|
||||||
|
- GET /api/folders
|
||||||
|
- POST /api/folders
|
||||||
|
- PUT /api/folders/:id
|
||||||
|
- DELETE /api/folders/:id
|
||||||
|
- POST /api/folders/:id/items
|
||||||
|
- DELETE /api/folders/:id/items/:conversationId
|
||||||
|
</folders>
|
||||||
|
|
||||||
|
<usage>
|
||||||
|
- GET /api/usage/daily
|
||||||
|
- GET /api/usage/monthly
|
||||||
|
- GET /api/usage/by-model
|
||||||
|
- GET /api/usage/conversations/:id
|
||||||
|
</usage>
|
||||||
|
|
||||||
|
<settings>
|
||||||
|
- GET /api/settings
|
||||||
|
- PUT /api/settings
|
||||||
|
- GET /api/settings/custom-instructions
|
||||||
|
- PUT /api/settings/custom-instructions
|
||||||
|
</settings>
|
||||||
|
|
||||||
|
<claude_api>
|
||||||
|
- POST /api/claude/chat (proxy to Claude API)
|
||||||
|
- POST /api/claude/chat/stream (streaming proxy)
|
||||||
|
- GET /api/claude/models
|
||||||
|
- POST /api/claude/images/upload
|
||||||
|
</claude_api>
|
||||||
|
</api_endpoints_summary>
|
||||||
|
|
||||||
|
<ui_layout>
|
||||||
|
<main_structure>
|
||||||
|
- Three-column layout: sidebar (conversations), main (chat), panel (artifacts)
|
||||||
|
- Collapsible sidebar with resize handle
|
||||||
|
- Responsive breakpoints: mobile (single column), tablet (two column), desktop (three column)
|
||||||
|
- Persistent header with project/model selector
|
||||||
|
- Bottom input area with send button and options
|
||||||
|
</main_structure>
|
||||||
|
|
||||||
|
<sidebar_left>
|
||||||
|
- New chat button (prominent)
|
||||||
|
- Project selector dropdown
|
||||||
|
- Search conversations input
|
||||||
|
- Conversations list (grouped by date: Today, Yesterday, Previous 7 days, etc.)
|
||||||
|
- Folder tree view (collapsible)
|
||||||
|
- Settings gear icon at bottom
|
||||||
|
- User profile at bottom
|
||||||
|
</sidebar_left>
|
||||||
|
|
||||||
|
<main_chat_area>
|
||||||
|
- Conversation title (editable inline)
|
||||||
|
- Model selector badge
|
||||||
|
- Message history (scrollable)
|
||||||
|
- Welcome screen for new conversations
|
||||||
|
- Suggested prompts (empty state)
|
||||||
|
- Input area with formatting toolbar
|
||||||
|
- Attachment button for images
|
||||||
|
- Send button with loading state
|
||||||
|
- Stop generation button
|
||||||
|
</main_chat_area>
|
||||||
|
|
||||||
|
<artifacts_panel>
|
||||||
|
- Artifact header with title and type badge
|
||||||
|
- Code editor or preview pane
|
||||||
|
- Tabs for multiple artifacts
|
||||||
|
- Full-screen toggle
|
||||||
|
- Download button
|
||||||
|
- Edit/Re-prompt button
|
||||||
|
- Version selector
|
||||||
|
- Close panel button
|
||||||
|
</artifacts_panel>
|
||||||
|
|
||||||
|
<modals_overlays>
|
||||||
|
- Settings modal (tabbed interface)
|
||||||
|
- Share conversation modal
|
||||||
|
- Export options modal
|
||||||
|
- Project settings modal
|
||||||
|
- Prompt library modal
|
||||||
|
- Command palette overlay
|
||||||
|
- Keyboard shortcuts reference
|
||||||
|
</modals_overlays>
|
||||||
|
</ui_layout>
|
||||||
|
|
||||||
|
<design_system>
|
||||||
|
<color_palette>
|
||||||
|
- Primary: Orange/amber accent (#CC785C claude-style)
|
||||||
|
- Background: White (light mode), Dark gray (#1A1A1A dark mode)
|
||||||
|
- Surface: Light gray (#F5F5F5 light), Darker gray (#2A2A2A dark)
|
||||||
|
- Text: Near black (#1A1A1A light), Off-white (#E5E5E5 dark)
|
||||||
|
- Borders: Light gray (#E5E5E5 light), Dark gray (#404040 dark)
|
||||||
|
- Code blocks: Monaco editor theme
|
||||||
|
</color_palette>
|
||||||
|
|
||||||
|
<typography>
|
||||||
|
- Sans-serif system font stack (Inter, SF Pro, Roboto, system-ui)
|
||||||
|
- Headings: font-semibold
|
||||||
|
- Body: font-normal, leading-relaxed
|
||||||
|
- Code: Monospace (JetBrains Mono, Consolas, Monaco)
|
||||||
|
- Message text: text-base (16px), comfortable line-height
|
||||||
|
</typography>
|
||||||
|
|
||||||
|
<components>
|
||||||
|
<message_bubble>
|
||||||
|
- User messages: Right-aligned, subtle background
|
||||||
|
- Assistant messages: Left-aligned, no background
|
||||||
|
- Markdown formatting with proper spacing
|
||||||
|
- Inline code with bg-gray-100 background
|
||||||
|
- Code blocks with syntax highlighting
|
||||||
|
- Copy button on code blocks
|
||||||
|
</message_bubble>
|
||||||
|
|
||||||
|
<buttons>
|
||||||
|
- Primary: Orange/amber background, white text, rounded
|
||||||
|
- Secondary: Border style with hover fill
|
||||||
|
- Icon buttons: Square with hover background
|
||||||
|
- Disabled state: Reduced opacity, no pointer events
|
||||||
|
</buttons>
|
||||||
|
|
||||||
|
<inputs>
|
||||||
|
- Rounded borders with focus ring
|
||||||
|
- Textarea auto-resize
|
||||||
|
- Placeholder text in gray
|
||||||
|
- Error states in red
|
||||||
|
- Character counter
|
||||||
|
</inputs>
|
||||||
|
|
||||||
|
<cards>
|
||||||
|
- Subtle border or shadow
|
||||||
|
- Rounded corners (8px)
|
||||||
|
- Padding: p-4 to p-6
|
||||||
|
- Hover state: slight shadow increase
|
||||||
|
</cards>
|
||||||
|
</components>
|
||||||
|
|
||||||
|
<animations>
|
||||||
|
- Smooth transitions (150-300ms)
|
||||||
|
- Fade in for new messages
|
||||||
|
- Slide in for sidebar
|
||||||
|
- Typing indicator animation
|
||||||
|
- Loading spinner for generation
|
||||||
|
- Skeleton loaders for content
|
||||||
|
</animations>
|
||||||
|
</design_system>
|
||||||
|
|
||||||
|
<key_interactions>
|
||||||
|
<message_flow>
|
||||||
|
1. User types message in input field
|
||||||
|
2. Optional: Attach images via button
|
||||||
|
3. Click send or press Enter
|
||||||
|
4. Message appears in chat immediately
|
||||||
|
5. Typing indicator shows while waiting
|
||||||
|
6. Response streams in word by word
|
||||||
|
7. Code blocks render with syntax highlighting
|
||||||
|
8. Artifacts detected and rendered in side panel
|
||||||
|
9. Message complete, enable regenerate option
|
||||||
|
</message_flow>
|
||||||
|
|
||||||
|
<artifact_flow>
|
||||||
|
1. Assistant generates artifact in response
|
||||||
|
2. Artifact panel slides in from right
|
||||||
|
3. Content renders (code with highlighting or live preview)
|
||||||
|
4. User can edit artifact inline
|
||||||
|
5. "Re-prompt" button to iterate with Claude
|
||||||
|
6. Download or copy artifact content
|
||||||
|
7. Full-screen mode for detailed work
|
||||||
|
8. Close panel to return to chat focus
|
||||||
|
</artifact_flow>
|
||||||
|
|
||||||
|
<conversation_management>
|
||||||
|
1. Click "New Chat" to start fresh conversation
|
||||||
|
2. Conversations auto-save with first message
|
||||||
|
3. Auto-generate title from first exchange
|
||||||
|
4. Click title to rename inline
|
||||||
|
5. Drag conversations into folders
|
||||||
|
6. Right-click for context menu (pin, archive, delete, export)
|
||||||
|
7. Search filters conversations in real-time
|
||||||
|
8. Click conversation to switch context
|
||||||
|
</conversation_management>
|
||||||
|
</key_interactions>
|
||||||
|
|
||||||
|
<implementation_steps>
|
||||||
|
<step number="1">
|
||||||
|
<title>Setup Project Foundation and Database</title>
|
||||||
|
<tasks>
|
||||||
|
- Initialize Express server with SQLite database
|
||||||
|
- Set up Claude API client with streaming support
|
||||||
|
- Create database schema with migrations
|
||||||
|
- Implement authentication endpoints
|
||||||
|
- Set up basic CORS and middleware
|
||||||
|
- Create health check endpoint
|
||||||
|
</tasks>
|
||||||
|
</step>
|
||||||
|
|
||||||
|
<step number="2">
|
||||||
|
<title>Build Core Chat Interface</title>
|
||||||
|
<tasks>
|
||||||
|
- Create main layout with sidebar and chat area
|
||||||
|
- Implement message display with markdown rendering
|
||||||
|
- Add streaming message support with SSE
|
||||||
|
- Build input area with auto-resize textarea
|
||||||
|
- Add code block syntax highlighting
|
||||||
|
- Implement stop generation functionality
|
||||||
|
- Add typing indicators and loading states
|
||||||
|
</tasks>
|
||||||
|
</step>
|
||||||
|
|
||||||
|
<step number="3">
|
||||||
|
<title>Conversation Management</title>
|
||||||
|
<tasks>
|
||||||
|
- Create conversation list in sidebar
|
||||||
|
- Implement new conversation creation
|
||||||
|
- Add conversation switching
|
||||||
|
- Build conversation rename functionality
|
||||||
|
- Implement delete with confirmation
|
||||||
|
- Add conversation search
|
||||||
|
- Create conversation grouping by date
|
||||||
|
</tasks>
|
||||||
|
</step>
|
||||||
|
|
||||||
|
<step number="4">
|
||||||
|
<title>Artifacts System</title>
|
||||||
|
<tasks>
|
||||||
|
- Build artifact detection from Claude responses
|
||||||
|
- Create artifact rendering panel
|
||||||
|
- Implement code artifact viewer
|
||||||
|
- Add HTML/SVG live preview
|
||||||
|
- Build artifact editing interface
|
||||||
|
- Add artifact versioning
|
||||||
|
- Implement full-screen artifact view
|
||||||
|
</tasks>
|
||||||
|
</step>
|
||||||
|
|
||||||
|
<step number="5">
|
||||||
|
<title>Projects and Organization</title>
|
||||||
|
<tasks>
|
||||||
|
- Create projects CRUD endpoints
|
||||||
|
- Build project selector UI
|
||||||
|
- Implement project-specific custom instructions
|
||||||
|
- Add folder system for conversations
|
||||||
|
- Create drag-and-drop organization
|
||||||
|
- Build project settings panel
|
||||||
|
</tasks>
|
||||||
|
</step>
|
||||||
|
|
||||||
|
<step number="6">
|
||||||
|
<title>Advanced Features</title>
|
||||||
|
<tasks>
|
||||||
|
- Add model selection dropdown
|
||||||
|
- Implement temperature and parameter controls
|
||||||
|
- Build image upload functionality
|
||||||
|
- Create message editing and regeneration
|
||||||
|
- Add conversation branching
|
||||||
|
- Implement export functionality
|
||||||
|
</tasks>
|
||||||
|
</step>
|
||||||
|
|
||||||
|
<step number="7">
|
||||||
|
<title>Settings and Customization</title>
|
||||||
|
<tasks>
|
||||||
|
- Build settings modal with tabs
|
||||||
|
- Implement theme switching (light/dark)
|
||||||
|
- Add custom instructions management
|
||||||
|
- Create keyboard shortcuts
|
||||||
|
- Build prompt library
|
||||||
|
- Add usage tracking dashboard
|
||||||
|
</tasks>
|
||||||
|
</step>
|
||||||
|
|
||||||
|
<step number="8">
|
||||||
|
<title>Sharing and Collaboration</title>
|
||||||
|
<tasks>
|
||||||
|
- Implement conversation sharing with tokens
|
||||||
|
- Create public share view
|
||||||
|
- Add export to multiple formats
|
||||||
|
- Build prompt templates
|
||||||
|
- Create example conversations
|
||||||
|
</tasks>
|
||||||
|
</step>
|
||||||
|
|
||||||
|
<step number="9">
|
||||||
|
<title>Polish and Optimization</title>
|
||||||
|
<tasks>
|
||||||
|
- Optimize for mobile responsiveness
|
||||||
|
- Add command palette (Cmd+K)
|
||||||
|
- Implement comprehensive keyboard navigation
|
||||||
|
- Add onboarding flow
|
||||||
|
- Create accessibility improvements
|
||||||
|
- Performance optimization and caching
|
||||||
|
</tasks>
|
||||||
|
</step>
|
||||||
|
</implementation_steps>
|
||||||
|
|
||||||
|
<success_criteria>
|
||||||
|
<functionality>
|
||||||
|
- Streaming chat responses work smoothly
|
||||||
|
- Artifact detection and rendering accurate
|
||||||
|
- Conversation management intuitive and reliable
|
||||||
|
- Project organization clear and useful
|
||||||
|
- Image upload and display working
|
||||||
|
- All CRUD operations functional
|
||||||
|
</functionality>
|
||||||
|
|
||||||
|
<user_experience>
|
||||||
|
- Interface matches claude.ai design language
|
||||||
|
- Responsive on all device sizes
|
||||||
|
- Smooth animations and transitions
|
||||||
|
- Fast response times and minimal lag
|
||||||
|
- Intuitive navigation and workflows
|
||||||
|
- Clear feedback for all actions
|
||||||
|
</user_experience>
|
||||||
|
|
||||||
|
<technical_quality>
|
||||||
|
- Clean, maintainable code structure
|
||||||
|
- Proper error handling throughout
|
||||||
|
- Secure API key management
|
||||||
|
- Optimized database queries
|
||||||
|
- Efficient streaming implementation
|
||||||
|
- Comprehensive testing coverage
|
||||||
|
</technical_quality>
|
||||||
|
|
||||||
|
<design_polish>
|
||||||
|
- Consistent with claude.ai visual design
|
||||||
|
- Beautiful typography and spacing
|
||||||
|
- Smooth animations and micro-interactions
|
||||||
|
- Excellent contrast and accessibility
|
||||||
|
- Professional, polished appearance
|
||||||
|
- Dark mode fully implemented
|
||||||
|
</design_polish>
|
||||||
|
</success_criteria>
|
||||||
|
</project_specification>
|
||||||
448
prompts/app_spec_mistral_extensible.txt
Normal file
448
prompts/app_spec_mistral_extensible.txt
Normal file
@@ -0,0 +1,448 @@
|
|||||||
|
<project_specification>
|
||||||
|
<project_name>Claude.ai Clone - Multi-Provider Support (Mistral + Extensible)</project_name>
|
||||||
|
|
||||||
|
<overview>
|
||||||
|
This specification adds Mistral AI model support AND creates an extensible provider architecture
|
||||||
|
that makes it easy to add additional AI providers (OpenAI, Gemini, etc.) in the future.
|
||||||
|
This uses the "Open/Closed Principle" - open for extension, closed for modification.
|
||||||
|
|
||||||
|
All changes are additive and backward-compatible. Existing Claude functionality remains unchanged.
|
||||||
|
</overview>
|
||||||
|
|
||||||
|
<safety_requirements>
|
||||||
|
<critical>
|
||||||
|
- DO NOT modify existing Claude API integration code directly
|
||||||
|
- DO NOT change existing model selection logic for Claude models
|
||||||
|
- DO NOT modify existing database schema without safe migrations
|
||||||
|
- DO NOT break existing conversations or messages
|
||||||
|
- All new code must be in separate files/modules when possible
|
||||||
|
- Test thoroughly before marking issues as complete
|
||||||
|
- Maintain backward compatibility at all times
|
||||||
|
- Refactor Claude code to use BaseProvider WITHOUT changing functionality
|
||||||
|
</critical>
|
||||||
|
</safety_requirements>
|
||||||
|
|
||||||
|
<architecture_design>
|
||||||
|
<provider_pattern>
|
||||||
|
Create an abstract provider interface that all AI providers implement:
|
||||||
|
- BaseProvider (abstract class/interface) - defines common interface
|
||||||
|
- ClaudeProvider (existing code refactored to extend BaseProvider)
|
||||||
|
- MistralProvider (new, extends BaseProvider)
|
||||||
|
- OpenAIProvider (future, extends BaseProvider - easy to add)
|
||||||
|
- GeminiProvider (future, extends BaseProvider - easy to add)
|
||||||
|
</provider_pattern>
|
||||||
|
|
||||||
|
<benefits>
|
||||||
|
- Easy to add new providers without modifying existing code
|
||||||
|
- Consistent interface across all providers
|
||||||
|
- Isolated error handling per provider
|
||||||
|
- Unified model selection UI
|
||||||
|
- Shared functionality (streaming, error handling, logging)
|
||||||
|
- Future-proof architecture
|
||||||
|
</benefits>
|
||||||
|
</architecture_design>
|
||||||
|
|
||||||
|
<new_features>
|
||||||
|
<feature_provider_architecture>
|
||||||
|
<title>Extensible Provider Architecture (Foundation)</title>
|
||||||
|
<description>
|
||||||
|
Create a provider abstraction layer that allows easy addition of multiple AI providers.
|
||||||
|
This is the foundation that makes adding OpenAI, Gemini, etc. trivial in the future.
|
||||||
|
|
||||||
|
BaseProvider abstract class should define:
|
||||||
|
- sendMessage(messages, options) -> Promise<response>
|
||||||
|
- streamMessage(messages, options) -> AsyncGenerator<chunk>
|
||||||
|
- getModels() -> Promise<array> of available models
|
||||||
|
- validateApiKey(key) -> Promise<boolean>
|
||||||
|
- getCapabilities() -> object with provider capabilities
|
||||||
|
- getName() -> string (provider name: 'claude', 'mistral', 'openai', etc.)
|
||||||
|
- getDefaultModel() -> string (default model ID for this provider)
|
||||||
|
|
||||||
|
ProviderRegistry should:
|
||||||
|
- Register all available providers
|
||||||
|
- Provide list of all providers
|
||||||
|
- Check which providers are configured (have API keys)
|
||||||
|
- Enable/disable providers
|
||||||
|
|
||||||
|
ProviderFactory should:
|
||||||
|
- Create provider instances based on model ID or provider name
|
||||||
|
- Handle provider selection logic
|
||||||
|
- Route requests to correct provider
|
||||||
|
</description>
|
||||||
|
<priority>1</priority>
|
||||||
|
<category>functional</category>
|
||||||
|
<implementation_approach>
|
||||||
|
- Create server/providers/BaseProvider.js (abstract base class)
|
||||||
|
- Refactor existing Claude code to server/providers/ClaudeProvider.js (extends BaseProvider)
|
||||||
|
- Create server/providers/ProviderRegistry.js (manages all providers)
|
||||||
|
- Create server/providers/ProviderFactory.js (creates provider instances)
|
||||||
|
- Update existing routes to use ProviderFactory instead of direct Claude calls
|
||||||
|
- Keep all provider code in server/providers/ directory
|
||||||
|
</implementation_approach>
|
||||||
|
<test_steps>
|
||||||
|
1. Verify Claude still works after refactoring to use BaseProvider
|
||||||
|
2. Test that ProviderFactory creates ClaudeProvider correctly
|
||||||
|
3. Test that ProviderRegistry lists Claude provider
|
||||||
|
4. Verify error handling works correctly
|
||||||
|
5. Test that adding a mock provider is straightforward
|
||||||
|
6. Verify no regression in existing Claude functionality
|
||||||
|
</test_steps>
|
||||||
|
</feature_provider_architecture>
|
||||||
|
|
||||||
|
<feature_mistral_provider>
|
||||||
|
<title>Mistral Provider Implementation</title>
|
||||||
|
<description>
|
||||||
|
Implement MistralProvider extending BaseProvider. This should:
|
||||||
|
- Implement all BaseProvider abstract methods
|
||||||
|
- Handle Mistral-specific API calls (https://api.mistral.ai/v1/chat/completions)
|
||||||
|
- Support Mistral streaming (Server-Sent Events)
|
||||||
|
- Handle Mistral-specific error codes and messages
|
||||||
|
- Provide Mistral model list:
|
||||||
|
* mistral-large-latest (default)
|
||||||
|
* mistral-medium-latest
|
||||||
|
* mistral-small-latest
|
||||||
|
* mistral-7b-instruct
|
||||||
|
- Manage Mistral API authentication
|
||||||
|
- Return responses in unified format (same as Claude)
|
||||||
|
</description>
|
||||||
|
<priority>2</priority>
|
||||||
|
<category>functional</category>
|
||||||
|
<implementation_approach>
|
||||||
|
- Create server/providers/MistralProvider.js
|
||||||
|
- Extend BaseProvider class
|
||||||
|
- Implement Mistral API integration using fetch or axios
|
||||||
|
- Register in ProviderRegistry
|
||||||
|
- Use same response format as ClaudeProvider for consistency
|
||||||
|
</implementation_approach>
|
||||||
|
<test_steps>
|
||||||
|
1. Test MistralProvider.sendMessage() works with valid API key
|
||||||
|
2. Test MistralProvider.streamMessage() works
|
||||||
|
3. Test MistralProvider.getModels() returns correct models
|
||||||
|
4. Test error handling for invalid API key
|
||||||
|
5. Test error handling for API rate limits
|
||||||
|
6. Verify it integrates with ProviderFactory
|
||||||
|
7. Verify responses match expected format
|
||||||
|
</test_steps>
|
||||||
|
</feature_mistral_provider>
|
||||||
|
|
||||||
|
<feature_unified_model_selector>
|
||||||
|
<title>Unified Model Selector (All Providers)</title>
|
||||||
|
<description>
|
||||||
|
Update model selector to dynamically load models from all registered providers.
|
||||||
|
The selector should:
|
||||||
|
- Query all providers for available models via GET /api/models
|
||||||
|
- Group models by provider (Claude, Mistral, etc.)
|
||||||
|
- Display provider badges/icons next to model names
|
||||||
|
- Show which provider each model belongs to
|
||||||
|
- Filter models by provider (optional toggle)
|
||||||
|
- Show provider-specific capabilities (streaming, images, etc.)
|
||||||
|
- Only show models from providers with configured API keys
|
||||||
|
- Handle unconfigured providers gracefully (show "Configure API key" if not set)
|
||||||
|
</description>
|
||||||
|
<priority>2</priority>
|
||||||
|
<category>functional</category>
|
||||||
|
<implementation_approach>
|
||||||
|
- Create API endpoint: GET /api/models (returns all models from all providers)
|
||||||
|
- Update frontend ModelSelector component to handle multiple providers
|
||||||
|
- Add provider grouping/filtering in UI
|
||||||
|
- Show provider badges/icons next to model names
|
||||||
|
- Group models by provider with collapsible sections
|
||||||
|
- Show provider status (configured/not configured)
|
||||||
|
</implementation_approach>
|
||||||
|
<test_steps>
|
||||||
|
1. Verify model selector shows Claude models (existing functionality)
|
||||||
|
2. Verify model selector shows Mistral models (if key configured)
|
||||||
|
3. Test grouping by provider works
|
||||||
|
4. Test filtering by provider works
|
||||||
|
5. Verify provider badges display correctly
|
||||||
|
6. Test that providers without API keys show "Configure" message
|
||||||
|
7. Verify selecting a model works for both providers
|
||||||
|
</test_steps>
|
||||||
|
</feature_unified_model_selector>
|
||||||
|
|
||||||
|
<feature_provider_settings>
|
||||||
|
<title>Multi-Provider API Key Management</title>
|
||||||
|
<description>
|
||||||
|
Create unified API key management that supports multiple providers. Users should be able to:
|
||||||
|
- Manage API keys for each provider separately (Claude, Mistral, OpenAI, etc.)
|
||||||
|
- See which providers are available
|
||||||
|
- See which providers are configured (have API keys)
|
||||||
|
- Test each provider's API key independently
|
||||||
|
- Enable/disable providers (hide models if key not configured)
|
||||||
|
- See provider status indicators (configured/not configured/error)
|
||||||
|
- Update or remove API keys for any provider
|
||||||
|
- See usage statistics per provider
|
||||||
|
</description>
|
||||||
|
<priority>2</priority>
|
||||||
|
<category>functional</category>
|
||||||
|
<implementation_approach>
|
||||||
|
- Create server/routes/providers.js with unified provider management
|
||||||
|
- Update settings UI to show provider cards (one per provider)
|
||||||
|
- Each provider card has:
|
||||||
|
* Provider name and logo/icon
|
||||||
|
* API key input field (masked)
|
||||||
|
* "Test Connection" button
|
||||||
|
* Status indicator (green/yellow/red)
|
||||||
|
* Enable/disable toggle
|
||||||
|
- Store keys in api_keys table with key_name = 'claude_api_key', 'mistral_api_key', etc.
|
||||||
|
- Use same encryption method for all providers
|
||||||
|
</implementation_approach>
|
||||||
|
<test_steps>
|
||||||
|
1. Configure Claude API key (verify existing functionality still works)
|
||||||
|
2. Configure Mistral API key
|
||||||
|
3. Verify both keys are stored separately
|
||||||
|
4. Test each provider's "Test Connection" button
|
||||||
|
5. Remove one key and verify only that provider's models are hidden
|
||||||
|
6. Verify provider status indicators update correctly
|
||||||
|
7. Test that disabling a provider hides its models
|
||||||
|
</test_steps>
|
||||||
|
</feature_provider_settings>
|
||||||
|
|
||||||
|
<feature_database_provider_support>
|
||||||
|
<title>Database Support for Multiple Providers (Future-Proof)</title>
|
||||||
|
<description>
|
||||||
|
Update database schema to support multiple providers in a future-proof way.
|
||||||
|
This should:
|
||||||
|
- Add provider field to conversations table (TEXT, default: 'claude')
|
||||||
|
- Add provider field to messages/usage_tracking (TEXT, default: 'claude')
|
||||||
|
- Use TEXT field (not ENUM) to allow easy addition of new providers without schema changes
|
||||||
|
- Migration should be safe, idempotent, and backward compatible
|
||||||
|
- All existing records default to 'claude' provider
|
||||||
|
- Add indexes for performance on provider queries
|
||||||
|
</description>
|
||||||
|
<priority>1</priority>
|
||||||
|
<category>functional</category>
|
||||||
|
<implementation_approach>
|
||||||
|
- Create migration: server/migrations/add_provider_support.sql
|
||||||
|
- Use TEXT field (not ENUM) for provider name (allows 'claude', 'mistral', 'openai', etc.)
|
||||||
|
- Default all existing records to 'claude'
|
||||||
|
- Add indexes on provider columns for performance
|
||||||
|
- Make migration idempotent (can run multiple times safely; SQLite's ADD COLUMN has no IF NOT EXISTS, so check that the column is missing before altering)
|
||||||
|
- Create rollback script if needed
|
||||||
|
</implementation_approach>
|
||||||
|
<test_steps>
|
||||||
|
1. Backup existing database
|
||||||
|
2. Run migration script
|
||||||
|
3. Verify all existing conversations have provider='claude'
|
||||||
|
4. Verify all existing messages have provider='claude' (via usage_tracking)
|
||||||
|
5. Create new conversation with Mistral provider
|
||||||
|
6. Verify provider='mistral' is saved correctly
|
||||||
|
7. Query conversations by provider (test index performance)
|
||||||
|
8. Verify existing Claude conversations still work
|
||||||
|
9. Test rollback script if needed
|
||||||
|
</test_steps>
|
||||||
|
</feature_database_provider_support>
|
||||||
|
|
||||||
|
<feature_unified_chat_endpoint>
|
||||||
|
<title>Unified Chat Endpoint (Works with Any Provider)</title>
|
||||||
|
<description>
|
||||||
|
Update chat endpoints to use ProviderFactory, making them work with any provider.
|
||||||
|
The endpoint should:
|
||||||
|
- Accept provider or model ID in request
|
||||||
|
- Use ProviderFactory to get correct provider
|
||||||
|
- Route request to appropriate provider
|
||||||
|
- Return unified response format
|
||||||
|
- Handle provider-specific errors gracefully
|
||||||
|
- Support streaming for all providers that support it
|
||||||
|
</description>
|
||||||
|
<priority>1</priority>
|
||||||
|
<category>functional</category>
|
||||||
|
<implementation_approach>
|
||||||
|
- Update POST /api/chat to use ProviderFactory
|
||||||
|
- Update POST /api/chat/stream to use ProviderFactory
|
||||||
|
- Extract provider from model ID or accept provider parameter
|
||||||
|
- Route to correct provider instance
|
||||||
|
- Return unified response format
|
||||||
|
</implementation_approach>
|
||||||
|
<test_steps>
|
||||||
|
1. Test POST /api/chat with Claude model (verify no regression)
|
||||||
|
2. Test POST /api/chat with Mistral model
|
||||||
|
3. Test POST /api/chat/stream with Claude (verify streaming still works)
|
||||||
|
4. Test POST /api/chat/stream with Mistral
|
||||||
|
5. Test error handling for invalid provider
|
||||||
|
6. Test error handling for missing API key
|
||||||
|
</test_steps>
|
||||||
|
</feature_unified_chat_endpoint>
|
||||||
|
</new_features>
|
||||||
|
|
||||||
|
<future_extensibility>
|
||||||
|
<openai_provider_example>
|
||||||
|
<title>How to Add OpenAI in the Future</title>
|
||||||
|
<description>
|
||||||
|
To add OpenAI support later, simply follow these steps (NO changes to existing code needed):
|
||||||
|
|
||||||
|
1. Create server/providers/OpenAIProvider.js extending BaseProvider
|
||||||
|
2. Implement OpenAI API calls (https://api.openai.com/v1/chat/completions)
|
||||||
|
3. Register in ProviderRegistry: ProviderRegistry.register('openai', OpenAIProvider)
|
||||||
|
4. That's it! OpenAI models will automatically appear in model selector.
|
||||||
|
|
||||||
|
Example OpenAIProvider structure:
|
||||||
|
- Extends BaseProvider
|
||||||
|
- Implements sendMessage() using OpenAI API
|
||||||
|
- Implements streamMessage() for streaming support
|
||||||
|
- Returns models: gpt-4, gpt-3.5-turbo, etc.
|
||||||
|
- Handles OpenAI-specific authentication and errors
|
||||||
|
</description>
|
||||||
|
</openai_provider_example>
|
||||||
|
|
||||||
|
<other_providers>
|
||||||
|
<note>
|
||||||
|
Same pattern works for any AI provider:
|
||||||
|
- Google Gemini (GeminiProvider)
|
||||||
|
- Cohere (CohereProvider)
|
||||||
|
- Any other AI API that follows similar patterns
|
||||||
|
Just create a new Provider class extending BaseProvider and register it.
|
||||||
|
</note>
|
||||||
|
</other_providers>
|
||||||
|
</future_extensibility>
|
||||||
|
|
||||||
|
<implementation_notes>
|
||||||
|
<code_structure>
|
||||||
|
server/
|
||||||
|
providers/
|
||||||
|
BaseProvider.js # Abstract base class (NEW)
|
||||||
|
ClaudeProvider.js # Refactored Claude (extends BaseProvider)
|
||||||
|
MistralProvider.js # New Mistral (extends BaseProvider)
|
||||||
|
ProviderRegistry.js # Manages all providers (NEW)
|
||||||
|
ProviderFactory.js # Creates provider instances (NEW)
|
||||||
|
routes/
|
||||||
|
providers.js # Unified provider management (NEW)
|
||||||
|
chat.js # Updated to use ProviderFactory
|
||||||
|
migrations/
|
||||||
|
add_provider_support.sql # Database migration (NEW)
|
||||||
|
</code_structure>
|
||||||
|
|
||||||
|
<safety_guidelines>
|
||||||
|
- Refactor Claude code to use BaseProvider WITHOUT changing functionality
|
||||||
|
- All providers are isolated - errors in one don't affect others
|
||||||
|
- Database changes are backward compatible (TEXT field, not ENUM)
|
||||||
|
- Existing conversations default to 'claude' provider
|
||||||
|
- Test Claude thoroughly after refactoring
|
||||||
|
- Use feature flags if needed to enable/disable providers
|
||||||
|
- Log all provider operations separately for debugging
|
||||||
|
</safety_guidelines>
|
||||||
|
|
||||||
|
<error_handling>
|
||||||
|
- Each provider handles its own errors
|
||||||
|
- Provider errors should NOT affect other providers
|
||||||
|
- Show user-friendly error messages
|
||||||
|
- Log errors with provider context
|
||||||
|
- Don't throw unhandled exceptions
|
||||||
|
</error_handling>
|
||||||
|
</implementation_notes>
|
||||||
|
|
||||||
|
<database_changes>
|
||||||
|
<safe_migrations>
|
||||||
|
<migration_1>
|
||||||
|
<description>Add provider support (TEXT field for extensibility)</description>
|
||||||
|
<sql>
|
||||||
|
-- Add provider column to conversations (TEXT allows any provider name)
|
||||||
|
-- Default to 'claude' for backward compatibility
|
||||||
|
ALTER TABLE conversations
|
||||||
|
ADD COLUMN provider TEXT DEFAULT 'claude';
|
||||||
|
|
||||||
|
-- Add provider column to usage_tracking
|
||||||
|
ALTER TABLE usage_tracking
|
||||||
|
ADD COLUMN provider TEXT DEFAULT 'claude';
|
||||||
|
|
||||||
|
-- Add indexes for performance
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_conversations_provider
|
||||||
|
ON conversations(provider);
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_usage_tracking_provider
|
||||||
|
ON usage_tracking(provider);
|
||||||
|
</sql>
|
||||||
|
<rollback>
|
||||||
|
-- Rollback script (use with caution - may cause data issues)
|
||||||
|
DROP INDEX IF EXISTS idx_conversations_provider;
|
||||||
|
DROP INDEX IF EXISTS idx_usage_tracking_provider;
|
||||||
|
-- Note: SQLite versions before 3.35.0 don't support DROP COLUMN
|
||||||
|
-- Would need to recreate table without provider column
|
||||||
|
</rollback>
|
||||||
|
<note>
|
||||||
|
Using TEXT instead of ENUM allows adding new providers (OpenAI, Gemini, etc.)
|
||||||
|
without database schema changes in the future. This is future-proof.
|
||||||
|
</note>
|
||||||
|
</migration_1>
|
||||||
|
</safe_migrations>
|
||||||
|
|
||||||
|
<data_integrity>
|
||||||
|
- All existing conversations default to provider='claude'
|
||||||
|
- All existing messages default to provider='claude' (via usage_tracking)
|
||||||
|
- Migration is idempotent (can run multiple times safely)
|
||||||
|
- No data loss during migration
|
||||||
|
- Existing queries continue to work
|
||||||
|
</data_integrity>
|
||||||
|
</database_changes>
|
||||||
|
|
||||||
|
<api_endpoints>
|
||||||
|
<new_endpoints>
|
||||||
|
- GET /api/models - Get all models from all configured providers
|
||||||
|
- GET /api/providers - Get list of available providers and their status
|
||||||
|
- POST /api/providers/:provider/key - Set API key for specific provider
|
||||||
|
- POST /api/providers/:provider/test - Test provider API key
|
||||||
|
- GET /api/providers/:provider/status - Get provider configuration status
|
||||||
|
- DELETE /api/providers/:provider/key - Remove provider API key
|
||||||
|
</new_endpoints>
|
||||||
|
|
||||||
|
<updated_endpoints>
|
||||||
|
- POST /api/chat - Updated to use ProviderFactory (works with any provider)
|
||||||
|
* Accepts: { model: 'model-id', messages: [...], ... }
|
||||||
|
* Provider is determined from model ID or can be specified
|
||||||
|
- POST /api/chat/stream - Updated to use ProviderFactory (streaming for any provider)
|
||||||
|
* Same interface, works with any provider that supports streaming
|
||||||
|
</updated_endpoints>
|
||||||
|
</api_endpoints>
|
||||||
|
|
||||||
|
<dependencies>
|
||||||
|
<backend>
|
||||||
|
- No new dependencies required (use native fetch for Mistral API)
|
||||||
|
- Optional: @mistralai/mistralai (only if it provides significant value)
|
||||||
|
- Keep dependencies minimal to avoid conflicts
|
||||||
|
</backend>
|
||||||
|
</dependencies>
|
||||||
|
|
||||||
|
<testing_requirements>
|
||||||
|
<regression_tests>
|
||||||
|
- Verify all existing Claude functionality still works
|
||||||
|
- Test that existing conversations load correctly
|
||||||
|
- Verify Claude model selection still works
|
||||||
|
- Test Claude API endpoints are unaffected
|
||||||
|
- Verify database queries for Claude still work
|
||||||
|
- Test Claude streaming still works
|
||||||
|
</regression_tests>
|
||||||
|
|
||||||
|
<integration_tests>
|
||||||
|
- Test switching between Claude and Mistral models
|
||||||
|
- Test conversations with different providers
|
||||||
|
- Test error handling doesn't affect other providers
|
||||||
|
- Test migration doesn't break existing data
|
||||||
|
- Test ProviderFactory routes correctly
|
||||||
|
- Test unified model selector with multiple providers
|
||||||
|
</integration_tests>
|
||||||
|
|
||||||
|
<extensibility_tests>
|
||||||
|
- Verify adding a mock provider is straightforward
|
||||||
|
- Test that ProviderFactory correctly routes to providers
|
||||||
|
- Verify provider isolation (errors don't propagate)
|
||||||
|
- Test that new providers automatically appear in UI
|
||||||
|
</extensibility_tests>
|
||||||
|
</testing_requirements>
|
||||||
|
|
||||||
|
<success_criteria>
|
||||||
|
<functionality>
|
||||||
|
- Claude functionality works exactly as before (no regression)
|
||||||
|
- Mistral models appear in selector and work correctly
|
||||||
|
- Users can switch between Claude and Mistral seamlessly
|
||||||
|
- API key management works for both providers
|
||||||
|
- Database migration is safe and backward compatible
|
||||||
|
</functionality>
|
||||||
|
|
||||||
|
<extensibility>
|
||||||
|
- Adding a new provider (like OpenAI) requires only creating one new file and registering it in ProviderRegistry
|
||||||
|
- No changes needed to existing code when adding providers
|
||||||
|
- Provider architecture is documented and easy to follow
|
||||||
|
- Code is organized and maintainable
|
||||||
|
</extensibility>
|
||||||
|
</success_criteria>
|
||||||
|
</project_specification>
|
||||||
403
prompts/app_spec_theme_customization.txt
Normal file
403
prompts/app_spec_theme_customization.txt
Normal file
@@ -0,0 +1,403 @@
|
|||||||
|
<project_specification>
|
||||||
|
<project_name>Claude.ai Clone - Advanced Theme Customization</project_name>
|
||||||
|
|
||||||
|
<overview>
|
||||||
|
This specification adds advanced theme customization features to the Claude.ai clone application.
|
||||||
|
Users will be able to customize accent colors, font sizes, message spacing, and choose from
|
||||||
|
preset color themes. All changes are additive and backward-compatible with existing theme functionality.
|
||||||
|
|
||||||
|
The existing light/dark mode toggle remains unchanged and functional.
|
||||||
|
</overview>
|
||||||
|
|
||||||
|
<safety_requirements>
|
||||||
|
<critical>
|
||||||
|
- DO NOT modify existing light/dark mode functionality
|
||||||
|
- DO NOT break existing theme persistence
|
||||||
|
- DO NOT change existing CSS classes without ensuring backward compatibility
|
||||||
|
- All new theme options must be optional (defaults should match current behavior)
|
||||||
|
- Test thoroughly to ensure existing themes still work
|
||||||
|
- Maintain backward compatibility at all times
|
||||||
|
- New theme preferences should be stored separately from existing theme settings
|
||||||
|
</critical>
|
||||||
|
</safety_requirements>
|
||||||
|
|
||||||
|
<new_features>
|
||||||
|
<feature_6_theme_customization>
|
||||||
|
<title>Advanced Theme Customization</title>
|
||||||
|
<description>
|
||||||
|
Add advanced theme customization options. Users should be able to:
|
||||||
|
- Customize accent colors (beyond just light/dark mode)
|
||||||
|
- Choose from preset color themes (blue, green, purple, orange)
|
||||||
|
- Adjust font size globally (small, medium, large)
|
||||||
|
- Adjust message spacing (compact, comfortable, spacious)
|
||||||
|
- Preview theme changes before applying
|
||||||
|
- Save custom theme preferences
|
||||||
|
|
||||||
|
The customization interface should be intuitive and provide real-time preview
|
||||||
|
of changes before they are applied. All preferences should persist across sessions.
|
||||||
|
</description>
|
||||||
|
<priority>3</priority>
|
||||||
|
<category>style</category>
|
||||||
|
<implementation_approach>
|
||||||
|
- Create a new "Appearance" or "Theme" section in settings
|
||||||
|
- Add accent color picker with preset options (blue, green, purple, orange)
|
||||||
|
- Add font size slider/selector (small, medium, large)
|
||||||
|
- Add message spacing selector (compact, comfortable, spacious)
|
||||||
|
- Implement preview functionality that shows changes in real-time
|
||||||
|
- Store theme preferences in localStorage or backend (user preferences)
|
||||||
|
- Apply theme using CSS custom properties (CSS variables)
|
||||||
|
- Ensure theme works with both light and dark modes
|
||||||
|
</implementation_approach>
|
||||||
|
<test_steps>
|
||||||
|
1. Open settings menu
|
||||||
|
2. Navigate to "Appearance" or "Theme" section
|
||||||
|
3. Select a different accent color (e.g., green)
|
||||||
|
4. Verify accent color changes are visible in preview
|
||||||
|
5. Adjust font size slider to "large"
|
||||||
|
6. Verify font size changes in preview
|
||||||
|
7. Adjust message spacing option to "spacious"
|
||||||
|
8. Verify spacing changes in preview
|
||||||
|
9. Click "Preview" to see changes applied temporarily
|
||||||
|
10. Click "Apply" to save changes permanently
|
||||||
|
11. Verify changes persist after page refresh
|
||||||
|
12. Test with both light and dark mode
|
||||||
|
13. Test reset to default theme
|
||||||
|
14. Verify existing conversations display correctly with new theme
|
||||||
|
</test_steps>
|
||||||
|
</feature_6_theme_customization>
|
||||||
|
|
||||||
|
<feature_accent_colors>
|
||||||
|
<title>Accent Color Customization</title>
|
||||||
|
<description>
|
||||||
|
Allow users to customize the accent color used throughout the application.
|
||||||
|
This includes:
|
||||||
|
- Primary button colors
|
||||||
|
- Link colors
|
||||||
|
- Focus states
|
||||||
|
- Active states
|
||||||
|
- Selection highlights
|
||||||
|
- Progress indicators
|
||||||
|
|
||||||
|
Preset options:
|
||||||
|
- Blue (default, matches Claude.ai)
|
||||||
|
- Green
|
||||||
|
- Purple
|
||||||
|
- Orange
|
||||||
|
|
||||||
|
Users should be able to see a preview of each color before applying.
|
||||||
|
</description>
|
||||||
|
<priority>3</priority>
|
||||||
|
<category>style</category>
|
||||||
|
<implementation_approach>
|
||||||
|
- Define accent colors as CSS custom properties
|
||||||
|
- Create color palette for each preset (light and dark variants)
|
||||||
|
- Add color picker UI component in settings
|
||||||
|
- Update all accent color usages to use CSS variables
|
||||||
|
- Ensure colors have proper contrast ratios for accessibility
|
||||||
|
- Store selected accent color in user preferences
|
||||||
|
</implementation_approach>
|
||||||
|
<test_steps>
|
||||||
|
1. Open theme settings
|
||||||
|
2. Select "Green" accent color
|
||||||
|
3. Verify buttons, links, and highlights use green
|
||||||
|
4. Switch to dark mode and verify green accent still works
|
||||||
|
5. Test all preset colors (blue, green, purple, orange)
|
||||||
|
6. Verify color persists after refresh
|
||||||
|
7. Test accessibility (contrast ratios)
|
||||||
|
</test_steps>
|
||||||
|
</feature_accent_colors>
|
||||||
|
|
||||||
|
<feature_font_size>
|
||||||
|
<title>Global Font Size Adjustment</title>
|
||||||
|
<description>
|
||||||
|
Allow users to adjust the global font size for better readability.
|
||||||
|
Options:
|
||||||
|
- Small (12px base)
|
||||||
|
- Medium (14px base, default)
|
||||||
|
- Large (16px base)
|
||||||
|
|
||||||
|
Font size should scale proportionally across all text elements:
|
||||||
|
- Message text
|
||||||
|
- UI labels
|
||||||
|
- Input fields
|
||||||
|
- Buttons
|
||||||
|
- Sidebar text
|
||||||
|
</description>
|
||||||
|
<priority>3</priority>
|
||||||
|
<category>style</category>
|
||||||
|
<implementation_approach>
|
||||||
|
- Use CSS rem units for all font sizes
|
||||||
|
- Set base font size on root element
|
||||||
|
- Create font size presets (small, medium, large)
|
||||||
|
- Add font size selector in settings
|
||||||
|
- Store preference in user settings
|
||||||
|
- Ensure responsive design still works with different font sizes
|
||||||
|
</implementation_approach>
|
||||||
|
<test_steps>
|
||||||
|
1. Open theme settings
|
||||||
|
2. Select "Small" font size
|
||||||
|
3. Verify all text is smaller throughout the app
|
||||||
|
4. Select "Large" font size
|
||||||
|
5. Verify all text is larger throughout the app
|
||||||
|
6. Verify layout doesn't break with different font sizes
|
||||||
|
7. Test with long messages to ensure wrapping works
|
||||||
|
8. Verify preference persists after refresh
|
||||||
|
</test_steps>
|
||||||
|
</feature_font_size>
|
||||||
|
|
||||||
|
<feature_message_spacing>
|
||||||
|
<title>Message Spacing Customization</title>
|
||||||
|
<description>
|
||||||
|
Allow users to adjust the spacing between messages and within message bubbles.
|
||||||
|
Options:
|
||||||
|
- Compact: Minimal spacing (for users who prefer dense layouts)
|
||||||
|
- Comfortable: Default spacing (current behavior)
|
||||||
|
- Spacious: Increased spacing (for better readability)
|
||||||
|
|
||||||
|
This affects:
|
||||||
|
- Vertical spacing between messages
|
||||||
|
- Padding within message bubbles
|
||||||
|
- Spacing between message elements (avatar, text, timestamp)
|
||||||
|
</description>
|
||||||
|
<priority>3</priority>
|
||||||
|
<category>style</category>
|
||||||
|
<implementation_approach>
|
||||||
|
- Define spacing scale using CSS custom properties
|
||||||
|
- Create spacing presets (compact, comfortable, spacious)
|
||||||
|
- Apply spacing to message containers and bubbles
|
||||||
|
- Add spacing selector in settings
|
||||||
|
- Store preference in user settings
|
||||||
|
- Ensure spacing works well with different font sizes
|
||||||
|
</implementation_approach>
|
||||||
|
<test_steps>
|
||||||
|
1. Open theme settings
|
||||||
|
2. Select "Compact" spacing
|
||||||
|
3. Verify messages are closer together
|
||||||
|
4. Select "Spacious" spacing
|
||||||
|
5. Verify messages have more space between them
|
||||||
|
6. Test with long conversations to ensure scrolling works
|
||||||
|
7. Verify spacing preference persists after refresh
|
||||||
|
8. Test with different font sizes to ensure compatibility
|
||||||
|
</test_steps>
|
||||||
|
</feature_message_spacing>
|
||||||
|
|
||||||
|
<feature_theme_preview>
|
||||||
|
<title>Theme Preview Functionality</title>
|
||||||
|
<description>
|
||||||
|
Allow users to preview theme changes before applying them permanently.
|
||||||
|
The preview should:
|
||||||
|
- Show a sample conversation with the new theme applied
|
||||||
|
- Update in real-time as settings are changed
|
||||||
|
- Allow users to cancel and revert to previous theme
|
||||||
|
- Show both light and dark mode previews if applicable
|
||||||
|
|
||||||
|
Users should be able to:
|
||||||
|
- See preview immediately when changing settings
|
||||||
|
- Click "Apply" to save changes
|
||||||
|
- Click "Cancel" to discard changes
|
||||||
|
- Click "Reset" to return to default theme
|
||||||
|
</description>
|
||||||
|
<priority>3</priority>
|
||||||
|
<category>functional</category>
|
||||||
|
<implementation_approach>
|
||||||
|
- Create preview component showing sample conversation
|
||||||
|
- Apply theme changes temporarily to preview
|
||||||
|
- Store original theme state for cancel functionality
|
||||||
|
- Update preview in real-time as settings change
|
||||||
|
- Only persist changes when "Apply" is clicked
|
||||||
|
- Show clear visual feedback for preview vs. applied state
|
||||||
|
</implementation_approach>
|
||||||
|
<test_steps>
|
||||||
|
1. Open theme settings
|
||||||
|
2. Change accent color to green
|
||||||
|
3. Verify preview updates immediately
|
||||||
|
4. Change font size to large
|
||||||
|
5. Verify preview updates with new font size
|
||||||
|
6. Click "Cancel" and verify changes are reverted
|
||||||
|
7. Make changes again and click "Apply"
|
||||||
|
8. Verify changes are saved and applied to actual interface
|
||||||
|
9. Test preview with both light and dark mode
|
||||||
|
</test_steps>
|
||||||
|
</feature_theme_preview>
|
||||||
|
</new_features>
|
||||||
|
|
||||||
|
<implementation_notes>
|
||||||
|
<code_structure>
|
||||||
|
frontend/
|
||||||
|
components/
|
||||||
|
ThemeSettings.jsx # New theme customization UI (NEW)
|
||||||
|
ThemePreview.jsx # Preview component (NEW)
|
||||||
|
styles/
|
||||||
|
theme-variables.css # CSS custom properties for themes (NEW)
|
||||||
|
accent-colors.css # Accent color definitions (NEW)
|
||||||
|
hooks/
|
||||||
|
useTheme.js # Updated to handle new theme options
|
||||||
|
utils/
|
||||||
|
themeStorage.js # Theme preference persistence (NEW)
|
||||||
|
</code_structure>
|
||||||
|
|
||||||
|
<css_architecture>
|
||||||
|
Use CSS custom properties (CSS variables) for all theme values:
|
||||||
|
- --accent-color-primary
|
||||||
|
- --accent-color-hover
|
||||||
|
- --font-size-base
|
||||||
|
- --message-spacing-vertical
|
||||||
|
- --message-padding
|
||||||
|
|
||||||
|
This allows easy theme switching without JavaScript manipulation.
|
||||||
|
</css_architecture>
|
||||||
|
|
||||||
|
<storage_approach>
|
||||||
|
Store theme preferences in:
|
||||||
|
- localStorage for client-side persistence
|
||||||
|
- Or backend user preferences table if available
|
||||||
|
|
||||||
|
Structure:
|
||||||
|
{
|
||||||
|
accentColor: 'blue' | 'green' | 'purple' | 'orange',
|
||||||
|
fontSize: 'small' | 'medium' | 'large',
|
||||||
|
messageSpacing: 'compact' | 'comfortable' | 'spacious',
|
||||||
|
theme: 'light' | 'dark' (existing)
|
||||||
|
}
|
||||||
|
</storage_approach>
|
||||||
|
|
||||||
|
<safety_guidelines>
|
||||||
|
- Keep existing theme functionality intact
|
||||||
|
- Default values should match current behavior
|
||||||
|
- Use feature detection for new theme features
|
||||||
|
- Gracefully degrade if CSS custom properties are not supported
|
||||||
|
- Test with existing conversations and UI elements
|
||||||
|
- Ensure accessibility standards are maintained
|
||||||
|
</safety_guidelines>
|
||||||
|
</implementation_notes>
|
||||||
|
|
||||||
|
<ui_components>
|
||||||
|
<theme_settings_panel>
|
||||||
|
<description>Settings panel for theme customization</description>
|
||||||
|
<sections>
|
||||||
|
- Accent Color: Radio buttons or color swatches for preset colors
|
||||||
|
- Font Size: Slider or dropdown (small, medium, large)
|
||||||
|
- Message Spacing: Radio buttons (compact, comfortable, spacious)
|
||||||
|
- Preview: Live preview of theme changes
|
||||||
|
- Actions: Apply, Cancel, Reset buttons
|
||||||
|
</sections>
|
||||||
|
</theme_settings_panel>
|
||||||
|
|
||||||
|
<theme_preview>
|
||||||
|
<description>Preview component showing sample conversation</description>
|
||||||
|
<elements>
|
||||||
|
- Sample user message
|
||||||
|
- Sample AI response
|
||||||
|
- Shows current accent color
|
||||||
|
- Shows current font size
|
||||||
|
- Shows current spacing
|
||||||
|
- Updates in real-time
|
||||||
|
</elements>
|
||||||
|
</theme_preview>
|
||||||
|
</ui_components>
|
||||||
|
|
||||||
|
<css_custom_properties>
|
||||||
|
<accent_colors>
|
||||||
|
Define CSS variables for each accent color preset:
|
||||||
|
--accent-blue: #2563eb;
|
||||||
|
--accent-green: #10b981;
|
||||||
|
--accent-purple: #8b5cf6;
|
||||||
|
--accent-orange: #f59e0b;
|
||||||
|
|
||||||
|
Each should have hover, active, and focus variants.
|
||||||
|
</accent_colors>
|
||||||
|
|
||||||
|
<font_sizes>
|
||||||
|
Define base font sizes:
|
||||||
|
--font-size-small: 0.75rem; (12px)
|
||||||
|
--font-size-medium: 0.875rem; (14px, default)
|
||||||
|
--font-size-large: 1rem; (16px)
|
||||||
|
</font_sizes>
|
||||||
|
|
||||||
|
<spacing>
|
||||||
|
Define spacing scales:
|
||||||
|
--spacing-compact: 0.5rem;
|
||||||
|
--spacing-comfortable: 1rem; (default)
|
||||||
|
--spacing-spacious: 1.5rem;
|
||||||
|
</spacing>
|
||||||
|
</css_custom_properties>
|
||||||
|
|
||||||
|
<api_endpoints>
|
||||||
|
<if_backend_storage>
|
||||||
|
If storing preferences in backend:
|
||||||
|
- GET /api/user/preferences - Get user theme preferences
|
||||||
|
- PUT /api/user/preferences - Update user theme preferences
|
||||||
|
- GET /api/user/preferences/theme - Get theme preferences only
|
||||||
|
</if_backend_storage>
|
||||||
|
|
||||||
|
<note>
|
||||||
|
If using localStorage only, no API endpoints needed.
|
||||||
|
Backend storage is optional but recommended for multi-device sync.
|
||||||
|
</note>
|
||||||
|
</api_endpoints>
|
||||||
|
|
||||||
|
<accessibility_requirements>
|
||||||
|
- All accent colors must meet WCAG AA contrast ratios (4.5:1 for normal text, 3:1 for large text)
|
||||||
|
- Font size changes must not break screen reader compatibility
|
||||||
|
- Theme settings must be keyboard navigable
|
||||||
|
- Color choices should not be the only way to convey information
|
||||||
|
- Provide high contrast mode option if possible
|
||||||
|
</accessibility_requirements>
|
||||||
|
|
||||||
|
<testing_requirements>
|
||||||
|
<regression_tests>
|
||||||
|
- Verify existing light/dark mode toggle still works
|
||||||
|
- Verify existing theme persistence still works
|
||||||
|
- Test that default theme matches current behavior
|
||||||
|
- Verify existing conversations display correctly
|
||||||
|
- Test that all UI elements are styled correctly
|
||||||
|
</regression_tests>
|
||||||
|
|
||||||
|
<feature_tests>
|
||||||
|
- Test each accent color preset
|
||||||
|
- Test each font size option
|
||||||
|
- Test each spacing option
|
||||||
|
- Test theme preview functionality
|
||||||
|
- Test theme persistence (localStorage/backend)
|
||||||
|
- Test theme reset to defaults
|
||||||
|
- Test theme with both light and dark modes
|
||||||
|
- Test theme changes in real-time
|
||||||
|
</feature_tests>
|
||||||
|
|
||||||
|
<compatibility_tests>
|
||||||
|
- Test with different browsers (Chrome, Firefox, Safari, Edge)
|
||||||
|
- Test with different screen sizes (responsive design)
|
||||||
|
- Test with long conversations
|
||||||
|
- Test with different message types (text, code, artifacts)
|
||||||
|
- Test accessibility with screen readers
|
||||||
|
</compatibility_tests>
|
||||||
|
</testing_requirements>
|
||||||
|
|
||||||
|
<success_criteria>
|
||||||
|
<functionality>
|
||||||
|
- Users can customize accent colors from preset options
|
||||||
|
- Users can adjust global font size (small, medium, large)
|
||||||
|
- Users can adjust message spacing (compact, comfortable, spacious)
|
||||||
|
- Theme preview shows changes in real-time
|
||||||
|
- Theme preferences persist across sessions
|
||||||
|
- Existing light/dark mode functionality works unchanged
|
||||||
|
- All theme options work together harmoniously
|
||||||
|
</functionality>
|
||||||
|
|
||||||
|
<user_experience>
|
||||||
|
- Theme customization is intuitive and easy to use
|
||||||
|
- Preview provides clear feedback before applying changes
|
||||||
|
- Changes apply smoothly without flickering
|
||||||
|
- Settings are easy to find and access
|
||||||
|
- Reset to defaults is easily accessible
|
||||||
|
</user_experience>
|
||||||
|
|
||||||
|
<technical>
|
||||||
|
- Code is well-organized and maintainable
|
||||||
|
- CSS custom properties are used consistently
|
||||||
|
- Theme preferences are stored reliably
|
||||||
|
- No performance degradation with theme changes
|
||||||
|
- Backward compatibility is maintained
|
||||||
|
</technical>
|
||||||
|
</success_criteria>
|
||||||
|
</project_specification>
|
||||||
304
prompts/coding_prompt.md
Normal file
304
prompts/coding_prompt.md
Normal file
@@ -0,0 +1,304 @@
|
|||||||
|
## YOUR ROLE - CODING AGENT
|
||||||
|
|
||||||
|
You are continuing work on a long-running autonomous development task.
|
||||||
|
This is a FRESH context window - you have no memory of previous sessions.
|
||||||
|
|
||||||
|
You have access to Linear for project management via MCP tools. Linear is your
|
||||||
|
single source of truth for what needs to be built and what's been completed.
|
||||||
|
|
||||||
|
### STEP 1: GET YOUR BEARINGS (MANDATORY)
|
||||||
|
|
||||||
|
Start by orienting yourself:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 1. See your working directory
|
||||||
|
pwd
|
||||||
|
|
||||||
|
# 2. List files to understand project structure
|
||||||
|
ls -la
|
||||||
|
|
||||||
|
# 3. Read the project specification to understand what you're building
|
||||||
|
cat app_spec.txt
|
||||||
|
|
||||||
|
# 4. Read the Linear project state
|
||||||
|
cat .linear_project.json
|
||||||
|
|
||||||
|
# 5. Check recent git history
|
||||||
|
git log --oneline -20
|
||||||
|
```
|
||||||
|
|
||||||
|
Understanding the `app_spec.txt` is critical - it contains the full requirements
|
||||||
|
for the application you're building.
|
||||||
|
|
||||||
|
### STEP 2: CHECK LINEAR STATUS
|
||||||
|
|
||||||
|
Query Linear to understand current project state. The `.linear_project.json` file
|
||||||
|
contains the `project_id` and `team_id` you should use for all Linear queries.
|
||||||
|
|
||||||
|
1. **Find the META issue** for session context:
|
||||||
|
Use `mcp__linear__list_issues` with the project ID from `.linear_project.json`
|
||||||
|
and search for "[META] Project Progress Tracker".
|
||||||
|
Read the issue description and recent comments for context from previous sessions.
|
||||||
|
|
||||||
|
2. **Count progress:**
|
||||||
|
Use `mcp__linear__list_issues` with the project ID to get all issues, then count:
|
||||||
|
- Issues with status "Done" = completed
|
||||||
|
- Issues with status "Todo" = remaining
|
||||||
|
- Issues with status "In Progress" = currently being worked on
|
||||||
|
|
||||||
|
3. **Check for in-progress work:**
|
||||||
|
If any issue is "In Progress", that should be your first priority.
|
||||||
|
A previous session may have been interrupted.
|
||||||
|
|
||||||
|
### STEP 3: START SERVERS (IF NOT RUNNING)
|
||||||
|
|
||||||
|
If `init.sh` exists, run it:
|
||||||
|
```bash
|
||||||
|
chmod +x init.sh
|
||||||
|
./init.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
Otherwise, start servers manually and document the process.
|
||||||
|
|
||||||
|
### STEP 4: VERIFICATION TEST (CRITICAL!)
|
||||||
|
|
||||||
|
**MANDATORY BEFORE NEW WORK:**
|
||||||
|
|
||||||
|
The previous session may have introduced bugs. Before implementing anything
|
||||||
|
new, you MUST run verification tests.
|
||||||
|
|
||||||
|
Use `mcp__linear__list_issues` with the project ID and status "Done" to find 1-2
|
||||||
|
completed features that are core to the app's functionality.
|
||||||
|
|
||||||
|
Test these through the browser using Puppeteer:
|
||||||
|
- Navigate to the feature
|
||||||
|
- Verify it still works as expected
|
||||||
|
- Take screenshots to confirm
|
||||||
|
|
||||||
|
**If you find ANY issues (functional or visual):**
|
||||||
|
- Use `mcp__linear__update_issue` to set status back to "In Progress"
|
||||||
|
- Add a comment explaining what broke
|
||||||
|
- Fix the issue BEFORE moving to new features
|
||||||
|
- This includes UI bugs like:
|
||||||
|
* White-on-white text or poor contrast
|
||||||
|
* Random characters displayed
|
||||||
|
* Incorrect timestamps
|
||||||
|
* Layout issues or overflow
|
||||||
|
* Buttons too close together
|
||||||
|
* Missing hover states
|
||||||
|
* Console errors
|
||||||
|
|
||||||
|
### STEP 5: SELECT NEXT ISSUE TO WORK ON
|
||||||
|
|
||||||
|
Use `mcp__linear__list_issues` with the project ID from `.linear_project.json`:
|
||||||
|
- Filter by `status`: "Todo"
|
||||||
|
- Sort by priority (1=urgent is highest)
|
||||||
|
- `limit`: 5
|
||||||
|
|
||||||
|
Review the highest-priority unstarted issues and select ONE to work on.
|
||||||
|
|
||||||
|
### STEP 6: CLAIM THE ISSUE
|
||||||
|
|
||||||
|
Before starting work, use `mcp__linear__update_issue` to:
|
||||||
|
- Set the issue's `status` to "In Progress"
|
||||||
|
|
||||||
|
This signals to any other agents (or humans watching) that this issue is being worked on.
|
||||||
|
|
||||||
|
### STEP 7: IMPLEMENT THE FEATURE
|
||||||
|
|
||||||
|
Read the issue description for test steps and implement accordingly:
|
||||||
|
|
||||||
|
1. Write the code (frontend and/or backend as needed)
|
||||||
|
2. Test manually using browser automation (see Step 8)
|
||||||
|
3. Fix any issues discovered
|
||||||
|
4. Verify the feature works end-to-end
|
||||||
|
|
||||||
|
### STEP 8: VERIFY WITH BROWSER AUTOMATION
|
||||||
|
|
||||||
|
**CRITICAL:** You MUST verify features through the actual UI.
|
||||||
|
|
||||||
|
Use browser automation tools:
|
||||||
|
- `mcp__puppeteer__puppeteer_navigate` - Start browser and go to URL
|
||||||
|
- `mcp__puppeteer__puppeteer_screenshot` - Capture screenshot
|
||||||
|
- `mcp__puppeteer__puppeteer_click` - Click elements
|
||||||
|
- `mcp__puppeteer__puppeteer_fill` - Fill form inputs
|
||||||
|
|
||||||
|
**DO:**
|
||||||
|
- Test through the UI with clicks and keyboard input
|
||||||
|
- Take screenshots to verify visual appearance
|
||||||
|
- Check for console errors in browser
|
||||||
|
- Verify complete user workflows end-to-end
|
||||||
|
|
||||||
|
**DON'T:**
|
||||||
|
- Only test with curl commands (backend testing alone is insufficient)
|
||||||
|
- Use JavaScript evaluation to bypass UI (no shortcuts)
|
||||||
|
- Skip visual verification
|
||||||
|
- Mark issues Done without thorough verification
|
||||||
|
|
||||||
|
### STEP 9: UPDATE LINEAR ISSUE (CAREFULLY!)
|
||||||
|
|
||||||
|
After thorough verification:
|
||||||
|
|
||||||
|
1. **Add implementation comment** using `mcp__linear__create_comment`:
|
||||||
|
```markdown
|
||||||
|
## Implementation Complete
|
||||||
|
|
||||||
|
### Changes Made
|
||||||
|
- [List of files changed]
|
||||||
|
- [Key implementation details]
|
||||||
|
|
||||||
|
### Verification
|
||||||
|
- Tested via Puppeteer browser automation
|
||||||
|
- Screenshots captured
|
||||||
|
- All test steps from issue description verified
|
||||||
|
|
||||||
|
### Git Commit
|
||||||
|
[commit hash and message]
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **Update status** using `mcp__linear__update_issue`:
|
||||||
|
- Set `status` to "Done"
|
||||||
|
|
||||||
|
**ONLY update status to Done AFTER:**
|
||||||
|
- All test steps in the issue description pass
|
||||||
|
- Visual verification via screenshots
|
||||||
|
- No console errors
|
||||||
|
- Code committed to git (complete Step 10 first so the commit hash can go in the implementation comment)
|
||||||
|
|
||||||
|
### STEP 10: COMMIT YOUR PROGRESS
|
||||||
|
|
||||||
|
Make a descriptive git commit:
|
||||||
|
```bash
|
||||||
|
git add .
|
||||||
|
git commit -m "Implement [feature name]
|
||||||
|
|
||||||
|
- Added [specific changes]
|
||||||
|
- Tested with browser automation
|
||||||
|
- Linear issue: [issue identifier]
|
||||||
|
"
|
||||||
|
```
|
||||||
|
|
||||||
|
### STEP 11: UPDATE META ISSUE
|
||||||
|
|
||||||
|
Add a comment to the "[META] Project Progress Tracker" issue with session summary:
|
||||||
|
|
||||||
|
```markdown
|
||||||
|
## Session Complete - [Brief description]
|
||||||
|
|
||||||
|
### Completed This Session
|
||||||
|
- [Issue title]: [Brief summary of implementation]
|
||||||
|
|
||||||
|
### Current Progress
|
||||||
|
- X issues Done
|
||||||
|
- Y issues In Progress
|
||||||
|
- Z issues remaining in Todo
|
||||||
|
|
||||||
|
### Verification Status
|
||||||
|
- Ran verification tests on [feature names]
|
||||||
|
- All previously completed features still working: [Yes/No]
|
||||||
|
|
||||||
|
### Notes for Next Session
|
||||||
|
- [Any important context]
|
||||||
|
- [Recommendations for what to work on next]
|
||||||
|
- [Any blockers or concerns]
|
||||||
|
```
|
||||||
|
|
||||||
|
### STEP 12: END SESSION CLEANLY
|
||||||
|
|
||||||
|
Before context fills up:
|
||||||
|
1. Commit all working code
|
||||||
|
2. If working on an issue you can't complete:
|
||||||
|
- Add a comment explaining progress and what's left
|
||||||
|
- Keep status as "In Progress" (don't revert to Todo)
|
||||||
|
3. Update META issue with session summary
|
||||||
|
4. Ensure no uncommitted changes
|
||||||
|
5. Leave app in working state (no broken features)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## LINEAR WORKFLOW RULES
|
||||||
|
|
||||||
|
**Status Transitions:**
|
||||||
|
- Todo → In Progress (when you start working)
|
||||||
|
- In Progress → Done (when verified complete)
|
||||||
|
- Done → In Progress (only if regression found)
|
||||||
|
|
||||||
|
**Comments Are Your Memory:**
|
||||||
|
- Every implementation gets a detailed comment
|
||||||
|
- Session handoffs happen via META issue comments
|
||||||
|
- Comments are permanent - future agents will read them
|
||||||
|
|
||||||
|
**NEVER:**
|
||||||
|
- Delete or archive issues
|
||||||
|
- Modify issue descriptions or test steps
|
||||||
|
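- Work on issues that someone else is actively working on and has marked "In Progress" (resuming an issue left "In Progress" by an interrupted previous session, as described in Step 2, is expected)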
|
||||||
|
- Mark "Done" without verification
|
||||||
|
- Leave issues "In Progress" when switching to another issue
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## TESTING REQUIREMENTS
|
||||||
|
|
||||||
|
**ALL testing must use browser automation tools.**
|
||||||
|
|
||||||
|
Available Puppeteer tools:
|
||||||
|
- `mcp__puppeteer__puppeteer_navigate` - Go to URL
|
||||||
|
- `mcp__puppeteer__puppeteer_screenshot` - Capture screenshot
|
||||||
|
- `mcp__puppeteer__puppeteer_click` - Click elements
|
||||||
|
- `mcp__puppeteer__puppeteer_fill` - Fill form inputs
|
||||||
|
- `mcp__puppeteer__puppeteer_select` - Select dropdown options
|
||||||
|
- `mcp__puppeteer__puppeteer_hover` - Hover over elements
|
||||||
|
|
||||||
|
Test like a human user with mouse and keyboard. Don't take shortcuts.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## SESSION PACING
|
||||||
|
|
||||||
|
**How many issues should you complete per session?**
|
||||||
|
|
||||||
|
This depends on the project phase:
|
||||||
|
|
||||||
|
**Early phase (< 20% Done):** You may complete multiple issues per session when:
|
||||||
|
- Setting up infrastructure/scaffolding that unlocks many issues at once
|
||||||
|
- Fixing build issues that were blocking progress
|
||||||
|
- Auditing existing code and marking already-implemented features as Done
|
||||||
|
|
||||||
|
**Mid/Late phase (>= 20% Done):** Slow down to **1-2 issues per session**:
|
||||||
|
- Each feature now requires focused implementation and testing
|
||||||
|
- Quality matters more than quantity
|
||||||
|
- Clean handoffs are critical
|
||||||
|
|
||||||
|
**After completing an issue, ask yourself:**
|
||||||
|
1. Is the app in a stable, working state right now?
|
||||||
|
2. Have I been working for a while? (You can't measure this precisely, but use judgment)
|
||||||
|
3. Would this be a good stopping point for handoff?
|
||||||
|
|
||||||
|
If yes to all three → proceed to Step 11 (session summary) and end cleanly.
|
||||||
|
If no → you may continue to the next issue, but **commit first** and stay aware.
|
||||||
|
|
||||||
|
**Golden rule:** It's always better to end a session cleanly with good handoff notes
|
||||||
|
than to start another issue and risk running out of context mid-implementation.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## IMPORTANT REMINDERS
|
||||||
|
|
||||||
|
**Your Goal:** Production-quality application with all Linear issues Done
|
||||||
|
|
||||||
|
**This Session's Goal:** Make meaningful progress with clean handoff
|
||||||
|
|
||||||
|
**Priority:** Fix regressions before implementing new features
|
||||||
|
|
||||||
|
**Quality Bar:**
|
||||||
|
- Zero console errors
|
||||||
|
- Polished UI matching the design in app_spec.txt
|
||||||
|
- All features work end-to-end through the UI
|
||||||
|
- Fast, responsive, professional
|
||||||
|
|
||||||
|
**Context is finite.** You cannot monitor your context usage, so err on the side
|
||||||
|
of ending sessions early with good handoff notes. The next agent will continue.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
Begin by running Step 1 (Get Your Bearings).
|
||||||
187
prompts/initializer_bis_prompt.md
Normal file
187
prompts/initializer_bis_prompt.md
Normal file
@@ -0,0 +1,187 @@
|
|||||||
|
## YOUR ROLE - INITIALIZER BIS AGENT (Adding New Specifications)
|
||||||
|
|
||||||
|
You are an EXTENSION agent in a long-running autonomous development process.
|
||||||
|
Your job is to ADD NEW SPECIFICATIONS to an EXISTING project that has already been initialized.
|
||||||
|
|
||||||
|
**IMPORTANT:** This project already exists and has been initialized. You are NOT creating a new project.
|
||||||
|
You are ADDING new features based on a new specification file.
|
||||||
|
|
||||||
|
You have access to Linear for project management via MCP tools. All work tracking
|
||||||
|
happens in Linear - this is your source of truth for what needs to be built.
|
||||||
|
|
||||||
|
### FIRST: Understand the Existing Project
|
||||||
|
|
||||||
|
Start by reading the existing project state:
|
||||||
|
|
||||||
|
1. **Read `.linear_project.json`:**
|
||||||
|
```bash
|
||||||
|
cat .linear_project.json
|
||||||
|
```
|
||||||
|
This file contains:
|
||||||
|
- `project_id`: The Linear project ID (you'll use this for new issues)
|
||||||
|
- `team_id`: The team ID (you'll use this for new issues)
|
||||||
|
- `meta_issue_id`: The META issue ID (you'll add a comment here)
|
||||||
|
- `total_issues`: Current total number of issues
|
||||||
|
|
||||||
|
2. **Read the original `app_spec.txt`** (if it exists) to understand what was already built:
|
||||||
|
```bash
|
||||||
|
cat app_spec.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
3. **Check existing Linear issues** to understand what's already been done:
|
||||||
|
Use `mcp__linear__list_issues` with the project ID from `.linear_project.json`
|
||||||
|
to see existing issues and their statuses.
|
||||||
|
|
||||||
|
### SECOND: Read the New Specification File
|
||||||
|
|
||||||
|
Read the NEW specification file that was provided. This file contains the ADDITIONAL
|
||||||
|
features to be added to the existing project. The filename will be something like
|
||||||
|
`app_spec_new1.txt` or similar.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# List files to find the new spec file
|
||||||
|
ls -la *.txt
|
||||||
|
|
||||||
|
# Read the new specification file
|
||||||
|
cat app_spec_new*.txt
|
||||||
|
# (or whatever the filename is)
|
||||||
|
```
|
||||||
|
|
||||||
|
Read it carefully to understand what NEW features need to be added.
|
||||||
|
|
||||||
|
### CRITICAL TASK: Create NEW Linear Issues
|
||||||
|
|
||||||
|
Based on the NEW specification file, create NEW Linear issues for each NEW feature
|
||||||
|
using the `mcp__linear__create_issue` tool.
|
||||||
|
|
||||||
|
**IMPORTANT:**
|
||||||
|
- Use the EXISTING `project_id` and `team_id` from `.linear_project.json`
|
||||||
|
- Do NOT create a new Linear project
|
||||||
|
- Do NOT modify existing issues
|
||||||
|
- Only create NEW issues for the NEW features
|
||||||
|
|
||||||
|
**For each NEW feature, create an issue with:**
|
||||||
|
|
||||||
|
```
|
||||||
|
title: Brief feature name (e.g., "New Feature - Advanced search")
|
||||||
|
teamId: [Use the team ID from .linear_project.json]
|
||||||
|
projectId: [Use the project ID from .linear_project.json]
|
||||||
|
description: Markdown with feature details and test steps (see template below)
|
||||||
|
priority: 1-4 based on importance (1=urgent/foundational, 4=low/polish)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Issue Description Template:**
|
||||||
|
```markdown
|
||||||
|
## Feature Description
|
||||||
|
[Brief description of what this NEW feature does and why it matters]
|
||||||
|
|
||||||
|
## Category
|
||||||
|
[functional OR style]
|
||||||
|
|
||||||
|
## Test Steps
|
||||||
|
1. Navigate to [page/location]
|
||||||
|
2. [Specific action to perform]
|
||||||
|
3. [Another action]
|
||||||
|
4. Verify [expected result]
|
||||||
|
5. [Additional verification steps as needed]
|
||||||
|
|
||||||
|
## Acceptance Criteria
|
||||||
|
- [ ] [Specific criterion 1]
|
||||||
|
- [ ] [Specific criterion 2]
|
||||||
|
- [ ] [Specific criterion 3]
|
||||||
|
|
||||||
|
## Note
|
||||||
|
This is a NEW feature added via initializer bis. It extends the existing application.
|
||||||
|
```
|
||||||
|
|
||||||
|
**Requirements for NEW Linear Issues:**
|
||||||
|
- Create issues ONLY for NEW features from the new spec file
|
||||||
|
- Do NOT duplicate features that already exist
|
||||||
|
- Mix of functional and style features (note category in description)
|
||||||
|
- Order by priority: foundational features get priority 1-2, polish features get 3-4
|
||||||
|
- Include detailed test steps in each issue description
|
||||||
|
- All issues start in "Todo" status (default)
|
||||||
|
- Prefix issue titles with something like "[NEW]" if helpful to distinguish from existing issues
|
||||||
|
|
||||||
|
**Priority Guidelines:**
|
||||||
|
- Priority 1 (Urgent): Core infrastructure additions, critical new features
|
||||||
|
- Priority 2 (High): Important user-facing new features
|
||||||
|
- Priority 3 (Medium): Secondary new features, enhancements
|
||||||
|
- Priority 4 (Low): Polish, nice-to-haves, edge cases
|
||||||
|
|
||||||
|
**CRITICAL INSTRUCTION:**
|
||||||
|
Once created, issues can ONLY have their status changed (Todo → In Progress → Done).
|
||||||
|
Never delete issues, never modify descriptions after creation.
|
||||||
|
This ensures no functionality is missed across sessions.
|
||||||
|
|
||||||
|
### NEXT TASK: Update Linear Project State
|
||||||
|
|
||||||
|
Update the `.linear_project.json` file to reflect the new total number of issues:
|
||||||
|
|
||||||
|
1. Read the current `.linear_project.json`
|
||||||
|
2. Count how many NEW issues you created
|
||||||
|
3. Add that number to the existing `total_issues` count
|
||||||
|
4. Update the file with the new total
|
||||||
|
|
||||||
|
Example update:
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"initialized": true,
|
||||||
|
"created_at": "[original timestamp]",
|
||||||
|
"team_id": "[existing team ID]",
|
||||||
|
"project_id": "[existing project ID]",
|
||||||
|
"project_name": "[existing project name]",
|
||||||
|
"meta_issue_id": "[existing meta issue ID]",
|
||||||
|
"total_issues": [original_count + new_issues_count],
|
||||||
|
"notes": "Project initialized by initializer agent. Extended by initializer bis with [X] new issues."
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### NEXT TASK: Update META Issue
|
||||||
|
|
||||||
|
Add a comment to the existing "[META] Project Progress Tracker" issue (use the `meta_issue_id`
|
||||||
|
from `.linear_project.json`) summarizing what you accomplished:
|
||||||
|
|
||||||
|
```markdown
|
||||||
|
## Initializer Bis Session Complete - New Specifications Added
|
||||||
|
|
||||||
|
### Accomplished
|
||||||
|
- Read new specification file: [filename]
|
||||||
|
- Created [X] NEW Linear issues for additional features
|
||||||
|
- Updated .linear_project.json with new total issue count
|
||||||
|
- [Any other relevant information]
|
||||||
|
|
||||||
|
### New Issues Created
|
||||||
|
- Total new issues: [X]
|
||||||
|
- Priority 1: [X]
|
||||||
|
- Priority 2: [X]
|
||||||
|
- Priority 3: [X]
|
||||||
|
- Priority 4: [X]
|
||||||
|
|
||||||
|
### Updated Linear Status
|
||||||
|
- Previous total issues: [Y]
|
||||||
|
- New total issues: [Y + X]
|
||||||
|
- All new issues start in "Todo" status
|
||||||
|
|
||||||
|
### Notes for Next Session
|
||||||
|
- [Any important context about the new features]
|
||||||
|
- [Recommendations for what to work on next]
|
||||||
|
- [Any dependencies or integration points with existing features]
|
||||||
|
```
|
||||||
|
|
||||||
|
### ENDING THIS SESSION
|
||||||
|
|
||||||
|
Before your context fills up:
|
||||||
|
1. Commit all work with descriptive messages
|
||||||
|
2. Add a comment to the META issue (as described above)
|
||||||
|
3. Ensure `.linear_project.json` is updated with the new total
|
||||||
|
4. Leave the environment in a clean, working state
|
||||||
|
|
||||||
|
The next agent (coding agent) will continue from here with a fresh context window and will
|
||||||
|
see both the original issues and the new issues you created.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**Remember:** You are EXTENDING an existing project, not creating a new one.
|
||||||
|
Focus on adding the new features cleanly without breaking existing functionality.
|
||||||
|
Production-ready integration is the goal.
|
||||||
202
prompts/initializer_prompt.md
Normal file
202
prompts/initializer_prompt.md
Normal file
@@ -0,0 +1,202 @@
|
|||||||
|
## YOUR ROLE - INITIALIZER AGENT (Session 1 of Many)
|
||||||
|
|
||||||
|
You are the FIRST agent in a long-running autonomous development process.
|
||||||
|
Your job is to set up the foundation for all future coding agents.
|
||||||
|
|
||||||
|
You have access to Linear for project management via MCP tools. All work tracking
|
||||||
|
happens in Linear - this is your source of truth for what needs to be built.
|
||||||
|
|
||||||
|
### FIRST: Read the Project Specification
|
||||||
|
|
||||||
|
Start by reading `app_spec.txt` in your working directory. This file contains
|
||||||
|
the complete specification for what you need to build. Read it carefully
|
||||||
|
before proceeding.
|
||||||
|
|
||||||
|
### SECOND: Set Up Linear Project
|
||||||
|
|
||||||
|
Before creating issues, you need to set up Linear:
|
||||||
|
|
||||||
|
1. **Get the team ID:**
|
||||||
|
Use `mcp__linear__list_teams` to see available teams.
|
||||||
|
Note the team ID (e.g., "TEAM-123") for the team where you'll create issues.
|
||||||
|
|
||||||
|
2. **Create a Linear project:**
|
||||||
|
Use `mcp__linear__create_project` to create a new project:
|
||||||
|
- `name`: Use the project name from app_spec.txt (e.g., "Claude.ai Clone")
|
||||||
|
- `teamIds`: Array with your team ID
|
||||||
|
- `description`: Brief project overview from app_spec.txt
|
||||||
|
|
||||||
|
Save the returned project ID - you'll use it when creating issues.
|
||||||
|
|
||||||
|
### CRITICAL TASK: Create Linear Issues
|
||||||
|
|
||||||
|
Based on `app_spec.txt`, create Linear issues for each feature using the
|
||||||
|
`mcp__linear__create_issue` tool. Create 50 detailed issues that
|
||||||
|
comprehensively cover all features in the spec.
|
||||||
|
|
||||||
|
**For each feature, create an issue with:**
|
||||||
|
|
||||||
|
```
|
||||||
|
title: Brief feature name (e.g., "Auth - User login flow")
|
||||||
|
teamId: [Use the team ID you found earlier]
|
||||||
|
projectId: [Use the project ID from the project you created]
|
||||||
|
description: Markdown with feature details and test steps (see template below)
|
||||||
|
priority: 1-4 based on importance (1=urgent/foundational, 4=low/polish)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Issue Description Template:**
|
||||||
|
```markdown
|
||||||
|
## Feature Description
|
||||||
|
[Brief description of what this feature does and why it matters]
|
||||||
|
|
||||||
|
## Category
|
||||||
|
[functional OR style]
|
||||||
|
|
||||||
|
## Test Steps
|
||||||
|
1. Navigate to [page/location]
|
||||||
|
2. [Specific action to perform]
|
||||||
|
3. [Another action]
|
||||||
|
4. Verify [expected result]
|
||||||
|
5. [Additional verification steps as needed]
|
||||||
|
|
||||||
|
## Acceptance Criteria
|
||||||
|
- [ ] [Specific criterion 1]
|
||||||
|
- [ ] [Specific criterion 2]
|
||||||
|
- [ ] [Specific criterion 3]
|
||||||
|
```
|
||||||
|
|
||||||
|
**Requirements for Linear Issues:**
|
||||||
|
- Create 50 issues total covering all features in the spec
|
||||||
|
- Mix of functional and style features (note category in description)
|
||||||
|
- Order by priority: foundational features get priority 1-2, polish features get 3-4
|
||||||
|
- Include detailed test steps in each issue description
|
||||||
|
- All issues start in "Todo" status (default)
|
||||||
|
|
||||||
|
**Priority Guidelines:**
|
||||||
|
- Priority 1 (Urgent): Core infrastructure, database, basic UI layout
|
||||||
|
- Priority 2 (High): Primary user-facing features, authentication
|
||||||
|
- Priority 3 (Medium): Secondary features, enhancements
|
||||||
|
- Priority 4 (Low): Polish, nice-to-haves, edge cases
|
||||||
|
|
||||||
|
**CRITICAL INSTRUCTION:**
|
||||||
|
Once created, issues can ONLY have their status changed (Todo → In Progress → Done).
|
||||||
|
Never delete issues, never modify descriptions after creation.
|
||||||
|
This ensures no functionality is missed across sessions.
|
||||||
|
|
||||||
|
### NEXT TASK: Create Meta Issue for Session Tracking
|
||||||
|
|
||||||
|
Create a special issue titled "[META] Project Progress Tracker" with:
|
||||||
|
|
||||||
|
```markdown
|
||||||
|
## Project Overview
|
||||||
|
[Copy the project name and brief overview from app_spec.txt]
|
||||||
|
|
||||||
|
## Session Tracking
|
||||||
|
This issue is used for session handoff between coding agents.
|
||||||
|
Each agent should add a comment summarizing their session.
|
||||||
|
|
||||||
|
## Key Milestones
|
||||||
|
- [ ] Project setup complete
|
||||||
|
- [ ] Core infrastructure working
|
||||||
|
- [ ] Primary features implemented
|
||||||
|
- [ ] All features complete
|
||||||
|
- [ ] Polish and refinement done
|
||||||
|
|
||||||
|
## Notes
|
||||||
|
[Any important context about the project]
|
||||||
|
```
|
||||||
|
|
||||||
|
This META issue will be used by all future agents to:
|
||||||
|
- Read context from previous sessions (via comments)
|
||||||
|
- Write session summaries before ending
|
||||||
|
- Track overall project milestones
|
||||||
|
|
||||||
|
### NEXT TASK: Create init.sh
|
||||||
|
|
||||||
|
Create a script called `init.sh` that future agents can use to quickly
|
||||||
|
set up and run the development environment. The script should:
|
||||||
|
|
||||||
|
1. Install any required dependencies
|
||||||
|
2. Start any necessary servers or services
|
||||||
|
3. Print helpful information about how to access the running application
|
||||||
|
|
||||||
|
Base the script on the technology stack specified in `app_spec.txt`.
|
||||||
|
|
||||||
|
### NEXT TASK: Initialize Git
|
||||||
|
|
||||||
|
Create a git repository and make your first commit with:
|
||||||
|
- init.sh (environment setup script)
|
||||||
|
- README.md (project overview and setup instructions)
|
||||||
|
- Any initial project structure files
|
||||||
|
|
||||||
|
Commit message: "Initial setup: project structure and init script"
|
||||||
|
|
||||||
|
### NEXT TASK: Create Project Structure
|
||||||
|
|
||||||
|
Set up the basic project structure based on what's specified in `app_spec.txt`.
|
||||||
|
This typically includes directories for frontend, backend, and any other
|
||||||
|
components mentioned in the spec.
|
||||||
|
|
||||||
|
### NEXT TASK: Save Linear Project State
|
||||||
|
|
||||||
|
Create a file called `.linear_project.json` with the following information:
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"initialized": true,
|
||||||
|
"created_at": "[current timestamp]",
|
||||||
|
"team_id": "[ID of the team you used]",
|
||||||
|
"project_id": "[ID of the Linear project you created]",
|
||||||
|
"project_name": "[Name of the project from app_spec.txt]",
|
||||||
|
"meta_issue_id": "[ID of the META issue you created]",
|
||||||
|
"total_issues": 50,
|
||||||
|
"notes": "Project initialized by initializer agent"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
This file tells future sessions that Linear has been set up.
|
||||||
|
|
||||||
|
### OPTIONAL: Start Implementation
|
||||||
|
|
||||||
|
If you have time remaining in this session, you may begin implementing
|
||||||
|
the highest-priority features. Remember:
|
||||||
|
- Use `mcp__linear__list_issues` to find Todo issues with priority 1
|
||||||
|
- Use `mcp__linear__update_issue` to set status to "In Progress"
|
||||||
|
- Work on ONE feature at a time
|
||||||
|
- Test thoroughly before marking status as "Done"
|
||||||
|
- Add a comment to the issue with implementation notes
|
||||||
|
- Commit your progress before session ends
|
||||||
|
|
||||||
|
### ENDING THIS SESSION
|
||||||
|
|
||||||
|
Before your context fills up:
|
||||||
|
1. Commit all work with descriptive messages
|
||||||
|
2. Add a comment to the META issue summarizing what you accomplished:
|
||||||
|
```markdown
|
||||||
|
## Session 1 Complete - Initialization
|
||||||
|
|
||||||
|
### Accomplished
|
||||||
|
- Created 50 Linear issues from app_spec.txt
|
||||||
|
- Set up project structure
|
||||||
|
- Created init.sh
|
||||||
|
- Initialized git repository
|
||||||
|
- [Any features started/completed]
|
||||||
|
|
||||||
|
### Linear Status
|
||||||
|
- Total issues: 50
|
||||||
|
- Done: X
|
||||||
|
- In Progress: Y
|
||||||
|
- Todo: Z
|
||||||
|
|
||||||
|
### Notes for Next Session
|
||||||
|
- [Any important context]
|
||||||
|
- [Recommendations for what to work on next]
|
||||||
|
```
|
||||||
|
3. Ensure `.linear_project.json` exists
|
||||||
|
4. Leave the environment in a clean, working state
|
||||||
|
|
||||||
|
The next agent will continue from here with a fresh context window.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**Remember:** You have unlimited time across many sessions. Focus on
|
||||||
|
quality over speed. Production-ready is the goal.
|
||||||
1
requirements.txt
Normal file
1
requirements.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
claude-code-sdk>=0.0.25
|
||||||
359
security.py
Normal file
359
security.py
Normal file
@@ -0,0 +1,359 @@
|
|||||||
|
"""
|
||||||
|
Security Hooks for Autonomous Coding Agent
|
||||||
|
==========================================
|
||||||
|
|
||||||
|
Pre-tool-use hooks that validate bash commands for security.
|
||||||
|
Uses an allowlist approach - only explicitly permitted commands can run.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import shlex
|
||||||
|
|
||||||
|
|
||||||
|
# Allowlist of executable names the agent may invoke through the Bash tool.
# Anything not listed here is rejected by bash_security_hook.
ALLOWED_COMMANDS = {
    # Read-only file inspection
    "ls", "cat", "head", "tail", "wc", "grep",
    # File operations (most file work goes through SDK tools; these cover the rest)
    "cp", "mkdir",
    "chmod",  # only for making scripts executable; validated separately
    # Directory
    "pwd",
    # Node.js development
    "npm", "node",
    # Version control
    "git",
    # Process management
    "ps", "lsof", "sleep",
    "pkill",  # only for stopping dev servers; validated separately
    # Script execution
    "init.sh",  # project init script; validated separately
}

# Allowlisted commands whose arguments are additionally checked by a
# dedicated validate_* function before they are permitted.
COMMANDS_NEEDING_EXTRA_VALIDATION = {"pkill", "chmod", "init.sh"}
|
||||||
|
|
||||||
|
|
||||||
|
def split_command_segments(command_string: str) -> list[str]:
    """
    Split a compound command into individual command segments.

    Splits on the chaining operators ``&&``, ``||``, ``;`` and on newlines,
    but only when they appear outside of quotes. Pipes are NOT split: a
    pipeline stays together as one segment.

    Quote handling follows the usual shell rules: single quotes are fully
    literal, a backslash escapes the next character inside double quotes and
    outside quotes. An unterminated quote simply swallows the rest of the
    input into the final segment; downstream parsing rejects it.

    Args:
        command_string: The full shell command

    Returns:
        List of non-empty, whitespace-stripped command segments
    """
    segments: list[str] = []
    current: list[str] = []
    active_quote = ""  # "" when outside quotes, otherwise the opening quote char
    index = 0
    length = len(command_string)

    def flush() -> None:
        # Close the segment being built; empty segments are dropped.
        piece = "".join(current).strip()
        if piece:
            segments.append(piece)
        current.clear()

    while index < length:
        char = command_string[index]

        # A backslash protects the next character everywhere except inside
        # single quotes, so an escaped quote or separator is copied verbatim.
        if char == "\\" and active_quote != "'" and index + 1 < length:
            current.append(command_string[index:index + 2])
            index += 2
            continue

        if active_quote:
            current.append(char)
            if char == active_quote:
                active_quote = ""
            index += 1
            continue

        if char in "\"'":
            active_quote = char
            current.append(char)
            index += 1
            continue

        if command_string[index:index + 2] in ("&&", "||"):
            flush()
            index += 2
            continue

        # An unquoted newline separates commands exactly like ";".
        if char in ";\n":
            flush()
            index += 1
            continue

        current.append(char)
        index += 1

    flush()
    return segments
|
||||||
|
|
||||||
|
|
||||||
|
def extract_commands(command_string: str) -> list[str]:
    """
    Extract command names from a shell command string.

    The string is tokenized with a quote-aware, punctuation-aware ``shlex``
    lexer, so shell operators are recognized even when they are not
    surrounded by spaces (``cat f|grep x``) and separators inside quotes are
    left alone (``git commit -m "fix; update"``). A new command is expected
    at the start of the string and after every control operator: pipes,
    ``&&``, ``||``, ``&``, ``;``, newlines and parentheses. Redirection
    operators (``>``, ``>>``, ``2>&1`` ...) do not start a new command.

    The function is deliberately fail-safe: it returns an empty list, which
    callers treat as "block", when the string cannot be parsed or when any
    token contains a backtick or ``$(``. shlex discards the quotes, so a
    command substitution hidden inside double quotes cannot be told apart
    from a harmless single-quoted literal.

    Args:
        command_string: The full shell command

    Returns:
        List of base command names (without paths) in order of appearance,
        or an empty list if the command could not be analyzed safely
    """
    # Characters emitted as standalone operator tokens. Newline is included
    # (and removed from the lexer's whitespace below) because the shell treats
    # an unquoted newline as a command separator, just like ";".
    operator_chars = "();<>|&\n"

    # Shell keywords that may precede a command without being one themselves.
    shell_keywords = {
        "if", "then", "else", "elif", "fi",
        "for", "while", "until", "do", "done",
        "case", "esac", "in",
        "!", "{", "}",
    }

    lexer = shlex.shlex(command_string, posix=True, punctuation_chars=operator_chars)
    lexer.whitespace_split = True  # words end only at whitespace or an operator
    lexer.commenters = ""          # same as shlex.split: "#" is not a comment
    lexer.whitespace = " \t\r"     # keep "\n" visible as an operator token

    try:
        tokens = list(lexer)
    except ValueError:
        # Malformed command (unclosed quotes, trailing backslash, etc.)
        # Return empty to trigger block (fail-safe)
        return []

    commands = []
    expect_command = True

    for token in tokens:
        # Command substitution would run a command this parser cannot see.
        if "`" in token or "$(" in token:
            return []

        # Operator tokens. A quoted argument such as "|" is indistinguishable
        # here and is treated as an operator too, which can only make the
        # result stricter.
        if token and all(char in operator_chars for char in token):
            is_redirection = (
                ("<" in token or ">" in token) and set(token) <= set("<>&")
            )
            if not is_redirection:
                expect_command = True
            continue

        # Skip shell keywords that precede commands
        if token in shell_keywords:
            continue

        # Skip flags/options
        if token.startswith("-"):
            continue

        # Skip variable assignments (VAR=value)
        if "=" in token and not token.startswith("="):
            continue

        if expect_command:
            # Reduce paths like /usr/bin/node to the bare command name
            commands.append(os.path.basename(token))
            expect_command = False

    return commands
|
||||||
|
|
||||||
|
|
||||||
|
def validate_pkill_command(command_string: str) -> tuple[bool, str]:
    """
    Validate pkill commands - only allow killing dev-related processes.

    Uses shlex to parse the command, avoiding regex bypass vulnerabilities.
    The command must be a single, standalone pkill invocation: any newline or
    shell operator character in it is rejected. Without that check the "last
    non-flag argument" rule below could be satisfied by an argument of a
    different command further down a pipeline (``pkill sshd | cat node``).

    Args:
        command_string: The pkill command segment to validate

    Returns:
        Tuple of (is_allowed, reason_if_blocked)
    """
    # Allowed process names for pkill
    allowed_process_names = {
        "node",
        "npm",
        "npx",
        "vite",
        "next",
    }
    shell_metacharacters = "|&;<>()`"

    try:
        tokens = shlex.split(command_string)
    except ValueError:
        return False, "Could not parse pkill command"

    if not tokens:
        return False, "Empty pkill command"

    # shlex.split treats a newline as plain whitespace, so check the raw string.
    if "\n" in command_string:
        return False, "pkill command must be a single line"

    if any(char in token for token in tokens for char in shell_metacharacters):
        return False, "pkill command must not contain shell operators"

    # Separate flags from arguments
    args = [token for token in tokens[1:] if not token.startswith("-")]

    if not args:
        return False, "pkill requires a process name"

    # The target is typically the last non-flag argument
    target = args[-1]

    # For -f flag (full command line match), extract the first word as process name
    # e.g., "pkill -f 'node server.js'" -> target is "node server.js", process is "node"
    if " " in target:
        target = target.split()[0]

    if target in allowed_process_names:
        return True, ""
    return False, f"pkill only allowed for dev processes: {allowed_process_names}"
|
||||||
|
|
||||||
|
|
||||||
|
def validate_chmod_command(command_string: str) -> tuple[bool, str]:
    """
    Validate chmod commands - only allow making files executable with +x.

    The command must be a single, standalone chmod invocation of the form
    ``chmod [ugoa]*+x FILE...``. Flags, numeric modes, other permission
    bits, newlines and shell operator characters are all rejected. Operators
    are refused because every token after the mode is treated as a file
    name, so ``chmod +x a | chmod 777 b`` would otherwise be accepted.

    Args:
        command_string: The chmod command segment to validate

    Returns:
        Tuple of (is_allowed, reason_if_blocked)
    """
    import re

    shell_metacharacters = "|&;<>()`"

    try:
        tokens = shlex.split(command_string)
    except ValueError:
        return False, "Could not parse chmod command"

    if not tokens or tokens[0] != "chmod":
        return False, "Not a chmod command"

    # shlex.split treats a newline as plain whitespace, so check the raw string.
    if "\n" in command_string:
        return False, "chmod command must be a single line"

    if any(char in token for token in tokens for char in shell_metacharacters):
        return False, "chmod command must not contain shell operators"

    # First non-flag token is the mode; everything after it is a file.
    mode = None
    files = []

    for token in tokens[1:]:
        if token.startswith("-"):
            # No flags at all (this also rules out recursive chmod and "-x")
            return False, "chmod flags are not allowed"
        elif mode is None:
            mode = token
        else:
            files.append(token)

    if mode is None:
        return False, "chmod requires a mode"

    if not files:
        return False, "chmod requires at least one file"

    # Only allow +x variants (making files executable)
    # This matches: +x, u+x, g+x, o+x, a+x, ug+x, etc.
    # fullmatch (rather than match with "$") so a trailing newline in the
    # mode cannot slip through.
    if not re.fullmatch(r"[ugoa]*\+x", mode):
        return False, f"chmod only allowed with +x mode, got: {mode}"

    return True, ""
|
||||||
|
|
||||||
|
|
||||||
|
def validate_init_script(command_string: str) -> tuple[bool, str]:
    """
    Validate execution of an init.sh script.

    The first word of the command must be ``./init.sh`` or a path whose
    final component is ``init.sh`` (anything ending in ``/init.sh``).
    Arguments after the script are not inspected. A bare ``init.sh`` or an
    interpreter invocation such as ``bash init.sh`` is rejected.

    Args:
        command_string: The command segment that runs the script

    Returns:
        Tuple of (is_allowed, reason_if_blocked)
    """
    try:
        words = shlex.split(command_string)
    except ValueError:
        return False, "Could not parse init script command"

    if len(words) == 0:
        return False, "Empty command"

    # Only the executable itself matters; trailing arguments are ignored.
    script = words[0]
    is_init_script = script == "./init.sh" or script.endswith("/init.sh")
    if not is_init_script:
        return False, f"Only ./init.sh is allowed, got: {script}"

    return True, ""
|
||||||
|
|
||||||
|
|
||||||
|
def get_command_for_validation(cmd: str, segments: list[str]) -> str:
    """
    Return the first command segment in which ``cmd`` is invoked.

    Each segment is parsed with extract_commands; segments are examined in
    order and the search stops at the first match.

    Args:
        cmd: The command name to find
        segments: List of command segments

    Returns:
        The first segment containing the command, or empty string if none does
    """
    matches = (
        candidate for candidate in segments if cmd in extract_commands(candidate)
    )
    return next(matches, "")
|
||||||
|
|
||||||
|
|
||||||
|
async def bash_security_hook(input_data, tool_use_id=None, context=None):
    """
    Pre-tool-use hook that validates bash commands using an allowlist.

    Only commands in ALLOWED_COMMANDS are permitted. Commands listed in
    COMMANDS_NEEDING_EXTRA_VALIDATION are additionally checked by their
    dedicated validator against EVERY command segment in which they appear,
    so a chained command such as ``chmod +x a && chmod 777 b`` cannot hide a
    bad invocation behind a good one.

    Args:
        input_data: Dict containing tool_name and tool_input
        tool_use_id: Optional tool use ID (unused)
        context: Optional context (unused)

    Returns:
        Empty dict to allow, or {"decision": "block", "reason": "..."} to block
    """
    if input_data.get("tool_name") != "Bash":
        return {}

    command = input_data.get("tool_input", {}).get("command", "")
    if not command:
        return {}

    # Extract all commands from the command string
    commands = extract_commands(command)

    if not commands:
        # Could not parse - fail safe by blocking
        return {
            "decision": "block",
            "reason": f"Could not parse command for security validation: {command}",
        }

    # Split into segments for per-command validation
    segments = split_command_segments(command)

    # Validator for each command that needs more than an allowlist check
    validators = {
        "pkill": validate_pkill_command,
        "chmod": validate_chmod_command,
        "init.sh": validate_init_script,
    }

    # Check each command against the allowlist
    for cmd in commands:
        if cmd not in ALLOWED_COMMANDS:
            return {
                "decision": "block",
                "reason": f"Command '{cmd}' is not in the allowed commands list",
            }

        # Additional validation for sensitive commands
        if cmd in COMMANDS_NEEDING_EXTRA_VALIDATION and cmd in validators:
            # Validate every segment that invokes this command, not just the
            # first one; fall back to the full command if none can be located.
            matching_segments = [
                segment for segment in segments if cmd in extract_commands(segment)
            ] or [command]

            for cmd_segment in matching_segments:
                allowed, reason = validators[cmd](cmd_segment)
                if not allowed:
                    return {"decision": "block", "reason": reason}

    return {}
|
||||||
290
test_security.py
Normal file
290
test_security.py
Normal file
@@ -0,0 +1,290 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Security Hook Tests
|
||||||
|
===================
|
||||||
|
|
||||||
|
Tests for the bash command security validation logic.
|
||||||
|
Run with: python test_security.py
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from security import (
|
||||||
|
bash_security_hook,
|
||||||
|
extract_commands,
|
||||||
|
validate_chmod_command,
|
||||||
|
validate_init_script,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_hook(command: str, should_block: bool) -> bool:
    """Run one command through the security hook and report PASS/FAIL.

    Returns True when the hook's decision (blocked or allowed) matches
    ``should_block``, False otherwise. The outcome is printed either way;
    on a mismatch the expected/actual decision and any block reason are
    printed as well.
    """
    payload = {"tool_name": "Bash", "tool_input": {"command": command}}
    outcome = asyncio.run(bash_security_hook(payload))
    was_blocked = outcome.get("decision") == "block"

    if was_blocked == should_block:
        status = "PASS"
        print(f" {status}: {command!r}")
        return True

    status = "FAIL"
    expected = "blocked" if should_block else "allowed"
    actual = "blocked" if was_blocked else "allowed"
    reason = outcome.get("reason", "")
    print(f" {status}: {command!r}")
    print(f" Expected: {expected}, Got: {actual}")
    if reason:
        print(f" Reason: {reason}")
    return False
|
||||||
|
|
||||||
|
|
||||||
|
def test_extract_commands():
    """Check extract_commands against known inputs.

    Prints one PASS/FAIL line per case and returns a (passed, failed) tuple.
    """
    print("\nTesting command extraction:\n")

    # (command string, expected list of extracted command names)
    scenarios = [
        ("ls -la", ["ls"]),
        ("npm install && npm run build", ["npm", "npm"]),
        ("cat file.txt | grep pattern", ["cat", "grep"]),
        ("/usr/bin/node script.js", ["node"]),
        ("VAR=value ls", ["ls"]),
        ("git status || git init", ["git", "git"]),
    ]

    pass_count = 0
    fail_count = 0

    for cmd, expected in scenarios:
        result = extract_commands(cmd)
        if result != expected:
            print(f" FAIL: {cmd!r}")
            print(f" Expected: {expected}, Got: {result}")
            fail_count += 1
            continue
        print(f" PASS: {cmd!r} -> {result}")
        pass_count += 1

    return pass_count, fail_count
|
||||||
|
|
||||||
|
|
||||||
|
def test_validate_chmod():
    """Check validate_chmod_command against allowed and blocked invocations.

    Prints one PASS/FAIL line per case and returns a (passed, failed) tuple.
    """
    print("\nTesting chmod validation:\n")

    # (command, should_be_allowed, description)
    scenarios = [
        # Allowed cases
        ("chmod +x init.sh", True, "basic +x"),
        ("chmod +x script.sh", True, "+x on any script"),
        ("chmod u+x init.sh", True, "user +x"),
        ("chmod a+x init.sh", True, "all +x"),
        ("chmod ug+x init.sh", True, "user+group +x"),
        ("chmod +x file1.sh file2.sh", True, "multiple files"),
        # Blocked cases
        ("chmod 777 init.sh", False, "numeric mode"),
        ("chmod 755 init.sh", False, "numeric mode 755"),
        ("chmod +w init.sh", False, "write permission"),
        ("chmod +r init.sh", False, "read permission"),
        ("chmod -x init.sh", False, "remove execute"),
        ("chmod -R +x dir/", False, "recursive flag"),
        ("chmod --recursive +x dir/", False, "long recursive flag"),
        ("chmod +x", False, "missing file"),
    ]

    pass_count = 0
    fail_count = 0

    for cmd, should_allow, description in scenarios:
        allowed, reason = validate_chmod_command(cmd)
        if allowed != should_allow:
            expected = "allowed" if should_allow else "blocked"
            actual = "allowed" if allowed else "blocked"
            print(f" FAIL: {cmd!r} ({description})")
            print(f" Expected: {expected}, Got: {actual}")
            if reason:
                print(f" Reason: {reason}")
            fail_count += 1
            continue
        print(f" PASS: {cmd!r} ({description})")
        pass_count += 1

    return pass_count, fail_count
|
||||||
|
|
||||||
|
|
||||||
|
def test_validate_init_script():
    """Exercise ``validate_init_script`` against a table of script invocations.

    Every case pairs a command string with the verdict it should receive.
    A PASS/FAIL line is printed per case; on a mismatch the expected and
    actual verdicts are printed, plus the validator's reason when it gave one.

    Returns:
        A ``(passed, failed)`` tuple of case counts.
    """
    print("\nTesting init.sh validation:\n")

    # (command, description) pairs the validator is expected to accept.
    expect_allowed = [
        ("./init.sh", "basic ./init.sh"),
        ("./init.sh arg1 arg2", "with arguments"),
        ("/path/to/init.sh", "absolute path"),
        ("../dir/init.sh", "relative path with init.sh"),
    ]
    # (command, description) pairs the validator is expected to reject.
    expect_blocked = [
        ("./setup.sh", "different script name"),
        ("./init.py", "python script"),
        ("bash init.sh", "bash invocation"),
        ("sh init.sh", "sh invocation"),
        ("./malicious.sh", "malicious script"),
        ("./init.sh; rm -rf /", "command injection attempt"),
    ]

    # Allowed cases run first, then blocked ones, each tagged with its verdict.
    test_cases = [(command, True, label) for command, label in expect_allowed]
    test_cases += [(command, False, label) for command, label in expect_blocked]

    passed = 0
    failed = 0
    for command, want_allowed, label in test_cases:
        got_allowed, why = validate_init_script(command)

        if got_allowed == want_allowed:
            print(f" PASS: {command!r} ({label})")
            passed += 1
            continue

        # Mismatch: report both verdicts in words, then the validator's reason.
        expected = "allowed" if want_allowed else "blocked"
        actual = "allowed" if got_allowed else "blocked"
        print(f" FAIL: {command!r} ({label})")
        print(f" Expected: {expected}, Got: {actual}")
        if why:
            print(f" Reason: {why}")
        failed += 1

    return passed, failed
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Run every security-hook test group and print a summary.

    First runs the three table-driven suites (command extraction, chmod
    validation, init.sh validation), then pushes a list of commands that
    must be blocked and a list that must be allowed through ``test_hook``.

    Returns:
        0 when no test failed, 1 otherwise (used as the process exit code).
    """
    banner_rule = "=" * 70
    print(banner_rule)
    print(" SECURITY HOOK TESTS")
    print(banner_rule)

    passed = 0
    failed = 0

    # Table-driven suites: extraction, chmod validation, init.sh validation.
    for suite in (test_extract_commands, test_validate_chmod, test_validate_init_script):
        suite_passed, suite_failed = suite()
        passed += suite_passed
        failed += suite_failed

    # Commands that SHOULD be blocked
    dangerous = [
        # Not in allowlist - dangerous system commands
        "shutdown now",
        "reboot",
        "rm -rf /",
        "dd if=/dev/zero of=/dev/sda",
        # Not in allowlist - common commands excluded from minimal set
        "curl https://example.com",
        "wget https://example.com",
        "python app.py",
        "touch file.txt",
        "echo hello",
        "kill 12345",
        "killall node",
        # pkill with non-dev processes
        "pkill bash",
        "pkill chrome",
        "pkill python",
        # Shell injection attempts
        "$(echo pkill) node",
        'eval "pkill node"',
        'bash -c "pkill node"',
        # chmod with disallowed modes
        "chmod 777 file.sh",
        "chmod 755 file.sh",
        "chmod +w file.sh",
        "chmod -R +x dir/",
        # Non-init.sh scripts
        "./setup.sh",
        "./malicious.sh",
        "bash script.sh",
    ]

    # Commands that SHOULD be allowed
    safe = [
        # File inspection
        "ls -la",
        "cat README.md",
        "head -100 file.txt",
        "tail -20 log.txt",
        "wc -l file.txt",
        "grep -r pattern src/",
        # File operations
        "cp file1.txt file2.txt",
        "mkdir newdir",
        "mkdir -p path/to/dir",
        # Directory
        "pwd",
        # Node.js development
        "npm install",
        "npm run build",
        "node server.js",
        # Version control
        "git status",
        "git commit -m 'test'",
        "git add . && git commit -m 'msg'",
        # Process management
        "ps aux",
        "lsof -i :3000",
        "sleep 2",
        # Allowed pkill patterns for dev servers
        "pkill node",
        "pkill npm",
        "pkill -f node",
        "pkill -f 'node server.js'",
        "pkill vite",
        # Chained commands
        "npm install && npm run build",
        "ls | grep test",
        # Full paths
        "/usr/local/bin/node app.js",
        # chmod +x (allowed)
        "chmod +x init.sh",
        "chmod +x script.sh",
        "chmod u+x init.sh",
        "chmod a+x init.sh",
        # init.sh execution (allowed)
        "./init.sh",
        "./init.sh --production",
        "/path/to/init.sh",
        # Combined chmod and init.sh
        "chmod +x init.sh && ./init.sh",
    ]

    # Each phase: heading to print, commands to try, and whether the hook
    # is expected to block them. Blocked phase runs before the allowed one.
    phases = (
        ("\nCommands that should be BLOCKED:\n", dangerous, True),
        ("\nCommands that should be ALLOWED:\n", safe, False),
    )
    for heading, commands, must_block in phases:
        print(heading)
        for cmd in commands:
            if test_hook(cmd, should_block=must_block):
                passed += 1
            else:
                failed += 1

    # Summary
    summary_rule = "-" * 70
    print("\n" + summary_rule)
    print(f" Results: {passed} passed, {failed} failed")
    print(summary_rule)

    if failed != 0:
        print(f"\n {failed} TEST(S) FAILED")
        return 1

    print("\n ALL TESTS PASSED")
    return 0
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: run the suite and exit with main()'s status
# (0 when everything passed, 1 otherwise).
if __name__ == "__main__":
    raise SystemExit(main())
|
||||||
Reference in New Issue
Block a user