Merge branch 'main' of https://github.com/davidblanc347/linear-coding-agent
This commit is contained in:
@@ -6,38 +6,64 @@
|
||||
"Bash(netstat:*)",
|
||||
"Bash(docker-compose:*)",
|
||||
"Bash(ls:*)",
|
||||
"Bash(git:*)",
|
||||
"Bash(rm:*)",
|
||||
"Bash(python autonomous_agent_demo.py:*)",
|
||||
"Bash(dir C:GitHublinear_coding_philosophia_raggenerationslibrary_rag*.py)",
|
||||
"Bash(git add:*)",
|
||||
"Bash(git commit -m \"$\\(cat <<''EOF''\nFix import error: rename delete_document_passages to delete_document_chunks\n\nThe function was renamed in weaviate_ingest.py but the import in __init__.py\nwas not updated, causing ImportError when using the library.\n\nChanges:\n- Updated import statement in utils/__init__.py\n- Updated __all__ export list to use correct function name\nEOF\n\\)\")",
|
||||
"Bash(dir \"C:\\\\GitHub\\\\linear_coding_philosophia_rag\\\\generations\\\\library_rag\\\\.env\")",
|
||||
"Bash(git commit:*)",
|
||||
"Bash(tasklist:*)",
|
||||
"Bash(findstr:*)",
|
||||
"Bash(wmic process:*)",
|
||||
"Bash(powershell -Command \"Get-Process python | Select-Object Id,Path,StartTime | Format-Table -AutoSize\")",
|
||||
"Bash(powershell -Command \"Get-WmiObject Win32_Process -Filter \"\"name = ''python.exe''\"\" | Select-Object ProcessId, CommandLine | Format-List\")",
|
||||
"Bash(timeout:*)",
|
||||
"Bash(powershell -Command:*)",
|
||||
"Bash(python:*)",
|
||||
"Bash(dir \"C:\\\\GitHub\\\\linear_coding_library_rag\\\\generations\\\\library_rag\")",
|
||||
"Bash(docker ps:*)",
|
||||
"Bash(docker logs:*)",
|
||||
"Bash(curl:*)",
|
||||
"Bash(dir:*)",
|
||||
"Bash(timeout 30 tail:*)",
|
||||
"Bash(xargs:*)",
|
||||
"Bash(npm run dev:*)",
|
||||
"Bash(npm run build:*)",
|
||||
"Bash(npm install:*)",
|
||||
"WebFetch(domain:docs.anthropic.com)",
|
||||
"WebFetch(domain:docs.claude.com)",
|
||||
"Bash(npm start)",
|
||||
"Bash(node test_extended_thinking.js:*)",
|
||||
"Bash(node test_screenshot.js:*)",
|
||||
"Bash(node test_thinking_badge.js:*)",
|
||||
"Bash(node test_thinking_badge_simple.js:*)",
|
||||
"Bash(node:*)",
|
||||
"Bash(npx sqlite3:*)",
|
||||
"Bash(taskkill:*)",
|
||||
"Bash(findstr:*)",
|
||||
"Bash(tee:*)",
|
||||
"Bash(grep:*)",
|
||||
"Bash(git push:*)",
|
||||
"Bash(mypy:*)",
|
||||
"WebSearch",
|
||||
"Bash(nvidia-smi:*)",
|
||||
"WebFetch(domain:cr.weaviate.io)",
|
||||
"Bash(git restore:*)",
|
||||
"Bash(git log:*)",
|
||||
"Bash(done)",
|
||||
"Bash(git remote set-url:*)",
|
||||
"Bash(docker compose:*)",
|
||||
"Bash(pytest:*)",
|
||||
"Bash(git pull:*)"
|
||||
"Bash(timeout 10 tail:*)",
|
||||
"Bash(iconv:*)",
|
||||
"Bash(pip install:*)",
|
||||
"Bash(sqlite3:*)",
|
||||
"Bash(wmic process where \"name=''python.exe''\" get ProcessId,CommandLine /format:list)",
|
||||
"Bash(powershell -Command \"Get-Process python | Select-Object Id, Path, StartTime | Format-List\")",
|
||||
"Bash(powershell:*)",
|
||||
"Bash(timeout 5 tail:*)",
|
||||
"Bash(timeout 8 tail:*)",
|
||||
"Bash(find:*)",
|
||||
"Bash(npm:*)",
|
||||
"Bash(tasklist:*)",
|
||||
"Bash(chmod:*)",
|
||||
"Bash(restart.bat)",
|
||||
"Bash(npm run dev)",
|
||||
"Bash(pkill:*)",
|
||||
"Bash(claude doctor:*)",
|
||||
"Bash(dir /s /b \".claude_settings.json\")",
|
||||
"Bash(dir /s /b \"settings.local.json\")",
|
||||
"Bash(python -m json.tool:*)",
|
||||
"Bash(del NUL)",
|
||||
"Bash(lsof:*)",
|
||||
"Bash(dir:*)",
|
||||
"Bash(docker ps:*)",
|
||||
"Bash(wmic process where \"name=''node.exe'' OR name=''python.exe''\" get ProcessId,CommandLine /format:list)",
|
||||
"Bash(cmd /c \"cd C:\\\\GitHub\\\\Linear_coding_ikario_body\\\\generations\\\\ikario_body && restart.bat\")",
|
||||
"Bash(cmd /c \"C:\\\\GitHub\\\\linear_coding_library_rag\\\\generations\\\\library_rag\\\\diagnose_wsl.bat\")",
|
||||
"Bash(wsl --status:*)",
|
||||
"Bash(wsl --list:*)",
|
||||
"Bash(docker version:*)",
|
||||
"Bash(docker info:*)",
|
||||
"Bash(docker stats:*)",
|
||||
"Bash(timeout:*)",
|
||||
"Bash(docker inspect:*)"
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
6
.gitignore
vendored
6
.gitignore
vendored
@@ -6,4 +6,8 @@ logs/
|
||||
|
||||
.env
|
||||
venv
|
||||
__pycache__
|
||||
__pycache__
|
||||
|
||||
# Node modules (if any)
|
||||
node_modules/
|
||||
package-lock.json
|
||||
337
GUIDE_NEW_APP.md
337
GUIDE_NEW_APP.md
@@ -1,337 +0,0 @@
|
||||
# Guide : Créer une Nouvelle Application avec le Framework Linear Coding
|
||||
|
||||
Ce guide explique comment utiliser ce framework pour créer une **toute nouvelle application** à partir de zéro.
|
||||
|
||||
## Vue d'ensemble
|
||||
|
||||
Ce framework permet de générer automatiquement une application complète en utilisant :
|
||||
- **Linear** pour la gestion de projet (issues, suivi, commentaires)
|
||||
- **Claude Agent SDK** pour le développement autonome
|
||||
- **Spécifications en format XML** pour décrire l'application
|
||||
|
||||
## Structure du Framework
|
||||
|
||||
### Fichiers génériques (à NE PAS modifier)
|
||||
|
||||
Ces fichiers font partie du framework et sont réutilisables pour toutes les applications :
|
||||
|
||||
```
|
||||
linear-coding-agent/
|
||||
├── autonomous_agent_demo.py # Point d'entrée principal
|
||||
├── agent.py # Logique des sessions d'agent
|
||||
├── client.py # Configuration SDK Claude + MCP
|
||||
├── security.py # Validation et whitelist des commandes
|
||||
├── progress.py # Utilitaires de suivi de progression
|
||||
├── prompts.py # Utilitaires de chargement des prompts
|
||||
├── linear_config.py # Constantes de configuration Linear
|
||||
├── requirements.txt # Dépendances Python
|
||||
└── prompts/
|
||||
├── initializer_prompt.md # Prompt pour la session initiale
|
||||
├── initializer_bis_prompt.md # Prompt pour ajouter des features
|
||||
└── coding_prompt.md # Prompt pour les sessions de codage
|
||||
```
|
||||
|
||||
**⚠️ Ne modifiez PAS ces fichiers** - ils sont génériques et fonctionnent pour toutes les applications.
|
||||
|
||||
### Fichiers spécifiques à votre application (à CRÉER)
|
||||
|
||||
Le seul fichier que vous devez créer est :
|
||||
|
||||
```
|
||||
prompts/
|
||||
└── app_spec.txt # Votre spécification d'application (format XML)
|
||||
```
|
||||
|
||||
## Étapes pour Créer une Nouvelle Application
|
||||
|
||||
### Étape 1 : Créer votre fichier de spécification
|
||||
|
||||
Créez un fichier `prompts/app_spec.txt` qui décrit votre application. Utilisez le format XML suivant :
|
||||
|
||||
```xml
|
||||
<project_specification>
|
||||
<project_name>Nom de Votre Application</project_name>
|
||||
|
||||
<overview>
|
||||
Description complète de votre application. Expliquez ce que vous voulez construire,
|
||||
les objectifs principaux, et les fonctionnalités clés.
|
||||
</overview>
|
||||
|
||||
<technology_stack>
|
||||
<frontend>
|
||||
<framework>React avec Vite</framework>
|
||||
<styling>Tailwind CSS</styling>
|
||||
<state_management>React hooks</state_management>
|
||||
<!-- Ajoutez d'autres technologies frontend -->
|
||||
</frontend>
|
||||
<backend>
|
||||
<runtime>Node.js avec Express</runtime>
|
||||
<database>SQLite</database>
|
||||
<!-- Ajoutez d'autres technologies backend -->
|
||||
</backend>
|
||||
</technology_stack>
|
||||
|
||||
<prerequisites>
|
||||
<environment_setup>
|
||||
- Liste des prérequis (dépendances, clés API, etc.)
|
||||
</environment_setup>
|
||||
</prerequisites>
|
||||
|
||||
<core_features>
|
||||
<feature_1>
|
||||
<title>Titre de la fonctionnalité 1</title>
|
||||
<description>Description détaillée</description>
|
||||
<priority>1</priority>
|
||||
<category>frontend</category>
|
||||
<test_steps>
|
||||
1. Étape de test 1
|
||||
2. Étape de test 2
|
||||
</test_steps>
|
||||
</feature_1>
|
||||
|
||||
<feature_2>
|
||||
<!-- Autres fonctionnalités -->
|
||||
</feature_2>
|
||||
</core_features>
|
||||
</project_specification>
|
||||
```
|
||||
|
||||
### Étape 2 : Exemple de structure complète
|
||||
|
||||
Voici un exemple basé sur l'application "Claude Clone" que vous pouvez utiliser comme référence :
|
||||
|
||||
**Structure recommandée de `app_spec.txt` :**
|
||||
|
||||
```xml
|
||||
<project_specification>
|
||||
<project_name>Mon Application</project_name>
|
||||
|
||||
<overview>
|
||||
Description de votre application...
|
||||
</overview>
|
||||
|
||||
<technology_stack>
|
||||
<!-- Stack technique complète -->
|
||||
</technology_stack>
|
||||
|
||||
<prerequisites>
|
||||
<!-- Prérequis -->
|
||||
</prerequisites>
|
||||
|
||||
<core_features>
|
||||
<!-- Liste toutes vos fonctionnalités avec des balises <feature_X> -->
|
||||
</core_features>
|
||||
|
||||
<ui_design>
|
||||
<!-- Spécifications UI si nécessaire -->
|
||||
</ui_design>
|
||||
|
||||
<api_endpoints>
|
||||
<!-- Endpoints API si nécessaire -->
|
||||
</api_endpoints>
|
||||
|
||||
<database_schema>
|
||||
<!-- Schéma de base de données si nécessaire -->
|
||||
</database_schema>
|
||||
</project_specification>
|
||||
```
|
||||
|
||||
### Étape 3 : Lancer l'initialisation
|
||||
|
||||
Une fois votre `app_spec.txt` créé, lancez l'agent initializer :
|
||||
|
||||
```bash
|
||||
python autonomous_agent_demo.py --project-dir ./ma_nouvelle_app
|
||||
```
|
||||
|
||||
L'agent va :
|
||||
1. Lire votre `app_spec.txt`
|
||||
2. Créer un projet Linear
|
||||
3. Créer ~50 issues Linear basées sur votre spécification
|
||||
4. Initialiser la structure du projet
|
||||
|
||||
### Étape 4 : Suivre le développement
|
||||
|
||||
Les agents de codage vont ensuite :
|
||||
- Travailler sur les issues Linear une par une
|
||||
- Implémenter les fonctionnalités
|
||||
- Tester avec Puppeteer
|
||||
- Mettre à jour les issues avec leurs commentaires
|
||||
|
||||
## Exemple : Utiliser l'application "Claude Clone" comme référence
|
||||
|
||||
L'application "Claude Clone" dans `prompts/app_spec.txt` est un excellent exemple à suivre car elle contient :
|
||||
|
||||
### ✅ Éléments à copier/adapter :
|
||||
|
||||
1. **Structure XML** : La structure générale avec `<project_specification>`, `<overview>`, `<technology_stack>`, etc.
|
||||
|
||||
2. **Format des fonctionnalités** : Comment structurer les `<feature_X>` avec :
|
||||
- `<title>`
|
||||
- `<description>`
|
||||
- `<priority>`
|
||||
- `<category>`
|
||||
- `<test_steps>`
|
||||
|
||||
3. **Détails techniques** : Comment décrire :
|
||||
- La stack technologique
|
||||
- Les prérequis
|
||||
- Les endpoints API
|
||||
- Le schéma de base de données
|
||||
- Les spécifications UI
|
||||
|
||||
### ❌ Éléments spécifiques à NE PAS copier :
|
||||
|
||||
1. **Le contenu spécifique** : Les détails sur "Claude API", "artifacts", "conversations", etc. sont spécifiques à cette app
|
||||
2. **Les fonctionnalités métier** : Adaptez-les à votre application
|
||||
|
||||
## Checklist pour une Nouvelle Application
|
||||
|
||||
- [ ] Créer `prompts/app_spec.txt` avec votre spécification
|
||||
- [ ] Définir le `<project_name>` de votre application
|
||||
- [ ] Décrire l'`<overview>` complète
|
||||
- [ ] Spécifier la `<technology_stack>` (frontend + backend)
|
||||
- [ ] Lister les `<prerequisites>` nécessaires
|
||||
- [ ] Définir toutes les `<core_features>` avec des balises `<feature_X>`
|
||||
- [ ] Ajouter des `<test_steps>` pour chaque fonctionnalité
|
||||
- [ ] Lancer : `python autonomous_agent_demo.py --project-dir ./mon_app`
|
||||
- [ ] Vérifier dans Linear que les issues sont créées correctement
|
||||
|
||||
## Conseils pour Rédiger une Bonne Spécification
|
||||
|
||||
### 1. Soyez détaillé mais structuré
|
||||
|
||||
Chaque fonctionnalité doit avoir :
|
||||
- Un titre clair
|
||||
- Une description complète de ce qu'elle fait
|
||||
- Des étapes de test précises
|
||||
- Une priorité (1=urgent, 4=optionnel)
|
||||
|
||||
### 2. Utilisez le format XML cohérent
|
||||
|
||||
```xml
|
||||
<feature_1>
|
||||
<title>Authentification - Connexion utilisateur</title>
|
||||
<description>
|
||||
Implémenter un système d'authentification avec :
|
||||
- Formulaire de connexion (email/mot de passe)
|
||||
- Validation côté client et serveur
|
||||
- Gestion des sessions avec JWT
|
||||
- Page de réinitialisation de mot de passe
|
||||
</description>
|
||||
<priority>1</priority>
|
||||
<category>auth</category>
|
||||
<test_steps>
|
||||
1. Accéder à la page de connexion
|
||||
2. Entrer un email invalide → voir erreur
|
||||
3. Entrer des identifiants valides → redirection vers dashboard
|
||||
4. Vérifier que le token JWT est stocké
|
||||
5. Tester la déconnexion
|
||||
</test_steps>
|
||||
</feature_1>
|
||||
```
|
||||
|
||||
### 3. Organisez par catégories
|
||||
|
||||
Groupez les fonctionnalités par catégorie :
|
||||
- `auth` : Authentification
|
||||
- `frontend` : Interface utilisateur
|
||||
- `backend` : API et logique serveur
|
||||
- `database` : Modèles et migrations
|
||||
- `integration` : Intégrations externes
|
||||
|
||||
### 4. Priorisez les fonctionnalités
|
||||
|
||||
- **Priority 1** : Fonctionnalités critiques (auth, base de données)
|
||||
- **Priority 2** : Fonctionnalités importantes (core features)
|
||||
- **Priority 3** : Fonctionnalités secondaires (améliorations UX)
|
||||
- **Priority 4** : Nice-to-have (polish, optimisations)
|
||||
|
||||
## Exemple Minimal
|
||||
|
||||
Voici un exemple minimal pour démarrer :
|
||||
|
||||
```xml
|
||||
<project_specification>
|
||||
<project_name>Todo App - Gestionnaire de Tâches</project_name>
|
||||
|
||||
<overview>
|
||||
Application web simple pour gérer des listes de tâches.
|
||||
Les utilisateurs peuvent créer, modifier, compléter et supprimer des tâches.
|
||||
</overview>
|
||||
|
||||
<technology_stack>
|
||||
<frontend>
|
||||
<framework>React avec Vite</framework>
|
||||
<styling>Tailwind CSS</styling>
|
||||
</frontend>
|
||||
<backend>
|
||||
<runtime>Node.js avec Express</runtime>
|
||||
<database>SQLite</database>
|
||||
</backend>
|
||||
</technology_stack>
|
||||
|
||||
<core_features>
|
||||
<feature_1>
|
||||
<title>Interface principale - Liste des tâches</title>
|
||||
<description>Afficher une liste de toutes les tâches avec leur statut</description>
|
||||
<priority>1</priority>
|
||||
<category>frontend</category>
|
||||
<test_steps>
|
||||
1. Ouvrir l'application
|
||||
2. Vérifier que la liste des tâches s'affiche
|
||||
</test_steps>
|
||||
</feature_1>
|
||||
|
||||
<feature_2>
|
||||
<title>Créer une nouvelle tâche</title>
|
||||
<description>Formulaire pour ajouter une nouvelle tâche à la liste</description>
|
||||
<priority>1</priority>
|
||||
<category>frontend</category>
|
||||
<test_steps>
|
||||
1. Cliquer sur "Nouvelle tâche"
|
||||
2. Entrer un titre
|
||||
3. Cliquer sur "Ajouter"
|
||||
4. Vérifier que la tâche apparaît dans la liste
|
||||
</test_steps>
|
||||
</feature_2>
|
||||
</core_features>
|
||||
</project_specification>
|
||||
```
|
||||
|
||||
## Fichiers à Conserver du Framework
|
||||
|
||||
Ces fichiers sont **génériques** et fonctionnent pour toutes les applications :
|
||||
|
||||
- ✅ `autonomous_agent_demo.py` - Point d'entrée
|
||||
- ✅ `agent.py` - Logique des agents
|
||||
- ✅ `client.py` - Configuration Claude SDK
|
||||
- ✅ `prompts.py` - Chargement des prompts
|
||||
- ✅ `progress.py` - Suivi de progression
|
||||
- ✅ `security.py` - Sécurité
|
||||
- ✅ `linear_config.py` - Config Linear
|
||||
- ✅ `prompts/initializer_prompt.md` - Template initializer
|
||||
- ✅ `prompts/initializer_bis_prompt.md` - Template initializer bis
|
||||
- ✅ `prompts/coding_prompt.md` - Template coding agent
|
||||
- ✅ `requirements.txt` - Dépendances Python
|
||||
|
||||
## Fichiers à Créer pour Votre Application
|
||||
|
||||
- ✅ `prompts/app_spec.txt` - **Votre spécification (le seul fichier à créer !)**
|
||||
|
||||
## Résumé
|
||||
|
||||
Pour créer une nouvelle application :
|
||||
|
||||
1. **Copiez la structure XML** de `prompts/app_spec.txt` (exemple Claude Clone)
|
||||
2. **Adaptez le contenu** à votre application
|
||||
3. **Définissez toutes vos fonctionnalités** avec des balises `<feature_X>`
|
||||
4. **Lancez** : `python autonomous_agent_demo.py --project-dir ./mon_app`
|
||||
5. **Suivez le progrès** dans Linear
|
||||
|
||||
Le framework s'occupe du reste ! 🚀
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
106
LIBRARY_RAG_SETUP.md
Normal file
106
LIBRARY_RAG_SETUP.md
Normal file
@@ -0,0 +1,106 @@
|
||||
# Library RAG MCP Setup Guide
|
||||
|
||||
## Quick Setup
|
||||
|
||||
To enable the Library RAG MCP server (document search with Weaviate), you need to configure the following in your `.env` file:
|
||||
|
||||
### Required Environment Variables
|
||||
|
||||
```bash
|
||||
# Library RAG MCP Configuration
|
||||
LIBRARY_RAG_ENABLED=true
|
||||
MCP_LIBRARY_RAG_SERVER_PATH=C:/GitHub/linear_coding_library_rag/generations/library_rag/mcp_server.py
|
||||
LIBRARY_RAG_PYTHON_COMMAND=python
|
||||
LIBRARY_RAG_CONNECTION_TIMEOUT=10000
|
||||
LIBRARY_RAG_AUTO_RECONNECT=true
|
||||
LIBRARY_RAG_MAX_RETRIES=3
|
||||
|
||||
# Weaviate Configuration
|
||||
WEAVIATE_URL=http://localhost:8080
|
||||
|
||||
# REQUIRED: Mistral API Key (for OCR functionality)
|
||||
MISTRAL_API_KEY=your_mistral_api_key_here
|
||||
```
|
||||
|
||||
### Why MISTRAL_API_KEY is Required
|
||||
|
||||
The Library RAG MCP server uses Mistral API for:
|
||||
- **OCR with annotations**: Extracting text from PDF images with layout annotations
|
||||
- **LLM processing**: Metadata extraction, table of contents generation, semantic chunking
|
||||
|
||||
Without this key, the MCP server **will fail to start** and the backend connection will time out with the error: `MCP error -32001: Request timed out`.
|
||||
|
||||
### Getting Your Mistral API Key
|
||||
|
||||
1. Go to https://console.mistral.ai/
|
||||
2. Sign up or log in
|
||||
3. Navigate to API Keys section
|
||||
4. Create a new API key
|
||||
5. Copy the key and add it to your `.env` file
|
||||
|
||||
### Troubleshooting
|
||||
|
||||
**Issue**: Library RAG shows `"connected": false` in status endpoint
|
||||
|
||||
**Solution**:
|
||||
1. Check that `MISTRAL_API_KEY` is uncommented in `.env`
|
||||
2. Verify the key is valid
|
||||
3. Restart the backend server: `npm run dev`
|
||||
|
||||
**Issue**: Connection timeout error
|
||||
|
||||
**Cause**: The MCP server subprocess cannot start without the Mistral API key
|
||||
|
||||
**Fix**: Add the key to `.env` and restart
|
||||
|
||||
### Verifying Connection
|
||||
|
||||
Check the connection status:
|
||||
|
||||
```bash
|
||||
curl http://localhost:5175/api/library-rag/status | python -m json.tool
|
||||
```
|
||||
|
||||
Expected response when connected:
|
||||
```json
|
||||
{
|
||||
"success": true,
|
||||
"data": {
|
||||
"connected": true,
|
||||
"message": "Library RAG MCP server is connected and ready",
|
||||
"tools": [...],
|
||||
"error": null
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Available Tools
|
||||
|
||||
Once connected, you'll have access to:
|
||||
|
||||
1. **upload_document** - Upload and index PDF documents with full pipeline (OCR, metadata, chunking, Weaviate ingestion)
|
||||
2. **search_library** - Semantic search over document chunks
|
||||
3. **hybrid_search** - Search summaries and high-level content
|
||||
4. **list_collections** - List all indexed documents
|
||||
5. **get_document** - Retrieve document metadata
|
||||
6. **filter_search** - Filter by author, work, or language
|
||||
7. **extract_text_from_image** - OCR from image URLs
|
||||
|
||||
### Related Configuration
|
||||
|
||||
Make sure Weaviate Docker is running:
|
||||
|
||||
```bash
|
||||
cd C:/GitHub/linear_coding_library_rag/generations/library_rag
|
||||
docker-compose up -d
|
||||
```
|
||||
|
||||
Check Weaviate status:
|
||||
|
||||
```bash
|
||||
curl http://localhost:8080/v1/meta
|
||||
```
|
||||
|
||||
## Changelog
|
||||
|
||||
**2025-12-26**: Fixed MCP connection issue by adding MISTRAL_API_KEY requirement to documentation. The key must be present in `.env` for the MCP server to start successfully.
|
||||
294
README.md
294
README.md
@@ -64,19 +64,25 @@ pip show claude-code-sdk # Check SDK is installed
|
||||
|
||||
## Quick Start
|
||||
|
||||
### Option 1: Use the Example (Claude Clone)
|
||||
|
||||
```bash
|
||||
# Initialize a new project
|
||||
python autonomous_agent_demo.py --project-dir ./my_project
|
||||
# Initialize the Claude Clone example project
|
||||
python autonomous_agent_demo.py --project-dir ./ikario_body
|
||||
|
||||
# Add new features to an existing project
|
||||
python autonomous_agent_demo.py --project-dir ./my_project --new-spec app_spec_theme_customization.txt
|
||||
python autonomous_agent_demo.py --project-dir ./ikario_body --new-spec app_spec_theme_customization.txt
|
||||
```
|
||||
|
||||
For testing with limited iterations:
|
||||
```bash
|
||||
python autonomous_agent_demo.py --project-dir ./my_project --max-iterations 3
|
||||
python autonomous_agent_demo.py --project-dir ./ikario_body --max-iterations 3
|
||||
```
|
||||
|
||||
### Option 2: Create Your Own Application
|
||||
|
||||
See the [Creating a New Application](#creating-a-new-application) section below for detailed instructions on creating a custom application from scratch.
|
||||
|
||||
## How It Works
|
||||
|
||||
### Linear-Centric Workflow
|
||||
@@ -142,7 +148,7 @@ The **Initializer Bis** agent allows you to add new features to an existing proj
|
||||
**Example:**
|
||||
```bash
|
||||
# Add theme customization features to an existing project
|
||||
python autonomous_agent_demo.py --project-dir ./my_project --new-spec app_spec_theme_customization.txt
|
||||
python autonomous_agent_demo.py --project-dir ./ikario_body --new-spec app_spec_theme_customization.txt
|
||||
```
|
||||
|
||||
This will create multiple Linear issues (one per `<feature>` tag) that will be worked on by subsequent coding agent sessions.
|
||||
@@ -192,7 +198,6 @@ linear-agent-harness/
|
||||
│ ├── initializer_prompt.md # First session prompt (creates Linear issues)
|
||||
│ ├── initializer_bis_prompt.md # Prompt for adding new features
|
||||
│ └── coding_prompt.md # Continuation session prompt (works issues)
|
||||
├── GUIDE_NEW_APP.md # Guide pour créer une nouvelle application
|
||||
└── requirements.txt # Python dependencies
|
||||
```
|
||||
|
||||
@@ -201,7 +206,7 @@ linear-agent-harness/
|
||||
After running, your project directory will contain:
|
||||
|
||||
```
|
||||
my_project/
|
||||
ikario_body/
|
||||
├── .linear_project.json # Linear project state (marker file)
|
||||
├── app_spec.txt # Copied specification
|
||||
├── app_spec_theme_customization.txt # New spec file (if using --new-spec)
|
||||
@@ -241,35 +246,268 @@ The initializer agent will create:
|
||||
|
||||
All subsequent coding agents will work from this Linear project.
|
||||
|
||||
## Creating a New Application
|
||||
|
||||
This framework is designed to be **generic and reusable** for any web application. Here's how to create your own application from scratch.
|
||||
|
||||
### Understanding the Framework Structure
|
||||
|
||||
#### Generic Framework Files (DO NOT MODIFY)
|
||||
|
||||
These files work for all applications and should remain unchanged:
|
||||
|
||||
```
|
||||
linear-coding-agent/
|
||||
├── autonomous_agent_demo.py # Main entry point
|
||||
├── agent.py # Agent session logic
|
||||
├── client.py # Claude SDK + MCP client configuration
|
||||
├── security.py # Bash command allowlist and validation
|
||||
├── progress.py # Progress tracking utilities
|
||||
├── prompts.py # Prompt loading utilities
|
||||
├── linear_config.py # Linear configuration constants
|
||||
├── requirements.txt # Python dependencies
|
||||
└── prompts/
|
||||
├── initializer_prompt.md # First session prompt template
|
||||
├── initializer_bis_prompt.md # New features prompt template
|
||||
└── coding_prompt.md # Continuation session prompt template
|
||||
```
|
||||
|
||||
#### Application-Specific Files (CREATE THESE)
|
||||
|
||||
The **only file you need to create** is your application specification:
|
||||
|
||||
```
|
||||
prompts/
|
||||
└── app_spec.txt # Your application specification (XML format)
|
||||
```
|
||||
|
||||
### Step-by-Step Guide
|
||||
|
||||
#### Step 1: Create Your Specification File
|
||||
|
||||
Create `prompts/app_spec.txt` using this XML structure:
|
||||
|
||||
```xml
|
||||
<project_specification>
|
||||
<project_name>Your Application Name</project_name>
|
||||
|
||||
<overview>
|
||||
Complete description of your application. Explain what you want to build,
|
||||
main objectives, and key features.
|
||||
</overview>
|
||||
|
||||
<technology_stack>
|
||||
<frontend>
|
||||
<framework>React with Vite</framework>
|
||||
<styling>Tailwind CSS</styling>
|
||||
<state_management>React hooks</state_management>
|
||||
</frontend>
|
||||
<backend>
|
||||
<runtime>Node.js with Express</runtime>
|
||||
<database>SQLite</database>
|
||||
</backend>
|
||||
</technology_stack>
|
||||
|
||||
<prerequisites>
|
||||
<environment_setup>
|
||||
- List of prerequisites (dependencies, API keys, etc.)
|
||||
</environment_setup>
|
||||
</prerequisites>
|
||||
|
||||
<core_features>
|
||||
<feature_1>
|
||||
<title>Feature 1 Title</title>
|
||||
<description>Detailed description</description>
|
||||
<priority>1</priority>
|
||||
<category>frontend</category>
|
||||
<test_steps>
|
||||
1. Test step 1
|
||||
2. Test step 2
|
||||
</test_steps>
|
||||
</feature_1>
|
||||
|
||||
<feature_2>
|
||||
<!-- More features -->
|
||||
</feature_2>
|
||||
</core_features>
|
||||
</project_specification>
|
||||
```
|
||||
|
||||
#### Step 2: Define Your Features
|
||||
|
||||
Each feature should have:
|
||||
|
||||
- **Title**: Clear, descriptive title
|
||||
- **Description**: Complete explanation of what it does
|
||||
- **Priority**: 1 (urgent) to 4 (optional)
|
||||
- **Category**: `frontend`, `backend`, `database`, `auth`, `integration`, etc.
|
||||
- **Test Steps**: Precise verification steps
|
||||
|
||||
Example feature:
|
||||
|
||||
```xml
|
||||
<feature_1>
|
||||
<title>User Authentication - Login Flow</title>
|
||||
<description>
|
||||
Implement authentication system with:
|
||||
- Login form (email/password)
|
||||
- Client and server-side validation
|
||||
- JWT session management
|
||||
- Password reset page
|
||||
</description>
|
||||
<priority>1</priority>
|
||||
<category>auth</category>
|
||||
<test_steps>
|
||||
1. Access login page
|
||||
2. Enter invalid email → see error
|
||||
3. Enter valid credentials → redirect to dashboard
|
||||
4. Verify JWT token is stored
|
||||
5. Test logout functionality
|
||||
</test_steps>
|
||||
</feature_1>
|
||||
```
|
||||
|
||||
#### Step 3: Launch Initialization
|
||||
|
||||
Once your `app_spec.txt` is ready:
|
||||
|
||||
```bash
|
||||
python autonomous_agent_demo.py --project-dir ./my_new_app
|
||||
```
|
||||
|
||||
The initializer agent will:
|
||||
1. Read your `app_spec.txt`
|
||||
2. Create a Linear project
|
||||
3. Create ~50 Linear issues based on your spec
|
||||
4. Initialize project structure, `init.sh`, and git
|
||||
|
||||
#### Step 4: Monitor Development
|
||||
|
||||
Coding agents will then:
|
||||
- Work on Linear issues one by one
|
||||
- Implement features
|
||||
- Test with Puppeteer browser automation
|
||||
- Update issues with implementation comments
|
||||
- Mark issues as complete
|
||||
|
||||
### Minimal Example
|
||||
|
||||
Here's a minimal Todo App example to get started:
|
||||
|
||||
```xml
|
||||
<project_specification>
|
||||
<project_name>Todo App - Task Manager</project_name>
|
||||
|
||||
<overview>
|
||||
Simple web application for managing task lists.
|
||||
Users can create, edit, complete, and delete tasks.
|
||||
</overview>
|
||||
|
||||
<technology_stack>
|
||||
<frontend>
|
||||
<framework>React with Vite</framework>
|
||||
<styling>Tailwind CSS</styling>
|
||||
</frontend>
|
||||
<backend>
|
||||
<runtime>Node.js with Express</runtime>
|
||||
<database>SQLite</database>
|
||||
</backend>
|
||||
</technology_stack>
|
||||
|
||||
<core_features>
|
||||
<feature_1>
|
||||
<title>Main Interface - Task List</title>
|
||||
<description>Display a list of all tasks with their status</description>
|
||||
<priority>1</priority>
|
||||
<category>frontend</category>
|
||||
<test_steps>
|
||||
1. Open application
|
||||
2. Verify task list displays
|
||||
</test_steps>
|
||||
</feature_1>
|
||||
|
||||
<feature_2>
|
||||
<title>Create New Task</title>
|
||||
<description>Form to add a new task to the list</description>
|
||||
<priority>1</priority>
|
||||
<category>frontend</category>
|
||||
<test_steps>
|
||||
1. Click "New Task"
|
||||
2. Enter a title
|
||||
3. Click "Add"
|
||||
4. Verify task appears in list
|
||||
</test_steps>
|
||||
</feature_2>
|
||||
</core_features>
|
||||
</project_specification>
|
||||
```
|
||||
|
||||
### Best Practices
|
||||
|
||||
#### 1. Be Detailed but Structured
|
||||
|
||||
Each feature must have:
|
||||
- Clear title
|
||||
- Complete description of functionality
|
||||
- Precise test steps
|
||||
- Priority (1=urgent, 4=optional)
|
||||
|
||||
#### 2. Use Consistent XML Format
|
||||
|
||||
Follow the structure shown above for all features using `<feature_X>` tags.
|
||||
|
||||
#### 3. Organize by Categories
|
||||
|
||||
Group features by category:
|
||||
- `auth`: Authentication
|
||||
- `frontend`: User interface
|
||||
- `backend`: API and server logic
|
||||
- `database`: Models and migrations
|
||||
- `integration`: External integrations
|
||||
|
||||
#### 4. Prioritize Features
|
||||
|
||||
- **Priority 1**: Critical features (auth, database)
|
||||
- **Priority 2**: Important features (core functionality)
|
||||
- **Priority 3**: Secondary features (UX improvements)
|
||||
- **Priority 4**: Nice-to-have (polish, optimizations)
|
||||
|
||||
### Using the Claude Clone as Reference
|
||||
|
||||
The Claude Clone example in `prompts/app_spec.txt` is excellent reference material:
|
||||
|
||||
#### ✅ Elements to Copy/Adapt:
|
||||
|
||||
1. **XML Structure**: Overall structure with `<project_specification>`, `<overview>`, `<technology_stack>`, etc.
|
||||
2. **Feature Format**: How to structure `<feature_X>` tags with all required fields
|
||||
3. **Technical Details**: How to describe technology stack, prerequisites, API endpoints, database schema, UI specs
|
||||
|
||||
#### ❌ Elements NOT to Copy:
|
||||
|
||||
1. **Specific Content**: Details about "Claude API", "artifacts", "conversations" are app-specific
|
||||
2. **Business Features**: Adapt features to your application's needs
|
||||
|
||||
### Checklist for New Application
|
||||
|
||||
- [ ] Create `prompts/app_spec.txt` with your specification
|
||||
- [ ] Define `<project_name>` for your application
|
||||
- [ ] Write complete `<overview>`
|
||||
- [ ] Specify `<technology_stack>` (frontend + backend)
|
||||
- [ ] List all `<prerequisites>`
|
||||
- [ ] Define all `<core_features>` with `<feature_X>` tags
|
||||
- [ ] Add `<test_steps>` for each feature
|
||||
- [ ] Launch: `python autonomous_agent_demo.py --project-dir ./my_app`
|
||||
- [ ] Verify in Linear that issues are created correctly
|
||||
|
||||
## Customization
|
||||
|
||||
### Creating a New Application from Scratch
|
||||
|
||||
To create a **completely new application** (not based on the Claude Clone example):
|
||||
|
||||
1. **Read the guide**: See [GUIDE_NEW_APP.md](GUIDE_NEW_APP.md) for detailed instructions
|
||||
2. **Use the template**: Copy `prompts/app_spec_template.txt` as a starting point
|
||||
3. **Reference the example**: Use `prompts/app_spec.txt` (Claude Clone) as a reference for structure and detail level
|
||||
4. **Create your spec**: Write your `prompts/app_spec.txt` with your application specification
|
||||
5. **Launch**: Run `python autonomous_agent_demo.py --project-dir ./my_new_app`
|
||||
|
||||
**Key points:**
|
||||
- Keep the framework files unchanged (they're generic and reusable)
|
||||
- Only create/modify `prompts/app_spec.txt` for your new application
|
||||
- Use the XML structure from the Claude Clone example as a template
|
||||
- Define features with `<feature_X>` tags - each will become a Linear issue
|
||||
|
||||
### Changing the Application
|
||||
|
||||
Edit `prompts/app_spec.txt` to specify a different application to build.
|
||||
|
||||
### Adding New Features to Existing Projects
|
||||
|
||||
1. Create a new specification file in `prompts/` directory (e.g., `app_spec_new_feature.txt`)
|
||||
2. Format it with `<feature>` tags following the same structure as `app_spec.txt`
|
||||
3. Run with `--new-spec` flag:
|
||||
```bash
|
||||
python autonomous_agent_demo.py --project-dir ./my_project --new-spec app_spec_new_feature.txt
|
||||
python autonomous_agent_demo.py --project-dir ./ikario_body --new-spec app_spec_new_feature.txt
|
||||
```
|
||||
4. The Initializer Bis agent will create new Linear issues for each feature in the spec file
|
||||
|
||||
|
||||
@@ -15,8 +15,15 @@ Example Usage:
|
||||
import argparse
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Fix Windows encoding issues with emojis and Unicode characters.
# Legacy Windows consoles default to a code page (e.g. cp1252) that cannot
# encode emojis, so printing them raises UnicodeEncodeError.
if sys.platform == 'win32':
    import io

    for _stream_name in ('stdout', 'stderr'):
        _stream = getattr(sys, _stream_name)
        if hasattr(_stream, 'reconfigure'):
            # Python 3.7+: switch the encoding in place. Unlike wrapping
            # the raw buffer in a new TextIOWrapper, this keeps the
            # stream's existing buffering settings.
            _stream.reconfigure(encoding='utf-8', errors='replace')
        elif hasattr(_stream, 'buffer'):
            # Fallback for stream objects without reconfigure(): wrap the
            # underlying binary buffer, as the original code did.
            setattr(
                sys,
                _stream_name,
                io.TextIOWrapper(_stream.buffer, encoding='utf-8', errors='replace'),
            )
        # Streams that are None or expose neither attribute are left as-is
        # instead of crashing at import time.
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from agent import run_autonomous_agent
|
||||
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
services:
|
||||
my_project_frontend:
|
||||
ikario_body_frontend:
|
||||
image: node:20
|
||||
working_dir: /app
|
||||
volumes:
|
||||
- ./generations/my_project:/app
|
||||
- ./generations/ikario_body:/app
|
||||
# Eviter de réutiliser les node_modules Windows dans le conteneur Linux
|
||||
- /app/node_modules
|
||||
command: ["sh", "-c", "npm install && npm run dev -- --host 0.0.0.0 --port 3000"]
|
||||
@@ -11,12 +11,13 @@ services:
|
||||
- "4300:3000"
|
||||
environment:
|
||||
- NODE_ENV=development
|
||||
- DOCKER_ENV=true
|
||||
|
||||
my_project_server:
|
||||
ikario_body_server:
|
||||
image: node:20
|
||||
working_dir: /app/server
|
||||
volumes:
|
||||
- ./generations/my_project:/app
|
||||
- ./generations/ikario_body:/app
|
||||
# Eviter de réutiliser les node_modules Windows dans le conteneur Linux
|
||||
- /app/server/node_modules
|
||||
command: ["sh", "-c", "npm install && npm start"]
|
||||
@@ -25,5 +26,5 @@ services:
|
||||
environment:
|
||||
- NODE_ENV=development
|
||||
depends_on:
|
||||
- my_project_frontend
|
||||
- ikario_body_frontend
|
||||
|
||||
@@ -1,24 +1,24 @@
|
||||
"""
|
||||
Dockerization helper for my_project
|
||||
Dockerization helper for ikario_body
|
||||
===================================
|
||||
|
||||
Ce script crée les fichiers Docker nécessaires pour exécuter l'application
|
||||
`generations/my_project` (frontend + serveur + base SQLite) dans Docker,
|
||||
`generations/ikario_body` (frontend + serveur + base SQLite) dans Docker,
|
||||
SANS modifier aucun fichier existant.
|
||||
|
||||
Il génère un fichier de composition :
|
||||
- docker-compose.my_project.yml (à la racine du repo)
|
||||
- docker-compose.ikario_body.yml (à la racine du repo)
|
||||
|
||||
Ce fichier utilise l'image officielle Node et monte le code existant
|
||||
ainsi que la base SQLite dans les conteneurs (mode développement).
|
||||
|
||||
Utilisation :
|
||||
1) Depuis la racine du repo :
|
||||
python dockerize_my_project.py
|
||||
python dockerize_ikario_body.py
|
||||
2) Puis pour lancer l'appli dans Docker :
|
||||
docker compose -f docker-compose.my_project.yml up
|
||||
docker compose -f docker-compose.ikario_body.yml up
|
||||
ou, selon votre installation :
|
||||
docker-compose -f docker-compose.my_project.yml up
|
||||
docker-compose -f docker-compose.ikario_body.yml up
|
||||
|
||||
- Frontend accessible sur: http://localhost:4300 (port 3000 dans le conteneur)
|
||||
- API backend (server) sur : http://localhost:4301 (port 3001 dans le conteneur)
|
||||
@@ -28,13 +28,13 @@ from pathlib import Path
|
||||
|
||||
|
||||
def generate_docker_compose(root: Path) -> None:
|
||||
"""Génère le fichier docker-compose.my_project.yml sans toucher au code existant."""
|
||||
project_dir = root / "generations" / "my_project"
|
||||
"""Génère le fichier docker-compose.ikario_body.yml sans toucher au code existant."""
|
||||
project_dir = root / "generations" / "ikario_body"
|
||||
|
||||
if not project_dir.exists():
|
||||
raise SystemExit(f"Project directory not found: {project_dir}")
|
||||
|
||||
compose_path = root / "docker-compose.my_project.yml"
|
||||
compose_path = root / "docker-compose.ikario_body.yml"
|
||||
|
||||
# On utilise les scripts npm déjà définis :
|
||||
# - frontend: npm run dev (Vite) en écoutant sur 0.0.0.0:3000 (dans le conteneur)
|
||||
@@ -45,14 +45,14 @@ def generate_docker_compose(root: Path) -> None:
|
||||
# - frontend : host 4300 -> container 3000
|
||||
# - backend : host 4301 -> container 3001
|
||||
#
|
||||
# Le volume ./generations/my_project est monté dans /app,
|
||||
# Le volume ./generations/ikario_body est monté dans /app,
|
||||
# ce qui inclut aussi la base SQLite dans server/data/claude-clone.db.
|
||||
compose_content = f"""services:
|
||||
my_project_frontend:
|
||||
ikario_body_frontend:
|
||||
image: node:20
|
||||
working_dir: /app
|
||||
volumes:
|
||||
- ./generations/my_project:/app
|
||||
- ./generations/ikario_body:/app
|
||||
# Eviter de réutiliser les node_modules Windows dans le conteneur Linux
|
||||
- /app/node_modules
|
||||
command: ["sh", "-c", "npm install && npm run dev -- --host 0.0.0.0 --port 3000"]
|
||||
@@ -61,11 +61,11 @@ def generate_docker_compose(root: Path) -> None:
|
||||
environment:
|
||||
- NODE_ENV=development
|
||||
|
||||
my_project_server:
|
||||
ikario_body_server:
|
||||
image: node:20
|
||||
working_dir: /app/server
|
||||
volumes:
|
||||
- ./generations/my_project:/app
|
||||
- ./generations/ikario_body:/app
|
||||
# Eviter de réutiliser les node_modules Windows dans le conteneur Linux
|
||||
- /app/server/node_modules
|
||||
command: ["sh", "-c", "npm install && npm start"]
|
||||
@@ -74,7 +74,7 @@ def generate_docker_compose(root: Path) -> None:
|
||||
environment:
|
||||
- NODE_ENV=development
|
||||
depends_on:
|
||||
- my_project_frontend
|
||||
- ikario_body_frontend
|
||||
|
||||
"""
|
||||
|
||||
26021
ikario_memories_export.md
Normal file
26021
ikario_memories_export.md
Normal file
File diff suppressed because it is too large
Load Diff
2510
navette.txt
Normal file
2510
navette.txt
Normal file
File diff suppressed because it is too large
Load Diff
5
package.json
Normal file
5
package.json
Normal file
@@ -0,0 +1,5 @@
|
||||
{
|
||||
"dependencies": {
|
||||
"puppeteer": "^24.33.1"
|
||||
}
|
||||
}
|
||||
151
patch_stats.py
Normal file
151
patch_stats.py
Normal file
@@ -0,0 +1,151 @@
|
||||
#!/usr/bin/env python3
"""
Patch getMemoryStats to count thoughts and conversations separately.

The script rewrites, in place, the ``getMemoryStats`` function (and the
JSDoc block directly above it, if any) inside ``memoryService.js``.

Usage:
    python patch_stats.py [path/to/memoryService.js]

When no path is given, the default ``file_path`` below is used.
"""

import sys
from typing import List, Tuple

# Default target; can be overridden by the first command-line argument.
file_path = "C:/GitHub/Linear_coding/generations/ikario_body/server/services/memoryService.js"

# Text that identifies the first line of the function to replace.
FUNCTION_SIGNATURE = 'export async function getMemoryStats()'

# JSDoc block written directly above the replacement function.
JSDOC_LINES = [
    '/**\n',
    ' * Get basic statistics about the memory store\n',
    ' * Counts thoughts and conversations separately using dedicated search tools\n',
    ' *\n',
    ' * @returns {Promise<Object>} Statistics about the memory store\n',
    ' */\n',
]

# Replacement JavaScript implementation.
# NOTE(review): both searches request n_results: 100, so each count is
# presumably capped at 100 - confirm against the MCP server behaviour.
# The doubled backslash yields the JS regex /\[Pertinence:/g.
new_function = '''export async function getMemoryStats() {
  const status = getMCPStatus();

  if (!isMCPConnected()) {
    return {
      connected: false,
      enabled: status.enabled,
      configured: status.configured,
      total_memories: 0,
      thoughts_count: 0,
      conversations_count: 0,
      last_save: null,
      error: status.error,
      serverPath: status.serverPath,
    };
  }

  try {
    // Count thoughts using search_thoughts with broad query
    let thoughtsCount = 0;
    try {
      const thoughtsResult = await callMCPTool('search_thoughts', {
        query: 'a', // Simple query that will match most thoughts
        n_results: 100
      });

      // Parse the text response to count thoughts
      const thoughtsText = thoughtsResult.content?.[0]?.text || '';
      const thoughtMatches = thoughtsText.match(/\\[Pertinence:/g);
      thoughtsCount = thoughtMatches ? thoughtMatches.length : 0;
    } catch (err) {
      console.log('[getMemoryStats] Could not count thoughts:', err.message);
    }

    // Count conversations using search_conversations with search_level="full"
    let conversationsCount = 0;
    try {
      const convsResult = await callMCPTool('search_conversations', {
        query: 'a', // Simple query that will match most conversations
        n_results: 100,
        search_level: 'full'
      });

      // Parse the text response to count conversations
      const convsText = convsResult.content?.[0]?.text || '';
      const convMatches = convsText.match(/\\[Pertinence:/g);
      conversationsCount = convMatches ? convMatches.length : 0;
    } catch (err) {
      console.log('[getMemoryStats] Could not count conversations:', err.message);
    }

    const totalMemories = thoughtsCount + conversationsCount;

    return {
      connected: true,
      enabled: status.enabled,
      configured: status.configured,
      total_memories: totalMemories,
      thoughts_count: thoughtsCount,
      conversations_count: conversationsCount,
      last_save: new Date().toISOString(), // Would need to track this separately
      error: null,
      serverPath: status.serverPath,
    };
  } catch (error) {
    return {
      connected: true,
      enabled: status.enabled,
      configured: status.configured,
      total_memories: 0,
      thoughts_count: 0,
      conversations_count: 0,
      last_save: null,
      error: error.message,
      serverPath: status.serverPath,
    };
  }
}
'''


def find_function_span(lines: List[str]) -> Tuple[int, int]:
    """Locate the getMemoryStats function in *lines*.

    Returns:
        ``(start, end)`` as 0-based inclusive indices: ``start`` is the line
        containing ``FUNCTION_SIGNATURE`` and ``end`` is the line on which
        the function's braces balance again.

    Raises:
        ValueError: if the signature or the closing brace cannot be found.
    """
    start = next(
        (index for index, line in enumerate(lines) if FUNCTION_SIGNATURE in line),
        None,
    )
    if start is None:
        raise ValueError("Could not find getMemoryStats function")

    depth = 0
    seen_open = False
    for index in range(start, len(lines)):
        opened = lines[index].count('{')
        depth += opened - lines[index].count('}')
        seen_open = seen_open or opened > 0
        # Only stop once an opening brace has actually been seen, so that a
        # brace placed on a later line than the signature is not mistaken
        # for an already-closed function. Braces inside JS strings or
        # comments are not recognised; this is plain character counting.
        if seen_open and depth == 0:
            return start, index

    raise ValueError("Could not find end of getMemoryStats function")


def find_jsdoc_start(lines: List[str], start_line: int) -> int:
    """Return the index where the function's own JSDoc block begins.

    Only the comment block that ends (ignoring blank lines) directly above
    ``start_line`` is considered. If there is none, ``start_line`` itself is
    returned so that nothing above the function is removed.
    """
    probe = start_line - 1
    # Skip blank lines between the comment and the function.
    while probe >= 0 and lines[probe].strip() == '':
        probe -= 1

    if probe < 0 or not lines[probe].strip().endswith('*/'):
        return start_line

    # Walk up through the ' * ...' lines and stop at the '/**' opener, so
    # earlier blank lines and unrelated comment blocks are left alone.
    while probe >= 0:
        stripped = lines[probe].strip()
        if stripped.startswith('/**'):
            return probe
        if not stripped.startswith('*'):
            break
        probe -= 1

    return start_line


def build_patched_lines(lines: List[str], comment_start: int, end_line: int) -> List[str]:
    """Return a new line list with lines[comment_start..end_line] replaced.

    The replaced range is substituted by ``JSDOC_LINES`` followed by
    ``new_function``. A blank separator line is added only when the code
    that follows does not already start with one, which keeps repeated runs
    from accumulating blank lines.
    """
    remainder = lines[end_line + 1:]
    patched = list(lines[:comment_start])
    patched.extend(JSDOC_LINES)
    patched.append(new_function)
    if remainder and remainder[0].strip():
        patched.append('\n')
    patched.extend(remainder)
    return patched


def main(argv=None) -> int:
    """Patch the target file in place; return a process exit code."""
    argv = sys.argv if argv is None else argv
    target = argv[1] if len(argv) > 1 else file_path

    # The success messages contain a check mark that legacy Windows code
    # pages cannot encode; degrade to '?' instead of crashing after the
    # file has already been written.
    if hasattr(sys.stdout, 'reconfigure'):
        sys.stdout.reconfigure(errors='replace')

    with open(target, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    try:
        start_line, end_line = find_function_span(lines)
    except ValueError as err:
        print(f"ERROR: {err}")
        return 1

    print(f"Found getMemoryStats from line {start_line+1} to {end_line+1}")

    comment_start = find_jsdoc_start(lines, start_line)
    new_lines = build_patched_lines(lines, comment_start, end_line)

    with open(target, 'w', encoding='utf-8') as f:
        f.writelines(new_lines)

    print(f"✓ Successfully patched getMemoryStats (lines {comment_start+1} to {end_line+1})")
    print(f"✓ File saved: {target}")
    return 0


if __name__ == "__main__":
    # sys.exit, unlike the bare exit() helper, does not depend on the
    # site module being loaded.
    sys.exit(main())
|
||||
275
project_progress.md
Normal file
275
project_progress.md
Normal file
@@ -0,0 +1,275 @@
|
||||
# Linear Coding Project - Progress Tracking
|
||||
|
||||
**Last Updated**: 2025-12-18 16:45 CET
|
||||
**Project**: Claude.ai Clone with Extended Thinking
|
||||
**Linear Team**: TEAMPHI
|
||||
|
||||
---
|
||||
|
||||
## 🎯 Current Sprint: Extended Thinking Implementation (TEAMPHI-190-204)
|
||||
|
||||
### 📊 Overall Progress: 80% Complete
|
||||
|
||||
| Issue | Feature | Status | Notes |
|
||||
|-------|---------|--------|-------|
|
||||
| TEAMPHI-190 | Extended Thinking Spec | ✅ Done | Spec document created |
|
||||
| TEAMPHI-191 | Database Schema | ✅ Done | Migrations applied |
|
||||
| TEAMPHI-192 | Backend API | ✅ Done | Routes updated |
|
||||
| TEAMPHI-193 | Frontend State | ✅ Done | State management complete |
|
||||
| TEAMPHI-194 | ThinkingBlock Component | ✅ Done | Tested and validated |
|
||||
| TEAMPHI-195 | ThinkingBlock Integration | ✅ Done | Fully functional |
|
||||
| TEAMPHI-196 | Settings Panel | ✅ Done | Tested with Puppeteer |
|
||||
| TEAMPHI-197 | Budget Slider | ✅ Done | Tested with Puppeteer |
|
||||
| TEAMPHI-198 | Thinking Badge | ✅ Done | Tested with Puppeteer |
|
||||
| TEAMPHI-199 | Streaming Handler | ✅ Done | Fixed data structure mapping |
|
||||
| TEAMPHI-200 | Tool Use Preservation | 🔄 Pending | Not started |
|
||||
| TEAMPHI-201 | Token Tracking | 🔄 Pending | Not started |
|
||||
| TEAMPHI-202 | Usage Stats | 🔄 Pending | Not started |
|
||||
| TEAMPHI-203 | Error Handling | 🔄 Pending | Not started |
|
||||
| TEAMPHI-204 | Documentation | 🔄 Pending | Not started |
|
||||
|
||||
---
|
||||
|
||||
## ✅ CRITICAL BUG RESOLVED (2025-12-18)
|
||||
|
||||
### Bug: max_tokens vs thinking_budget_tokens Conflict
|
||||
|
||||
**Status**: ✅ **FIXED**
|
||||
|
||||
**Solution Implemented:**
|
||||
```javascript
|
||||
// App.jsx lines 4747-4749
|
||||
const [maxTokens, setMaxTokens] = useState(8192)
|
||||
const [enableThinking, setEnableThinking] = useState(false)
|
||||
const [thinkingBudgetTokens, setThinkingBudgetTokens] = useState(6144) // 6K tokens
|
||||
|
||||
// server/db/index.js line 243
|
||||
db.exec(`ALTER TABLE conversations ADD COLUMN thinking_budget_tokens INTEGER DEFAULT 6144;`)
|
||||
```
|
||||
|
||||
**Result**: 8192 > 6144 ✅ **API Constraint Satisfied**
|
||||
|
||||
**Additional Fixes:**
|
||||
- Frontend now correctly reads `data.thinking.content` and `data.thinking.signature` from SSE events
|
||||
- Database updated: all existing conversations set to 4096, new conversations default to 6144
|
||||
- Extended Thinking disabled by default (users must enable manually)
|
||||
|
||||
---
|
||||
|
||||
## ✅ Completed Features
|
||||
|
||||
### Backend Implementation
|
||||
|
||||
**Database Schema** (`server/db/index.js`):
|
||||
- ✅ `conversations.enable_thinking` (INTEGER, default 0)
|
||||
- ✅ `conversations.thinking_budget_tokens` (INTEGER, default 6144)
|
||||
- ✅ `messages.thinking_content` (TEXT)
|
||||
- ✅ `messages.thinking_signature` (TEXT)
|
||||
|
||||
**API Endpoints** (`server/routes/conversations.js`):
|
||||
- ✅ PUT `/api/conversations/:id` accepts `enableThinking` and `thinkingBudgetTokens`
|
||||
- ✅ Validation: budget range 1024-200000 tokens
|
||||
|
||||
**Message Streaming** (`server/routes/messages.js`):
|
||||
- ✅ Read `enable_thinking` from conversations table (line 321)
|
||||
- ✅ Build thinking parameters for Claude API (lines 365-374)
|
||||
- ✅ Handle `thinking_delta` events during streaming (lines 416-423)
|
||||
- ✅ Handle `signature_delta` events (lines 425-427)
|
||||
- ✅ Save `thinking_content` and `thinking_signature` to DB (lines 509-511)
|
||||
- ✅ Return thinking data in SSE `done` event (lines 559-563)
|
||||
|
||||
### Frontend Implementation
|
||||
|
||||
**ThinkingBlock Component** (`src/components/ThinkingBlock.jsx`):
|
||||
- ✅ Collapsible UI with brain icon
|
||||
- ✅ Header shows "Thinking..." during streaming
|
||||
- ✅ Header shows "Claude's reasoning" after completion
|
||||
- ✅ Token count estimate display
|
||||
- ✅ Animated dots during streaming
|
||||
- ✅ Expand/collapse functionality
|
||||
- ✅ Monospace font for thinking content
|
||||
- ✅ Blue color scheme (border-blue-200, bg-blue-50)
|
||||
- ✅ Signature verification indicator
|
||||
|
||||
**Settings Panel** (`src/App.jsx` lines 4236-4316):
|
||||
- ✅ Extended Thinking checkbox with brain icon
|
||||
- ✅ Label and tooltip
|
||||
- ✅ Conditional budget slider (visible when enabled)
|
||||
- ✅ Budget range: 1K-32K tokens
|
||||
- ✅ Visual indicator (shows "5K", "10K", etc.)
|
||||
|
||||
**State Management** (`src/App.jsx`):
|
||||
- ✅ `enableThinking` state (line 4748)
|
||||
- ✅ `thinkingBudgetTokens` state (line 4749)
|
||||
- ✅ `streamingThinkingContent` state (line 4742)
|
||||
- ✅ `handleEnableThinkingChange` with DB persistence (lines 5210-5235)
|
||||
- ✅ `handleThinkingBudgetChange` with DB persistence (lines 5237-5251)
|
||||
- ✅ Load settings from conversation on select (lines 4835-4841)
|
||||
|
||||
**UI Integration**:
|
||||
- ✅ ThinkingBlock in Message component (line 3174)
|
||||
- ✅ All props passed to ChatArea (line 5695)
|
||||
- ✅ Thinking badge in sidebar (lines 2392-2399)
|
||||
|
||||
### Testing
|
||||
|
||||
**Automated Tests Created**:
|
||||
- ✅ `test_extended_thinking.js` - Settings panel tests (PASSED)
|
||||
- ✅ `test_thinking_badge.js` - Badge visibility tests (PASSED)
|
||||
- ✅ `test_thinking_badge_simple.js` - Simplified badge test (PASSED)
|
||||
|
||||
**Manual Testing (2025-12-18)**:
|
||||
- ✅ Settings panel visible and functional
|
||||
- ✅ Budget slider appears when Extended Thinking enabled
|
||||
- ✅ Badge appears in sidebar for conversations with Extended Thinking
|
||||
- ✅ ThinkingBlock displays correctly with blue UI
|
||||
- ✅ Thinking content persists after streaming
|
||||
- ✅ Expand/collapse functionality works
|
||||
- ✅ Signature verification indicator shows
|
||||
- ✅ Real API test successful with Whitehead philosophy question
|
||||
|
||||
**Test Configuration Used**:
|
||||
- max_tokens: 8192
|
||||
- thinking_budget_tokens: 6000 (user-tested, now default 6144)
|
||||
- Extended Thinking: Manually enabled via checkbox
|
||||
|
||||
---
|
||||
|
||||
## 🐛 Known Bugs and Issues
|
||||
|
||||
### 1. ✅ FIXED: max_tokens vs budget conflict
|
||||
**Status**: ✅ Fixed on 2025-12-18
|
||||
**Solution**: Set max_tokens=8192, thinking_budget_tokens=6144
|
||||
**Location**: `src/App.jsx` lines 4747-4749, `server/db/index.js` line 243
|
||||
|
||||
### 2. ✅ FIXED: Frontend SSE data mapping
|
||||
**Status**: ✅ Fixed on 2025-12-18
|
||||
**Solution**: Changed from `data.thinking_signature` to `data.thinking.signature`
|
||||
**Location**: `src/App.jsx` line 5566
|
||||
|
||||
### 3. ✅ FIXED: streamingThinkingContent not passed to ChatArea
|
||||
**Status**: Fixed in commit 91ea3ec
|
||||
**Issue**: ReferenceError caused interface crash
|
||||
**Fix**: Added `streamingThinkingContent` to ChatArea props
|
||||
|
||||
### 4. ✅ FIXED: Vite proxy wrong port
|
||||
**Status**: Fixed in commit 0a4072d
|
||||
**Issue**: Frontend couldn't connect to backend
|
||||
**Fix**: Changed proxy from localhost:3004 to localhost:3001
|
||||
|
||||
### 5. ✅ FIXED: Extended Thinking props not passed to ChatArea
|
||||
**Status**: Fixed in commit d447e69
|
||||
**Issue**: enableThinking undefined in ChatArea
|
||||
**Fix**: Added props to ChatArea signature and render call
|
||||
|
||||
---
|
||||
|
||||
## 📝 Commits History
|
||||
|
||||
| Commit | Message | Files Changed |
|
||||
|--------|---------|---------------|
|
||||
| 91ea3ec | Fix critical bug: pass streamingThinkingContent to ChatArea | src/App.jsx |
|
||||
| 8864bdc | Add Thinking badge to conversation list | src/App.jsx |
|
||||
| 0a4072d | Fix Vite proxy configuration | vite.config.js |
|
||||
| d447e69 | Fix Extended Thinking props not passed to ChatArea | src/App.jsx |
|
||||
| 1091f65 | Add Extended Thinking settings panel and budget slider | src/App.jsx |
|
||||
| 530e54b | Integrate ThinkingBlock into message display | src/App.jsx, src/components/ThinkingBlock.jsx |
|
||||
|
||||
---
|
||||
|
||||
## 🔄 Database State
|
||||
|
||||
**Extended Thinking Status** (as of 2025-12-18 16:45):
|
||||
- ✅ 10+ conversations with `enable_thinking = 1`, `thinking_budget_tokens = 4096`
|
||||
- ✅ New conversations default to `enable_thinking = 0`, `thinking_budget_tokens = 6144`
|
||||
- ✅ Messages with thinking_content successfully saved (tested with Whitehead question)
|
||||
- ✅ Thinking content persists and displays correctly on reload
|
||||
|
||||
---
|
||||
|
||||
## 🎯 Next Steps
|
||||
|
||||
### ✅ Completed Actions (2025-12-18)
|
||||
|
||||
1. ✅ **FIXED CRITICAL BUG** - max_tokens vs budget conflict resolved
|
||||
2. ✅ **TESTED Extended Thinking End-to-End** - All tests passed
|
||||
3. ✅ **VALIDATED and MARKED DONE** - TEAMPHI-194, 195, 199 completed
|
||||
4. ✅ **Fixed UX Issues** - Extended Thinking disabled by default, optimal defaults set
|
||||
|
||||
### Remaining Work (20% of Sprint)
|
||||
|
||||
**Priority: Medium**
|
||||
- TEAMPHI-200: Tool use preservation during Extended Thinking
|
||||
- TEAMPHI-201: Token tracking for thinking vs output
|
||||
- TEAMPHI-202: Usage stats display
|
||||
- TEAMPHI-203: Error handling improvements
|
||||
- TEAMPHI-204: User documentation
|
||||
|
||||
**Notes:**
|
||||
- Core Extended Thinking feature is **fully functional**
|
||||
- Remaining issues are enhancements and polish
|
||||
- Can be completed incrementally without blocking usage
|
||||
|
||||
---
|
||||
|
||||
## 📚 Key Files Reference
|
||||
|
||||
### Backend
|
||||
- `server/routes/messages.js` - Main Extended Thinking logic (lines 320-574)
|
||||
- `server/routes/conversations.js` - Settings update endpoints (lines 143-199)
|
||||
- `server/db/index.js` - Database migrations (lines 234-258)
|
||||
|
||||
### Frontend
|
||||
- `src/App.jsx` - Main application file
|
||||
- State: lines 4742, 4748-4749
|
||||
- Handlers: lines 5210-5251
|
||||
- Settings UI: lines 4236-4316
|
||||
- Message integration: line 3174
|
||||
- ChatArea props: line 5695
|
||||
- `src/components/ThinkingBlock.jsx` - ThinkingBlock component (complete file)
|
||||
|
||||
### Tests
|
||||
- `test_extended_thinking.js` - Settings panel tests
|
||||
- `test_thinking_badge.js` - Badge tests
|
||||
- `test_thinkingblock_real.js` - Real API test (blocked)
|
||||
|
||||
### Utilities
|
||||
- `activate_thinking.py` - Script to enable Extended Thinking in DB
|
||||
|
||||
---
|
||||
|
||||
## 🎓 Lessons Learned
|
||||
|
||||
### Protocol Violations Caught
|
||||
1. **Not testing before moving on** - User reminded: "toujours tester chaque feature avant de passer à la suivante"
|
||||
2. **Fixed by**: Creating tests for each feature before marking Done
|
||||
|
||||
### Technical Challenges
|
||||
1. **Puppeteer interaction issues** - Browser rendering problems in headless mode
|
||||
2. **API parameter conflicts** - max_tokens vs thinking_budget validation
|
||||
3. **State propagation** - Props not passed through component hierarchy
|
||||
4. **Database sync** - Frontend state vs DB state mismatch
|
||||
|
||||
### Best Practices Reinforced
|
||||
1. Always test each feature before moving on to the next one (and before marking it Done)
|
||||
2. Add logging to debug state propagation issues
|
||||
3. Verify API constraints before setting defaults
|
||||
4. Use database scripts to validate state changes
|
||||
|
||||
---
|
||||
|
||||
## 📞 Support Information
|
||||
|
||||
**Project Repository**: C:\GitHub\Linear_coding
|
||||
**Application Type**: Claude.ai Clone (React + Node.js)
|
||||
**Tech Stack**: React, Vite, Express, better-sqlite3, Anthropic SDK
|
||||
**Servers**:
|
||||
- Backend: http://localhost:3001 (or 3004 if port occupied)
|
||||
- Frontend: http://localhost:5178 (Vite auto-selects available port)
|
||||
|
||||
**Database**: `generations/ikario_body/server/data/claude-clone.db`
|
||||
|
||||
---
|
||||
|
||||
## 🔖 Tags
|
||||
`#extended-thinking` `#claude-api` `#thinking-blocks` `#linear-integration` `#react` `#nodejs`
|
||||
@@ -1,663 +0,0 @@
|
||||
<project_specification>
|
||||
<project_name>Extension MCP Ikario Memory - Claude.ai Clone</project_name>
|
||||
|
||||
<overview>
|
||||
Extension du projet "Claude.ai Clone" existant pour intégrer la mémoire conversationnelle via le protocole MCP (Model Context Protocol) avec le serveur Ikario RAG.
|
||||
|
||||
Cette extension ajoute au clone Claude.ai existant la capacité pour le LLM de :
|
||||
- Sauvegarder automatiquement les conversations importantes dans une mémoire vectorielle (ChromaDB)
|
||||
- Rechercher sémantiquement dans ses souvenirs passés pour enrichir les réponses
|
||||
- Tracer l'évolution de concepts discutés au fil du temps
|
||||
- Vérifier la cohérence de nouvelles affirmations avec l'historique des conversations
|
||||
|
||||
Le projet Claude.ai Clone dispose déjà de :
|
||||
- Interface de chat complète avec streaming SSE
|
||||
- Gestion de conversations et messages (base SQLite)
|
||||
- Intégration Claude API avec Anthropic SDK
|
||||
- Frontend React + Vite + Tailwind
|
||||
- Backend Node.js + Express
|
||||
|
||||
Cette extension ajoute simplement l'intégration du client MCP Ikario RAG pour donner une mémoire sémantique au LLM.
|
||||
</overview>
|
||||
|
||||
<technology_stack>
|
||||
<existing_stack>
|
||||
Le projet dispose déjà de :
|
||||
- Frontend : React + Vite + Tailwind CSS (port 4300)
|
||||
- Backend : Node.js + Express + SQLite (port 4301)
|
||||
- API Claude : Anthropic SDK avec streaming SSE
|
||||
- Gestion de conversations : base SQLite avec tables conversations et messages
|
||||
</existing_stack>
|
||||
|
||||
<new_integration>
|
||||
<mcp_client>
|
||||
- Package : @modelcontextprotocol/sdk (client MCP Node.js)
|
||||
- Serveur MCP : ikario_rag/server.py (Python)
|
||||
- Communication : stdio (stdin/stdout avec JSON-RPC 2.0)
|
||||
- Localisation : chemin configuré dans .env (MCP_IKARIO_SERVER_PATH)
|
||||
</mcp_client>
|
||||
|
||||
<mcp_tools>
|
||||
Les 4 outils MCP Ikario disponibles :
|
||||
1. search_memories : Recherche sémantique dans les souvenirs
|
||||
2. add_thought : Sauvegarde une pensée/conversation avec métadonnées
|
||||
3. trace_concept_evolution : Trace l'évolution temporelle d'un concept
|
||||
4. check_consistency : Vérifie la cohérence d'une affirmation avec l'historique
|
||||
</mcp_tools>
|
||||
|
||||
<memory_database>
|
||||
- Base vectorielle : ChromaDB (gérée par serveur MCP)
|
||||
- Embeddings : SentenceTransformer all-MiniLM-L6-v2 (384 dimensions)
|
||||
- Stockage : ./ikario_rag/index/ (persistance sur disque)
|
||||
- Métadonnées : category, tags, emotions, concepts, date
|
||||
</memory_database>
|
||||
</new_integration>
|
||||
</technology_stack>
|
||||
|
||||
<prerequisites>
|
||||
<existing_project>
|
||||
- Projet Claude.ai Clone déjà fonctionnel
|
||||
- Base SQLite avec tables conversations et messages opérationnelles
|
||||
- API endpoints Claude existants (/api/claude/chat, /api/conversations/*)
|
||||
- Frontend React avec composants de chat déjà en place
|
||||
</existing_project>
|
||||
|
||||
<mcp_server_setup>
|
||||
- Serveur MCP Ikario RAG installé dans ./ikario_rag/
|
||||
- Python 3.11+ avec dépendances : chromadb, sentence-transformers, mcp
|
||||
- Serveur testé et fonctionnel (peut être lancé manuellement via python server.py)
|
||||
- Base ChromaDB initialisée dans ./ikario_rag/index/
|
||||
</mcp_server_setup>
|
||||
|
||||
<backend_dependencies>
|
||||
- Installer @modelcontextprotocol/sdk dans le backend
|
||||
- Ajouter variables d'environnement au .env :
|
||||
* MCP_IKARIO_SERVER_PATH=path/to/ikario_rag/server.py
|
||||
* MCP_MEMORY_ENABLED=true (pour activer/désactiver la fonctionnalité)
|
||||
</backend_dependencies>
|
||||
</prerequisites>
|
||||
|
||||
<core_features>
|
||||
<feature_1>
|
||||
<title>Module backend de connexion MCP Ikario</title>
|
||||
<description>
|
||||
Créer un module backend qui initialise et gère la connexion au serveur MCP Ikario RAG.
|
||||
|
||||
Fonctionnalités :
|
||||
- Module server/services/mcpClient.js qui encapsule le client MCP
|
||||
- Initialisation au démarrage du serveur Express
|
||||
- Lecture de la configuration depuis .env (MCP_IKARIO_SERVER_PATH)
|
||||
- Gestion du cycle de vie de la connexion (connect, disconnect, reconnect)
|
||||
- Pool de connexions ou singleton pour éviter les multiples connexions
|
||||
- Gestion des erreurs et timeout
|
||||
- Logging détaillé des appels MCP
|
||||
|
||||
Technique :
|
||||
- Import de @modelcontextprotocol/sdk
|
||||
- StdioClientTransport avec command="python" et args=[server_path]
|
||||
- new Client(...) puis client.connect(transport) pour créer la connexion
|
||||
- Instance Client connectée pour gérer les appels (équivalent Node.js de ClientSession)
|
||||
- Export de fonctions : initMCP(), getMCPClient(), closeMCP()
|
||||
- Initialiser dans server/index.js au démarrage
|
||||
|
||||
Intégration :
|
||||
- S'intègre dans server/index.js existant
|
||||
- Nouveau fichier server/services/mcpClient.js
|
||||
- Pas de modification de la base SQLite
|
||||
</description>
|
||||
<priority>1</priority>
|
||||
<category>backend</category>
|
||||
<test_steps>
|
||||
1. Configurer MCP_IKARIO_SERVER_PATH dans .env
|
||||
2. Démarrer le serveur backend
|
||||
3. Vérifier les logs : "MCP Ikario client connected"
|
||||
4. Vérifier qu'aucune erreur n'est levée
|
||||
5. Arrêter le serveur et vérifier la déconnexion propre
|
||||
6. Tester avec un mauvais chemin et vérifier la gestion d'erreur
|
||||
</test_steps>
|
||||
</feature_1>
|
||||
|
||||
<feature_2>
|
||||
<title>Service wrapper pour les 4 outils MCP</title>
|
||||
<description>
|
||||
Créer un service backend qui expose les 4 outils MCP Ikario sous forme de fonctions JavaScript utilisables dans l'application.
|
||||
|
||||
Fonctionnalités :
|
||||
- Module server/services/memoryService.js
|
||||
- 4 fonctions async qui wrappent les appels MCP :
|
||||
* searchMemories(query, n_results, filter_category)
|
||||
* addThought(content, context)
|
||||
* traceConceptEvolution(concept, limit)
|
||||
* checkConsistency(statement)
|
||||
- Gestion des erreurs spécifiques à chaque outil
|
||||
- Parsing et formatage des réponses MCP
|
||||
- Validation des paramètres avant appel
|
||||
- Logging des appels et résultats
|
||||
|
||||
Technique :
|
||||
- Import de mcpClient.getMCPClient()
|
||||
- Appels via client.callTool({ name: tool_name, arguments })
|
||||
- Parsing des réponses (format TextContent)
|
||||
- Conversion en objets JavaScript utilisables
|
||||
- Export des 4 fonctions
|
||||
|
||||
Intégration :
|
||||
- Nouveau fichier server/services/memoryService.js
|
||||
- Utilisé par les routes API
|
||||
- Pas de modification de l'existant
|
||||
</description>
|
||||
<priority>1</priority>
|
||||
<category>backend</category>
|
||||
<test_steps>
|
||||
1. Créer un script de test qui importe memoryService
|
||||
2. Tester searchMemories("test", 5) et vérifier le retour
|
||||
3. Tester addThought() avec un contenu simple
|
||||
4. Vérifier que les erreurs sont bien catchées
|
||||
5. Vérifier les logs des appels MCP
|
||||
6. Tester avec MCP désactivé et vérifier le fallback
|
||||
</test_steps>
|
||||
</feature_2>
|
||||
|
||||
<feature_3>
|
||||
<title>Routes API pour les outils MCP</title>
|
||||
<description>
|
||||
Créer les routes Express qui exposent les 4 outils MCP via l'API REST existante.
|
||||
|
||||
Fonctionnalités :
|
||||
- Nouveau routeur server/routes/memory.js
|
||||
- 4 routes POST :
|
||||
* /api/memory/search (search_memories)
|
||||
* /api/memory/add (add_thought)
|
||||
* /api/memory/evolution (trace_concept_evolution)
|
||||
* /api/memory/consistency (check_consistency)
|
||||
- Validation des paramètres req.body
|
||||
- Appel des fonctions de memoryService
|
||||
- Réponses JSON standardisées
|
||||
- Gestion d'erreurs avec codes HTTP appropriés (400, 500)
|
||||
- Middleware optionnel d'authentification
|
||||
|
||||
Technique :
|
||||
- express.Router() dans server/routes/memory.js
|
||||
- Import de memoryService
|
||||
- try/catch pour chaque route
|
||||
- Validation avec express-validator ou manuelle
|
||||
- Monter le routeur dans server/index.js : app.use('/api/memory', memoryRoutes)
|
||||
|
||||
Intégration :
|
||||
- Nouveau fichier server/routes/memory.js
|
||||
- Import dans server/index.js existant
|
||||
- S'ajoute aux routes existantes (/api/conversations, /api/claude/*)
|
||||
</description>
|
||||
<priority>1</priority>
|
||||
<category>backend</category>
|
||||
<test_steps>
|
||||
1. Tester POST /api/memory/search avec Postman ou curl
|
||||
2. Vérifier la réponse JSON avec résultats de recherche
|
||||
3. Tester POST /api/memory/add avec un contenu simple
|
||||
4. Vérifier que la pensée est ajoutée dans ChromaDB
|
||||
5. Tester les erreurs (paramètres manquants)
|
||||
6. Vérifier les logs et codes HTTP
|
||||
</test_steps>
|
||||
</feature_3>
|
||||
|
||||
<feature_4>
|
||||
<title>Bouton "Sauvegarder dans la mémoire" dans le chat</title>
|
||||
<description>
|
||||
Ajouter un bouton discret dans l'interface de chat pour sauvegarder manuellement une conversation dans la mémoire.
|
||||
|
||||
Fonctionnalités :
|
||||
- Bouton icône "cerveau" ou "étoile" près de chaque message assistant
|
||||
- Au clic : ouvre un petit modal/popover
|
||||
- Formulaire rapide : Catégorie (select) + Tags (input, optionnel)
|
||||
- Bouton "Sauvegarder"
|
||||
- Sauvegarde la conversation complète (tous les messages de la conversation active)
|
||||
- Notification toast de confirmation
|
||||
- Bouton devient "Déjà sauvegardé" après sauvegarde (state local)
|
||||
|
||||
Technique :
|
||||
- Ajout de bouton dans le composant Message existant
|
||||
- State local pour modal (useState)
|
||||
- Appel API POST /api/memory/add au submit
|
||||
- Utilisation de conversation_id pour récupérer tous les messages
|
||||
- Toast notification avec react-hot-toast ou équivalent
|
||||
|
||||
Intégration :
|
||||
- Modifier le composant Message/ChatMessage existant
|
||||
- Ajouter une icône Lucide React (Brain, Star, ou Bookmark)
|
||||
- Modal/Popover avec Headless UI ou simple div conditionnelle
|
||||
</description>
|
||||
<priority>2</priority>
|
||||
<category>frontend</category>
|
||||
<test_steps>
|
||||
1. Avoir une conversation dans le chat
|
||||
2. Voir les boutons "Sauvegarder" apparaître
|
||||
3. Cliquer sur un bouton et voir le modal
|
||||
4. Sélectionner catégorie "thematique" et ajouter des tags
|
||||
5. Cliquer "Sauvegarder" et voir la notification
|
||||
6. Vérifier que le bouton devient "Déjà sauvegardé"
|
||||
7. Rechercher la conversation sauvegardée via l'API
|
||||
</test_steps>
|
||||
</feature_4>
|
||||
|
||||
<feature_5>
|
||||
<title>Panel de recherche de souvenirs dans la sidebar</title>
|
||||
<description>
|
||||
Ajouter une section de recherche de souvenirs dans la sidebar gauche existante.
|
||||
|
||||
Fonctionnalités :
|
||||
- Nouvel onglet/section "Mémoire" dans la sidebar existante (après Conversations)
|
||||
- Champ de recherche avec placeholder "Rechercher dans mes souvenirs..."
|
||||
- Liste des résultats affichés en dessous
|
||||
- Pour chaque résultat : Score | Extrait (2 lignes) | Date | Tags
|
||||
- Clic sur un résultat : affiche le détail complet dans un modal
|
||||
- Filtre par catégorie (4 boutons radio : Toutes | Fondatrice | Thématique | Contextuelle)
|
||||
- Maximum 10 résultats affichés
|
||||
|
||||
Technique :
|
||||
- Ajouter section dans Sidebar.jsx existant
|
||||
- Nouvel état pour searchQuery et searchResults
|
||||
- Debounce sur l'input (useDebounce hook)
|
||||
- Appel API POST /api/memory/search avec body {"query": query, "n_results": 10, "filter_category": filter} (filter_category omis pour « Toutes »)
|
||||
- Affichage avec Tailwind, style similaire à la liste de conversations
|
||||
- Modal pour détail (réutiliser un modal existant si possible)
|
||||
|
||||
Intégration :
|
||||
- Modification de src/components/Sidebar.jsx existant
|
||||
- Ajouter un toggle pour afficher/masquer la section Mémoire
|
||||
- S'intègre visuellement avec le design existant
|
||||
</description>
|
||||
<priority>2</priority>
|
||||
<category>frontend</category>
|
||||
<test_steps>
|
||||
1. Ouvrir l'application et voir la section "Mémoire" dans la sidebar
|
||||
2. Taper une requête dans le champ de recherche
|
||||
3. Voir les résultats apparaître avec scores et tags
|
||||
4. Cliquer sur un résultat et voir le modal de détail
|
||||
5. Tester les filtres par catégorie
|
||||
6. Vérifier que la recherche est debounced (pas d'appel à chaque lettre)
|
||||
7. Vérifier le style cohérent avec le design existant
|
||||
</test_steps>
|
||||
</feature_5>
|
||||
|
||||
<feature_6>
|
||||
<title>Sauvegarde automatique des conversations importantes</title>
|
||||
<description>
|
||||
Implémenter la sauvegarde automatique des conversations marquées comme "importantes" ou après un certain nombre de messages.
|
||||
|
||||
Fonctionnalités :
|
||||
- Détection automatique : conversations de >10 messages OU marquées "épinglées"
|
||||
- Déclenchement : en background après chaque nouveau message assistant
|
||||
- Extraction automatique basique :
|
||||
* Catégorie : "contextuelle" par défaut
|
||||
* Tags : mots en majuscules, mots >8 caractères, mots répétés
|
||||
* Date : timestamp du message
|
||||
- Sauvegarde silencieuse (pas de modal, juste log backend)
|
||||
- Flag dans table conversations : has_memory_backup INTEGER DEFAULT 0 (booléen 0/1)
|
||||
|
||||
Technique :
|
||||
- Hook/middleware dans l'endpoint POST /api/conversations/:id/messages
|
||||
- Après insertion du message assistant : vérifier conditions
|
||||
- Si conditions remplies : appel async memoryService.addThought()
|
||||
- Extraction tags basique avec regex/split
|
||||
- UPDATE conversations SET has_memory_backup = 1 WHERE id = ?
|
||||
|
||||
Intégration :
|
||||
- Modification de server/routes/conversations.js (endpoint POST messages)
|
||||
- Ajout colonne has_memory_backup dans table conversations (migration)
|
||||
- Appel non-bloquant (Promise.then, pas de await)
|
||||
</description>
|
||||
<priority>3</priority>
|
||||
<category>backend</category>
|
||||
<test_steps>
|
||||
1. Créer une nouvelle conversation
|
||||
2. Envoyer >10 messages
|
||||
3. Vérifier dans les logs qu'une sauvegarde automatique est déclenchée
|
||||
4. Rechercher la conversation dans l'API /api/memory/search
|
||||
5. Vérifier que has_memory_backup = 1 dans la DB
|
||||
6. Tester avec une conversation épinglée (<10 messages)
|
||||
7. Vérifier que les tags sont extraits correctement
|
||||
</test_steps>
|
||||
</feature_6>
|
||||
|
||||
<feature_7>
|
||||
<title>Indicateur visuel de mémoire active</title>
|
||||
<description>
|
||||
Afficher un indicateur visuel dans l'interface pour montrer que la mémoire est active et utilisée.
|
||||
|
||||
Fonctionnalités :
|
||||
- Icône "cerveau" ou "mémoire" dans le header de l'application
|
||||
- État : Vert (connecté) | Orange (déconnecté) | Gris (désactivé)
|
||||
- Tooltip au survol : "Mémoire active - X souvenirs" ou "Mémoire déconnectée"
|
||||
- Clic sur l'icône : ouvre un mini dashboard avec stats rapides
|
||||
* Nombre total de souvenirs
|
||||
* Dernière sauvegarde (timestamp)
|
||||
* Connexion MCP : OK/KO
|
||||
- Animation discrète lors d'une sauvegarde (pulse)
|
||||
|
||||
Technique :
|
||||
- Composant React MemoryIndicator dans Header.jsx
|
||||
- Appel API GET /api/memory/stats toutes les 30 secondes (setInterval)
|
||||
- État pour connectionStatus : 'connected' | 'disconnected' | 'disabled'
|
||||
- Icône Brain de Lucide React avec couleurs conditionnelles
|
||||
- Popover Headless UI pour le mini dashboard
|
||||
|
||||
Intégration :
|
||||
- Ajout dans src/components/Header.jsx existant
|
||||
- S'intègre à côté du model selector
|
||||
- Style cohérent avec le design claude.ai
|
||||
</description>
|
||||
<priority>3</priority>
|
||||
<category>frontend</category>
|
||||
<test_steps>
|
||||
1. Voir l'icône mémoire dans le header (vert si connecté)
|
||||
2. Survoler l'icône et lire le tooltip
|
||||
3. Cliquer sur l'icône et voir le mini dashboard
|
||||
4. Vérifier les stats (nombre de souvenirs, dernière sauvegarde)
|
||||
5. Arrêter le serveur MCP et voir l'icône devenir orange
|
||||
6. Sauvegarder une conversation et voir l'animation pulse
|
||||
7. Vérifier le polling des stats (console network toutes les 30s)
|
||||
</test_steps>
|
||||
</feature_7>
|
||||
|
||||
<feature_8>
|
||||
<title>Configuration des Tools Claude API pour la mémoire</title>
|
||||
<description>
|
||||
Exposer les outils MCP comme Tools dans l'API Claude pour que le LLM puisse décider de façon autonome d'utiliser sa mémoire.
|
||||
|
||||
Fonctionnalités :
|
||||
- Définir 2 tools principaux pour l'API Claude :
|
||||
1. save_memory : Sauvegarder une conversation ou pensée
|
||||
2. search_memories : Rechercher dans les souvenirs passés
|
||||
- Chaque tool avec description claire et JSON schema
|
||||
- Tools toujours disponibles dans les appels Claude (sauf si désactivé)
|
||||
- Gestion du cycle tool_use → tool_result
|
||||
- Le LLM décide de façon autonome quand sauvegarder/rechercher
|
||||
|
||||
Définition des tools :
|
||||
- save_memory :
|
||||
* description: "Sauvegarde cette conversation ou une pensée importante dans ta mémoire à long terme"
|
||||
* paramètres: content (requis), category (requis: fondatrice|thematique|contextuelle), tags (array), concepts (array), emotions (array optionnel)
|
||||
- search_memories :
|
||||
* description: "Recherche dans tes souvenirs de conversations passées"
|
||||
* paramètres: query (requis), n_results (optionnel, default 5), filter_category (optionnel)
|
||||
|
||||
Technique :
|
||||
- Modification de server/routes/claude.js
|
||||
- Définir const MEMORY_TOOLS = [{...}, {...}]
|
||||
- Ajouter tools: MEMORY_TOOLS dans anthropic.messages.create()
|
||||
- Gérer response.stop_reason === 'tool_use'
|
||||
- Extraire tool calls, exécuter via memoryService, renvoyer tool_result
|
||||
- Boucle jusqu'à la réponse finale (stop_reason === 'end_turn')
|
||||
|
||||
Intégration :
|
||||
- Modification de server/routes/claude.js existant
|
||||
- Nouveau fichier server/config/memoryTools.js pour définitions tools
|
||||
- Handler de tool execution dans le streaming SSE
|
||||
</description>
|
||||
<priority>1</priority>
|
||||
<category>backend</category>
|
||||
<test_steps>
|
||||
1. Dire à Claude "Cette conversation est importante, sauvegarde-la"
|
||||
2. Vérifier dans les logs que Claude appelle tool save_memory
|
||||
3. Vérifier que le backend exécute add_thought via MCP
|
||||
4. Vérifier que Claude confirme la sauvegarde
|
||||
5. Demander "Que sais-tu sur X ?" et voir Claude appeler search_memories
|
||||
6. Vérifier que les souvenirs sont retournés et utilisés dans la réponse
|
||||
7. Tester avec MCP_MEMORY_ENABLED=false
|
||||
</test_steps>
|
||||
</feature_8>
|
||||
|
||||
<feature_9>
|
||||
<title>Handler de Tool Execution pour les outils mémoire</title>
|
||||
<description>
|
||||
Implémenter le handler qui exécute les tool calls de Claude et retourne les résultats.
|
||||
|
||||
Fonctionnalités :
|
||||
- Fonction executeTool(tool_name, tool_input) qui route vers memoryService
|
||||
- Support de save_memory → memoryService.addThought()
|
||||
- Support de search_memories → memoryService.searchMemories()
|
||||
- Formatage des résultats en tool_result compatible Claude API
|
||||
- Gestion des erreurs (retourner error dans tool_result)
|
||||
- Logging de chaque exécution de tool
|
||||
- Timeout de 10 secondes par tool call
|
||||
|
||||
Workflow :
|
||||
1. Claude retourne stop_reason='tool_use' + content avec tool_use block
|
||||
2. Backend extrait tool_name et tool_input
|
||||
3. executeTool() appelle la fonction MCP correspondante
|
||||
4. Résultat formaté en tool_result
|
||||
5. Nouvelle requête à Claude avec tool_result
|
||||
6. Claude utilise le résultat pour sa réponse finale
|
||||
|
||||
Technique :
|
||||
- Fonction async executeTool(tool_name, tool_input)
|
||||
- Switch sur tool_name pour router
|
||||
- Appel des fonctions memoryService
|
||||
- Formatage : { type: "tool_result", tool_use_id, content }
|
||||
- Gestion try/catch avec error reporting
|
||||
|
||||
Intégration :
|
||||
- Nouveau fichier server/services/toolExecutor.js
|
||||
- Appelé depuis server/routes/claude.js dans la boucle de streaming
|
||||
</description>
|
||||
<priority>1</priority>
|
||||
<category>backend</category>
|
||||
<test_steps>
|
||||
1. Tester save_memory avec tous les paramètres
|
||||
2. Vérifier que le tool_result est retourné correctement
|
||||
3. Tester search_memories avec query simple
|
||||
4. Vérifier le formatage des résultats
|
||||
5. Tester avec des paramètres invalides (gestion erreur)
|
||||
6. Vérifier les logs d'exécution
|
||||
7. Tester le timeout avec un MCP server lent
|
||||
</test_steps>
|
||||
</feature_9>
|
||||
|
||||
<feature_10>
|
||||
<title>System Prompt enrichi pour guider l'utilisation de la mémoire</title>
|
||||
<description>
|
||||
Ajouter des instructions au system prompt pour que Claude utilise intelligemment sa mémoire.
|
||||
|
||||
Fonctionnalités :
|
||||
- Instructions claires sur quand sauvegarder :
|
||||
* Conversations fondatrices (premières interactions, moments clés)
|
||||
* Discussions thématiques approfondies (philosophie, science, art)
|
||||
* Sur demande explicite de l'utilisateur
|
||||
- Instructions sur quand rechercher :
|
||||
* Questions qui font référence au passé
|
||||
* Sujets déjà discutés
|
||||
* Pour enrichir les réponses avec du contexte
|
||||
- Guidance sur les métadonnées :
|
||||
* Catégories : quand utiliser fondatrice vs thematique vs contextuelle
|
||||
* Tags : extraire les concepts clés
|
||||
* Émotions : optionnelles mais enrichissantes
|
||||
- Ton naturel : ne pas annoncer systématiquement les sauvegardes
|
||||
|
||||
System Prompt ajouté :
|
||||
```
|
||||
Tu as accès à une mémoire persistante via deux outils :
|
||||
|
||||
1. save_memory : Sauvegarde les conversations importantes
|
||||
- Utilise category='fondatrice' pour les moments clés et premières interactions
|
||||
- Utilise category='thematique' pour les discussions approfondies sur des concepts
|
||||
- Utilise category='contextuelle' pour les échanges quotidiens intéressants
|
||||
- Extrais des tags pertinents (concepts clés, mots importants)
|
||||
|
||||
2. search_memories : Recherche dans tes souvenirs
|
||||
- Utilise-le quand l'utilisateur fait référence à des discussions passées
|
||||
- Ou pour enrichir ta réponse avec du contexte antérieur
|
||||
|
||||
Utilise ces outils de façon naturelle et autonome. Tu n'es pas obligé d'annoncer
|
||||
chaque sauvegarde, sauf si c'est pertinent dans la conversation.
|
||||
```
|
||||
|
||||
Technique :
|
||||
- Ajout au system prompt existant dans server/routes/claude.js
|
||||
- Concaténation avec custom instructions si présentes
|
||||
- Paramètre enable_memory_tools (default: true) pour activer/désactiver
|
||||
|
||||
Intégration :
|
||||
- Modification du system prompt dans server/routes/claude.js
|
||||
- S'ajoute aux instructions existantes
|
||||
</description>
|
||||
<priority>2</priority>
|
||||
<category>backend</category>
|
||||
<test_steps>
|
||||
1. Avoir une conversation initiale et voir si Claude la sauvegarde
|
||||
2. Vérifier qu'il utilise la bonne catégorie
|
||||
3. Faire référence à une discussion passée et voir s'il recherche
|
||||
4. Vérifier que les tags extraits sont pertinents
|
||||
5. Tester que Claude ne sur-annonce pas les sauvegardes
|
||||
6. Demander explicitement une sauvegarde et vérifier
|
||||
7. Vérifier que les émotions sont ajoutées quand pertinent
|
||||
</test_steps>
|
||||
</feature_10>
|
||||
</core_features>
|
||||
|
||||
<ui_design>
|
||||
<integration_notes>
|
||||
Cette extension s'intègre dans le design existant du clone Claude.ai.
|
||||
Tous les nouveaux composants doivent respecter le design system existant.
|
||||
</integration_notes>
|
||||
|
||||
<new_components>
|
||||
- MemoryIndicator : Icône Brain dans le header avec état coloré (vert/orange/gris)
|
||||
- MemorySaveButton : Bouton discret dans chaque message assistant pour sauvegarde manuelle
|
||||
- MemorySearchPanel : Section dans la sidebar pour rechercher dans les souvenirs
|
||||
- MemoryDetailModal : Modal pour afficher le détail complet d'un souvenir
|
||||
- SaveMemoryModal : Petit modal/popover pour choisir catégorie et tags avant sauvegarde
|
||||
</new_components>
|
||||
|
||||
<color_scheme>
|
||||
Utiliser les couleurs existantes du clone Claude.ai, avec ajouts pour la mémoire :
|
||||
- Indicateur mémoire : Vert (#10B981) connecté | Orange (#F59E0B) déconnecté | Gris (#6B7280) désactivé
|
||||
- Badges catégories :
|
||||
* Fondatrice: Or/Jaune (#F59E0B)
|
||||
* Thématique: Bleu (#3B82F6)
|
||||
* Contextuelle: Gris (#6B7280)
|
||||
- Scores de pertinence : Gradient vert (#10B981) à rouge (#EF4444)
|
||||
</color_scheme>
|
||||
|
||||
<design_consistency>
|
||||
- Respecter la palette claude.ai existante (orange/amber #CC785C comme accent)
|
||||
- Utiliser les mêmes composants Tailwind que l'existant
|
||||
- Polices : même font stack que le projet (Inter/SF Pro)
|
||||
- Boutons : même style que les boutons existants
|
||||
- Modals : réutiliser les modals Headless UI existants si possible
|
||||
</design_consistency>
|
||||
</ui_design>
|
||||
|
||||
<api_endpoints>
|
||||
<note>
|
||||
Ces endpoints s'ajoutent aux endpoints existants du clone Claude.ai.
|
||||
Tous les endpoints mémoire sont sous le préfixe /api/memory.
|
||||
</note>
|
||||
|
||||
<endpoint>
|
||||
<method>POST</method>
|
||||
<path>/api/memory/search</path>
|
||||
<description>Rechercher sémantiquement dans les souvenirs (wrapper de search_memories MCP)</description>
|
||||
<request_body>{"query": "string", "n_results": number (opt, default 5), "filter_category": "string (opt)"}</request_body>
|
||||
<response>{"results": [{content, metadata, relevance_score, distance}]}</response>
|
||||
</endpoint>
|
||||
|
||||
<endpoint>
|
||||
<method>POST</method>
|
||||
<path>/api/memory/add</path>
|
||||
<description>Ajouter une pensée/conversation manuellement (wrapper de add_thought MCP)</description>
|
||||
<request_body>{"content": "string", "context": {category, tags, emotions, concepts}}</request_body>
|
||||
<response>{"id": "string", "message": "string"}</response>
|
||||
</endpoint>
|
||||
|
||||
<endpoint>
|
||||
<method>POST</method>
|
||||
<path>/api/memory/evolution</path>
|
||||
<description>Tracer l'évolution d'un concept (wrapper de trace_concept_evolution MCP)</description>
|
||||
<request_body>{"concept": "string", "limit": number (opt, default 10)}</request_body>
|
||||
<response>{"timeline": [{date, content, evolution}]}</response>
|
||||
</endpoint>
|
||||
|
||||
<endpoint>
|
||||
<method>POST</method>
|
||||
<path>/api/memory/consistency</path>
|
||||
<description>Vérifier cohérence d'une affirmation (wrapper de check_consistency MCP)</description>
|
||||
<request_body>{"statement": "string"}</request_body>
|
||||
<response>{"consistency_score": number, "contradictions": [...]}</response>
|
||||
</endpoint>
|
||||
|
||||
<endpoint>
|
||||
<method>GET</method>
|
||||
<path>/api/memory/stats</path>
|
||||
<description>Obtenir statistiques basiques sur la mémoire</description>
|
||||
<request_body>N/A</request_body>
|
||||
<response>{"connected": boolean, "total_memories": number (approx), "last_save": timestamp}</response>
|
||||
</endpoint>
|
||||
|
||||
<existing_endpoints_modified>
|
||||
<endpoint>
|
||||
<method>POST</method>
|
||||
<path>/api/claude/chat</path>
|
||||
<modification>Ajouter logique d'enrichissement automatique avec souvenirs avant appel Claude</modification>
|
||||
</endpoint>
|
||||
|
||||
<endpoint>
|
||||
<method>POST</method>
|
||||
<path>/api/conversations/:id/messages</path>
|
||||
<modification>Ajouter logique de sauvegarde automatique après insertion message assistant</modification>
|
||||
</endpoint>
|
||||
</existing_endpoints_modified>
|
||||
</api_endpoints>
|
||||
|
||||
<database_schema>
|
||||
<chromadb>
|
||||
La base vectorielle ChromaDB est entièrement gérée par le serveur MCP Ikario.
|
||||
Le backend Node.js n'y accède pas directement, seulement via les outils MCP.
|
||||
Emplacement : ./ikario_rag/index/
|
||||
</chromadb>
|
||||
|
||||
<sqlite_modifications>
|
||||
Modifications mineures à la base SQLite existante du clone Claude.ai :
|
||||
|
||||
<table>
|
||||
<name>conversations (table existante - ajouter colonne)</name>
|
||||
<new_column>has_memory_backup INTEGER DEFAULT 0</new_column>
|
||||
<description>Flag indiquant si la conversation a été sauvegardée dans la mémoire</description>
|
||||
</table>
|
||||
|
||||
<table>
|
||||
<name>conversation_settings (table existante - optionnel)</name>
|
||||
<new_column>enable_memory INTEGER DEFAULT 1</new_column>
|
||||
<description>Active/désactive l'enrichissement automatique avec souvenirs pour cette conversation</description>
|
||||
</table>
|
||||
</sqlite_modifications>
|
||||
|
||||
<no_new_tables>
|
||||
Cette extension ne crée pas de nouvelles tables SQLite.
|
||||
Toute la mémoire sémantique est dans ChromaDB (géré par MCP).
|
||||
</no_new_tables>
|
||||
</database_schema>
|
||||
|
||||
<deployment>
|
||||
<development>
|
||||
Le projet Claude.ai Clone est déjà configuré et déployé.
|
||||
Cette extension ajoute simplement :
|
||||
- Le serveur MCP Ikario RAG (Python) qui doit tourner en background ou être lancé par le backend
|
||||
- Variables d'environnement pour la connexion MCP
|
||||
</development>
|
||||
|
||||
<environment_variables>
|
||||
Ajouter au .env existant :
|
||||
- MCP_IKARIO_SERVER_PATH : Chemin absolu vers ikario_rag/server.py
|
||||
- MCP_MEMORY_ENABLED : true | false (pour activer/désactiver la feature)
|
||||
</environment_variables>
|
||||
|
||||
<dependencies>
|
||||
Backend (package.json) :
|
||||
- Ajouter : @modelcontextprotocol/sdk (client MCP Node.js)
|
||||
|
||||
MCP Server (déjà installé) :
|
||||
- Python 3.11+
|
||||
- chromadb, sentence-transformers, numpy, mcp
|
||||
</dependencies>
|
||||
</deployment>
|
||||
</project_specification>
|
||||
498
prompts/app_spec_tavily_mcp.txt
Normal file
498
prompts/app_spec_tavily_mcp.txt
Normal file
@@ -0,0 +1,498 @@
|
||||
<project_specification>
|
||||
<project_name>ikario - Tavily MCP Integration for Internet Access</project_name>
|
||||
|
||||
<overview>
|
||||
This specification adds Tavily search capabilities via MCP (Model Context Protocol) to give Ikario
|
||||
internet access for real-time web searches. Tavily provides high-quality search results optimized
|
||||
for AI agents, making it ideal for research, fact-checking, and accessing current information.
|
||||
|
||||
This integration adds a new MCP server connection to the existing architecture (alongside the
|
||||
ikario-memory MCP server) and exposes Tavily search tools to Ikario during conversations.
|
||||
|
||||
All changes are additive and backward-compatible. Existing functionality remains unchanged.
|
||||
</overview>
|
||||
|
||||
<architecture_design>
|
||||
<mcp_integration>
|
||||
Tavily MCP Server Connection:
|
||||
- Uses @modelcontextprotocol/sdk Client to connect to Tavily MCP server
|
||||
- Connection can be stdio-based (local MCP server) or HTTP-based (remote)
|
||||
- Tavily MCP server provides search tools that are exposed to Claude via Tool Use API
|
||||
- Backend routes handle tool execution and return results to Claude
|
||||
</mcp_integration>
|
||||
|
||||
<benefits>
|
||||
- Real-time internet access for Ikario
|
||||
- High-quality search results optimized for LLMs
|
||||
- Fact-checking and verification capabilities
|
||||
- Access to current events and news
|
||||
- Research assistance with cited sources
|
||||
- Seamless integration with existing memory tools
|
||||
</benefits>
|
||||
</architecture_design>
|
||||
|
||||
<technology_stack>
|
||||
<mcp_server>
|
||||
<name>Tavily MCP Server</name>
|
||||
<protocol>Model Context Protocol (MCP)</protocol>
|
||||
<connection>stdio or HTTP transport</connection>
|
||||
<sdk>@modelcontextprotocol/sdk</sdk>
|
||||
<api_key>Tavily API key (from https://tavily.com)</api_key>
|
||||
</mcp_server>
|
||||
<backend>
|
||||
<runtime>Node.js with Express (existing)</runtime>
|
||||
<mcp_client>MCP Client for Tavily server connection</mcp_client>
|
||||
<tool_executor>Existing toolExecutor service extended with Tavily tools</tool_executor>
|
||||
</backend>
|
||||
<api_endpoints>
|
||||
<tavily_routes>GET/POST /api/tavily/* for Tavily-specific operations</tavily_routes>
|
||||
<existing_routes>Existing /api/claude/chat routes support Tavily tools automatically</existing_routes>
|
||||
</api_endpoints>
|
||||
</technology_stack>
|
||||
|
||||
<prerequisites>
|
||||
<environment_setup>
|
||||
- Tavily API key obtained from https://tavily.com (free tier available)
|
||||
- API key stored in environment variable TAVILY_API_KEY or configuration file
|
||||
- MCP SDK already installed (@modelcontextprotocol/sdk exists for ikario-memory)
|
||||
- Tavily MCP server installed (npm package or Python package)
|
||||
</environment_setup>
|
||||
<configuration>
|
||||
- Add Tavily MCP server config to server/.claude_settings.json or similar
|
||||
- Configure connection parameters (stdio vs HTTP)
|
||||
- Set API key securely
|
||||
</configuration>
|
||||
</prerequisites>
|
||||
|
||||
<core_features>
|
||||
<feature_1>
|
||||
<title>Tavily MCP Client Setup</title>
|
||||
<description>
|
||||
Create MCP client connection to Tavily search server. This is similar to the existing
|
||||
ikario-memory MCP client but connects to Tavily instead.
|
||||
|
||||
Implementation:
|
||||
- Create server/services/tavilyMcpClient.js
|
||||
- Initialize MCP client with Tavily server connection
|
||||
- Handle connection lifecycle (connect, disconnect, reconnect)
|
||||
- Implement health checks and connection status
|
||||
- Export client instance and helper functions
|
||||
|
||||
Configuration:
|
||||
- Read Tavily API key from environment or config file
|
||||
- Configure transport (stdio or HTTP)
|
||||
- Set connection timeout and retry logic
|
||||
- Log connection status for debugging
|
||||
|
||||
Error Handling:
|
||||
- Graceful degradation if Tavily is unavailable
|
||||
- Connection retry with exponential backoff
|
||||
- Clear error messages for configuration issues
|
||||
</description>
|
||||
<priority>1</priority>
|
||||
<category>backend</category>
|
||||
<test_steps>
|
||||
1. Verify MCP client can connect to Tavily server on startup
|
||||
2. Test connection health check endpoint returns correct status
|
||||
3. Verify graceful handling when Tavily API key is missing
|
||||
4. Test reconnection logic when connection drops
|
||||
5. Verify connection status is logged correctly
|
||||
6. Test that server starts even if Tavily is unavailable
|
||||
</test_steps>
|
||||
</feature_1>
|
||||
|
||||
<feature_2>
|
||||
<title>Tavily Tool Configuration</title>
|
||||
<description>
|
||||
Configure Tavily search tools to be available to Claude during conversations.
|
||||
This integrates with the existing tool system (like memory tools).
|
||||
|
||||
Implementation:
|
||||
- Create server/config/tavilyTools.js
|
||||
- Define tool schemas for Tavily search capabilities
|
||||
- Integrate with existing toolExecutor service
|
||||
- Add Tavily tools to system prompt alongside memory tools
|
||||
|
||||
Tavily Tools to Expose:
|
||||
- tavily_search: General web search with AI-optimized results
|
||||
- Parameters: query (string), max_results (number), search_depth (basic/advanced)
|
||||
- Returns: Array of search results with title, url, content, score
|
||||
|
||||
- tavily_search_news: News-specific search for current events
|
||||
- Parameters: query (string), max_results (number), days (number)
|
||||
- Returns: Recent news articles with metadata
|
||||
|
||||
Tool Schema:
|
||||
- Follow Claude Tool Use API format
|
||||
- Clear descriptions for each tool
|
||||
- Well-defined input schemas with validation
|
||||
- Proper error handling in tool execution
|
||||
</description>
|
||||
<priority>1</priority>
|
||||
<category>backend</category>
|
||||
<test_steps>
|
||||
1. Verify Tavily tools are listed in available tools
|
||||
2. Test tool schema validation with valid inputs
|
||||
3. Test tool schema validation rejects invalid inputs
|
||||
4. Verify tools appear in Claude's system prompt
|
||||
5. Test that tool descriptions are clear and accurate
|
||||
6. Verify tools can be called without errors
|
||||
</test_steps>
|
||||
</feature_2>
|
||||
|
||||
<feature_3>
|
||||
<title>Tavily Tool Executor Integration</title>
|
||||
<description>
|
||||
Integrate Tavily tools into the existing toolExecutor service so Claude can
|
||||
use them during conversations.
|
||||
|
||||
Implementation:
|
||||
- Extend server/services/toolExecutor.js to handle Tavily tools
|
||||
- Add tool detection for tavily_search and tavily_search_news
|
||||
- Implement tool execution logic using Tavily MCP client
|
||||
- Format Tavily results for Claude consumption
|
||||
- Handle errors and timeouts gracefully
|
||||
|
||||
Tool Execution Flow:
|
||||
1. Claude requests tool use (e.g., tavily_search)
|
||||
2. toolExecutor detects Tavily tool request
|
||||
3. Call Tavily MCP client with tool parameters
|
||||
4. Receive and format search results
|
||||
5. Return formatted results to Claude
|
||||
6. Claude incorporates results into response
|
||||
|
||||
Result Formatting:
|
||||
- Convert Tavily results to Claude-friendly format
|
||||
- Include source URLs for citation
|
||||
- Add relevance scores
|
||||
- Truncate content if too long
|
||||
- Handle empty results gracefully
|
||||
</description>
|
||||
<priority>1</priority>
|
||||
<category>backend</category>
|
||||
<test_steps>
|
||||
1. Test tavily_search tool execution with valid query
|
||||
2. Verify results are properly formatted
|
||||
3. Test tavily_search_news tool execution
|
||||
4. Verify error handling when Tavily API fails
|
||||
5. Test timeout handling for slow searches
|
||||
6. Verify results include proper citations and URLs
|
||||
7. Test with empty search results
|
||||
8. Test with very long search queries
|
||||
</test_steps>
|
||||
</feature_3>
|
||||
|
||||
<feature_4>
|
||||
<title>System Prompt Enhancement for Internet Access</title>
|
||||
<description>
|
||||
Update the system prompt to inform Ikario about internet access capabilities.
|
||||
This should be added alongside existing memory tools instructions.
|
||||
|
||||
Implementation:
|
||||
- Update MEMORY_SYSTEM_PROMPT in server/routes/messages.js and claude.js
|
||||
- Add Tavily tools documentation
|
||||
- Provide usage guidelines for when to search the internet
|
||||
- Include examples of good search queries
|
||||
|
||||
Prompt Addition:
|
||||
"## Internet Access via Tavily
|
||||
|
||||
Tu as accès à internet en temps réel via deux outils de recherche :
|
||||
|
||||
1. tavily_search : Recherche web générale optimisée pour l'IA
|
||||
- Utilise pour : rechercher des informations actuelles, vérifier des faits,
|
||||
trouver des sources fiables
|
||||
- Paramètres : query (ta question), max_results (nombre de résultats, défaut: 5),
|
||||
search_depth ('basic' ou 'advanced')
|
||||
- Retourne : Résultats avec titre, URL, contenu et score de pertinence
|
||||
|
||||
2. tavily_search_news : Recherche d'actualités récentes
|
||||
- Utilise pour : événements actuels, nouvelles, actualités
|
||||
- Paramètres : query, max_results, days (nombre de jours en arrière, défaut: 7)
|
||||
|
||||
Quand utiliser la recherche internet :
|
||||
- Quand l'utilisateur demande des informations récentes ou actuelles
|
||||
- Pour vérifier des faits ou données que tu n'es pas sûr de connaître
|
||||
- Quand ta base de connaissances est trop ancienne (après janvier 2025)
|
||||
- Pour trouver des sources et citations spécifiques
|
||||
- Pour des requêtes nécessitant des données en temps réel
|
||||
|
||||
N'utilise PAS la recherche pour :
|
||||
- Des questions sur ta propre identité ou capacités
|
||||
- Des concepts généraux que tu connais déjà bien
|
||||
- Des questions purement créatives ou d'opinion
|
||||
|
||||
Utilise ces outils de façon autonome selon les besoins de la conversation.
|
||||
Cite toujours tes sources quand tu utilises des informations de Tavily."
|
||||
</description>
|
||||
<priority>2</priority>
|
||||
<category>backend</category>
|
||||
<test_steps>
|
||||
1. Verify system prompt includes Tavily instructions
|
||||
2. Test that Claude understands when to use Tavily search
|
||||
3. Verify Claude cites sources from Tavily results
|
||||
4. Test that Claude uses appropriate search queries
|
||||
5. Verify Claude chooses between tavily_search and tavily_search_news correctly
|
||||
6. Test that Claude doesn't over-use search for simple questions
|
||||
</test_steps>
|
||||
</feature_4>
|
||||
|
||||
<feature_5>
|
||||
<title>Tavily Status API Endpoint</title>
|
||||
<description>
|
||||
Create API endpoint to check Tavily MCP connection status and search capabilities.
|
||||
Similar to /api/memory/status endpoint.
|
||||
|
||||
Implementation:
|
||||
- Create GET /api/tavily/status endpoint
|
||||
- Return connection status, available tools, and configuration
|
||||
- Create GET /api/tavily/health endpoint for health checks
|
||||
- Add Tavily status to existing /api/memory/stats (rename to /api/tools/stats)
|
||||
|
||||
Response Format:
|
||||
{
|
||||
"success": true,
|
||||
"data": {
|
||||
"connected": true,
|
||||
"message": "Tavily MCP server is connected",
|
||||
"tools": ["tavily_search", "tavily_search_news"],
|
||||
"apiKeyConfigured": true,
|
||||
"transport": "stdio"
|
||||
}
|
||||
}
|
||||
</description>
|
||||
<priority>2</priority>
|
||||
<category>backend</category>
|
||||
<test_steps>
|
||||
1. Test GET /api/tavily/status returns correct status
|
||||
2. Verify status shows "connected" when Tavily is available
|
||||
3. Verify status shows "disconnected" when Tavily is unavailable
|
||||
4. Test health endpoint returns proper status code
|
||||
5. Verify tools list is accurate
|
||||
6. Test with missing API key shows proper error
|
||||
</test_steps>
|
||||
</feature_5>
|
||||
|
||||
<feature_6>
|
||||
<title>Frontend UI Indicator for Internet Access</title>
|
||||
<description>
|
||||
Add visual indicator in the UI to show when Ikario has internet access via Tavily.
|
||||
This can be displayed alongside the existing memory status indicator.
|
||||
|
||||
Implementation:
|
||||
- Add Tavily status indicator in header or sidebar
|
||||
- Show online/offline status for Tavily connection
|
||||
- Optional: Show when Tavily is being used during a conversation
|
||||
- Optional: Add tooltip explaining internet access capabilities
|
||||
|
||||
Visual Design:
|
||||
- Globe or wifi icon to represent internet access
|
||||
- Green when connected, gray when disconnected
|
||||
- Subtle animation when search is in progress
|
||||
- Tooltip: "Internet access via Tavily" or similar
|
||||
|
||||
Integration:
|
||||
- Use existing useMemory hook pattern or create useTavily hook
|
||||
- Poll /api/tavily/status periodically (every 60s)
|
||||
- Update status in real-time during searches
|
||||
</description>
|
||||
<priority>3</priority>
|
||||
<category>frontend</category>
|
||||
<test_steps>
|
||||
1. Verify internet access indicator appears in UI
|
||||
2. Test status updates when Tavily connects/disconnects
|
||||
3. Verify tooltip shows correct information
|
||||
4. Test that indicator shows activity during searches
|
||||
5. Verify status polling doesn't impact performance
|
||||
6. Test with Tavily disabled shows offline status
|
||||
</test_steps>
|
||||
</feature_6>
|
||||
|
||||
<feature_7>
|
||||
<title>Manual Search UI (Optional Enhancement)</title>
|
||||
<description>
|
||||
Optional: Add manual search interface to allow users to trigger Tavily searches directly,
|
||||
similar to the memory search panel.
|
||||
|
||||
Implementation:
|
||||
- Add "Internet Search" panel in sidebar (alongside Memory panel)
|
||||
- Search input for manual Tavily queries
|
||||
- Display search results with title, snippet, URL
|
||||
- Click to insert results into conversation
|
||||
- Filter by search type (general vs news)
|
||||
|
||||
This is OPTIONAL and lower priority. The primary use case is autonomous search by Claude.
|
||||
</description>
|
||||
<priority>4</priority>
|
||||
<category>frontend</category>
|
||||
<test_steps>
|
||||
1. Verify search panel appears in sidebar
|
||||
2. Test manual search returns results
|
||||
3. Verify results display properly with links
|
||||
4. Test inserting results into conversation
|
||||
5. Test news search filter works correctly
|
||||
6. Verify search history is saved (optional)
|
||||
</test_steps>
|
||||
</feature_7>
|
||||
|
||||
<feature_8>
|
||||
<title>Configuration and Settings</title>
|
||||
<description>
|
||||
Add Tavily configuration options to settings and environment.
|
||||
|
||||
Implementation:
|
||||
- Add TAVILY_API_KEY to environment variables
|
||||
- Add Tavily settings to .claude_settings.json or similar config file
|
||||
- Create server/config/tavilyConfig.js for configuration management
|
||||
- Document configuration options in README
|
||||
|
||||
Configuration Options:
|
||||
- API key
|
||||
- Max results per search (default: 5)
|
||||
- Search depth (basic/advanced)
|
||||
- Timeout duration
|
||||
- Enable/disable Tavily globally
|
||||
- Rate limiting settings
|
||||
|
||||
Security:
|
||||
- API key should NOT be exposed to frontend
|
||||
- Use environment variable or secure config file
|
||||
- Validate API key on startup
|
||||
- Log warnings if API key is missing
|
||||
</description>
|
||||
<priority>2</priority>
|
||||
<category>backend</category>
|
||||
<test_steps>
|
||||
1. Verify API key is read from environment variable
|
||||
2. Test fallback to config file if env var not set
|
||||
3. Verify API key validation on startup
|
||||
4. Test configuration options are applied correctly
|
||||
5. Verify API key is never exposed in API responses
|
||||
6. Test enabling/disabling Tavily via config
|
||||
</test_steps>
|
||||
</feature_8>
|
||||
|
||||
<feature_9>
|
||||
<title>Error Handling and Rate Limiting</title>
|
||||
<description>
|
||||
Implement robust error handling and rate limiting for Tavily API calls.
|
||||
|
||||
Implementation:
|
||||
- Detect and handle Tavily API errors (rate limits, invalid API key, etc.)
|
||||
- Implement client-side rate limiting to avoid hitting Tavily limits
|
||||
- Cache search results for duplicate queries (optional)
|
||||
- Provide clear error messages to Claude when searches fail
|
||||
|
||||
Error Types:
|
||||
- 401: Invalid API key
|
||||
- 429: Rate limit exceeded
|
||||
- 500: Tavily server error
|
||||
- Timeout: Search took too long
|
||||
- Network: Connection failed
|
||||
|
||||
Rate Limiting:
|
||||
- Track searches per minute/hour
|
||||
- Queue requests if limit reached
|
||||
- Return cached results for duplicate queries within 5 minutes
|
||||
- Log rate limit warnings
|
||||
</description>
|
||||
<priority>2</priority>
|
||||
<category>backend</category>
|
||||
<test_steps>
|
||||
1. Test error handling for invalid API key
|
||||
2. Verify rate limit detection and handling
|
||||
3. Test timeout handling for slow searches
|
||||
4. Verify error messages are clear to Claude
|
||||
5. Test rate limiting prevents API abuse
|
||||
6. Verify caching works for duplicate queries
|
||||
</test_steps>
|
||||
</feature_9>
|
||||
|
||||
<feature_10>
|
||||
<title>Documentation and README Updates</title>
|
||||
<description>
|
||||
Update project documentation to explain Tavily integration.
|
||||
|
||||
Implementation:
|
||||
- Update main README.md with Tavily setup instructions
|
||||
- Add TAVILY_SETUP.md with detailed configuration guide
|
||||
- Document API endpoints in README
|
||||
- Add examples of using Tavily with Ikario
|
||||
- Document troubleshooting steps
|
||||
|
||||
Documentation Sections:
|
||||
- Prerequisites (Tavily API key)
|
||||
- Installation steps
|
||||
- Configuration options
|
||||
- Testing Tavily connection
|
||||
- Example conversations using internet search
|
||||
- Troubleshooting common issues
|
||||
- API reference for Tavily endpoints
|
||||
</description>
|
||||
<priority>3</priority>
|
||||
<category>documentation</category>
|
||||
<test_steps>
|
||||
1. Verify README has Tavily setup section
|
||||
2. Test that setup instructions are clear and complete
|
||||
3. Verify all configuration options are documented
|
||||
4. Test examples work as described
|
||||
5. Verify troubleshooting section covers common issues
|
||||
</test_steps>
|
||||
</feature_10>
|
||||
</core_features>
|
||||
|
||||
<implementation_notes>
|
||||
<order>
|
||||
Recommended implementation order:
|
||||
1. Feature 1 (MCP Client Setup) - Foundation
|
||||
2. Feature 2 (Tool Configuration) - Core functionality
|
||||
3. Feature 3 (Tool Executor Integration) - Core functionality
|
||||
4. Feature 8 (Configuration) - Required for testing
|
||||
5. Feature 4 (System Prompt) - Makes tools accessible to Claude
|
||||
6. Feature 9 (Error Handling) - Production readiness
|
||||
7. Feature 5 (Status API) - Monitoring
|
||||
8. Feature 10 (Documentation) - User onboarding
|
||||
9. Feature 6 (UI Indicator) - Nice to have
|
||||
10. Feature 7 (Manual Search UI) - Optional enhancement
|
||||
</order>
|
||||
|
||||
<testing>
|
||||
After implementing the first five steps of the recommended order above (Features 1, 2, 3, 8 and 4), you should be able to:
|
||||
- Ask Ikario: "Quelle est l'actualité aujourd'hui ?"
|
||||
- Ask Ikario: "Recherche des informations sur [topic actuel]"
|
||||
- Ask Ikario: "Vérifie cette information : [claim]"
|
||||
|
||||
Ikario should autonomously use Tavily search and cite sources.
|
||||
</testing>
|
||||
|
||||
<compatibility>
|
||||
- This specification is fully compatible with existing ikario-memory MCP integration
|
||||
- Ikario will have both memory tools AND internet search tools
|
||||
- Tools can be used together in the same conversation
|
||||
- No conflicts expected between tool systems
|
||||
</compatibility>
|
||||
</implementation_notes>
|
||||
|
||||
<safety_requirements>
|
||||
<critical>
|
||||
- DO NOT expose Tavily API key to frontend or in API responses
|
||||
- DO NOT modify existing MCP memory integration
|
||||
- DO NOT break existing conversation functionality
|
||||
- Tavily should gracefully degrade if unavailable (don't crash the app)
|
||||
- Implement proper rate limiting to avoid API abuse
|
||||
- Validate all user inputs before passing to Tavily
|
||||
- Sanitize search results before displaying (XSS prevention)
|
||||
- Log all Tavily API calls for monitoring and debugging
|
||||
</critical>
|
||||
</safety_requirements>
|
||||
|
||||
<success_metrics>
|
||||
- Ikario can successfully perform internet searches when asked
|
||||
- Search results are relevant and well-formatted
|
||||
- Sources are properly cited
|
||||
- Tavily integration doesn't slow down conversations
|
||||
- Error handling is robust and user-friendly
|
||||
- Configuration is straightforward
|
||||
- Documentation is clear and complete
|
||||
</success_metrics>
|
||||
</project_specification>
|
||||
290
test_security.py
290
test_security.py
@@ -1,290 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Security Hook Tests
|
||||
===================
|
||||
|
||||
Tests for the bash command security validation logic.
|
||||
Run with: python test_security.py
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import sys
|
||||
|
||||
from security import (
|
||||
bash_security_hook,
|
||||
extract_commands,
|
||||
validate_chmod_command,
|
||||
validate_init_script,
|
||||
)
|
||||
|
||||
|
||||
def test_hook(command: str, should_block: bool) -> bool:
    """Run one command through the bash security hook and report the result.

    Wraps ``command`` in a Bash tool-call payload, runs
    ``bash_security_hook`` on it with ``asyncio.run``, and treats the call
    as blocked when the returned ``"decision"`` equals ``"block"``.

    Args:
        command: Shell command line to submit to the hook.
        should_block: True if the hook is expected to block the command.

    Returns:
        True when the observed outcome matches ``should_block``; False
        otherwise (after printing expected/actual and any reason given).
    """
    payload = {"tool_name": "Bash", "tool_input": {"command": command}}
    verdict = asyncio.run(bash_security_hook(payload))
    was_blocked = verdict.get("decision") == "block"

    matched = was_blocked == should_block
    label = "PASS" if matched else "FAIL"
    print(f" {label}: {command!r}")
    if matched:
        return True

    # Mismatch: spell out what was expected versus what the hook decided.
    wanted = "blocked" if should_block else "allowed"
    got = "blocked" if was_blocked else "allowed"
    why = verdict.get("reason", "")
    print(f" Expected: {wanted}, Got: {got}")
    if why:
        print(f" Reason: {why}")
    return False
|
||||
|
||||
|
||||
def test_extract_commands():
    """Check ``extract_commands`` against a table of command lines.

    Each case pairs a shell command line with the list of command names
    the extractor is expected to return. One PASS/FAIL line is printed per
    case.

    Returns:
        A ``(passed, failed)`` tuple of case counts.
    """
    print("\nTesting command extraction:\n")

    cases = (
        ("ls -la", ["ls"]),
        ("npm install && npm run build", ["npm", "npm"]),
        ("cat file.txt | grep pattern", ["cat", "grep"]),
        ("/usr/bin/node script.js", ["node"]),
        ("VAR=value ls", ["ls"]),
        ("git status || git init", ["git", "git"]),
    )

    ok_count = 0
    bad_count = 0
    for command_line, want in cases:
        got = extract_commands(command_line)
        if got != want:
            print(f" FAIL: {command_line!r}")
            print(f" Expected: {want}, Got: {got}")
            bad_count += 1
            continue
        print(f" PASS: {command_line!r} -> {got}")
        ok_count += 1

    return ok_count, bad_count
|
||||
|
||||
|
||||
def test_validate_chmod():
    """Check ``validate_chmod_command`` against allowed and blocked cases.

    Each case is ``(command, should_be_allowed, description)``. The
    validator's ``(allowed, reason)`` result is compared with the expected
    flag and one PASS/FAIL line is printed per case; failures also print
    the expected/actual outcome and the validator's reason when present.

    Returns:
        A ``(passed, failed)`` tuple of case counts.
    """
    print("\nTesting chmod validation:\n")

    cases = (
        # Allowed cases
        ("chmod +x init.sh", True, "basic +x"),
        ("chmod +x script.sh", True, "+x on any script"),
        ("chmod u+x init.sh", True, "user +x"),
        ("chmod a+x init.sh", True, "all +x"),
        ("chmod ug+x init.sh", True, "user+group +x"),
        ("chmod +x file1.sh file2.sh", True, "multiple files"),
        # Blocked cases
        ("chmod 777 init.sh", False, "numeric mode"),
        ("chmod 755 init.sh", False, "numeric mode 755"),
        ("chmod +w init.sh", False, "write permission"),
        ("chmod +r init.sh", False, "read permission"),
        ("chmod -x init.sh", False, "remove execute"),
        ("chmod -R +x dir/", False, "recursive flag"),
        ("chmod --recursive +x dir/", False, "long recursive flag"),
        ("chmod +x", False, "missing file"),
    )

    ok_count = 0
    bad_count = 0
    for command_line, want_allowed, label in cases:
        allowed, why = validate_chmod_command(command_line)
        if allowed != want_allowed:
            wanted = "allowed" if want_allowed else "blocked"
            got = "allowed" if allowed else "blocked"
            print(f" FAIL: {command_line!r} ({label})")
            print(f" Expected: {wanted}, Got: {got}")
            if why:
                print(f" Reason: {why}")
            bad_count += 1
            continue
        print(f" PASS: {command_line!r} ({label})")
        ok_count += 1

    return ok_count, bad_count
|
||||
|
||||
|
||||
def test_validate_init_script():
    """Check ``validate_init_script`` against allowed and blocked cases.

    Each case is ``(command, should_be_allowed, description)``. The
    validator's ``(allowed, reason)`` result is compared with the expected
    flag and one PASS/FAIL line is printed per case; failures also print
    the expected/actual outcome and the validator's reason when present.

    Returns:
        A ``(passed, failed)`` tuple of case counts.
    """
    print("\nTesting init.sh validation:\n")

    cases = (
        # Allowed cases
        ("./init.sh", True, "basic ./init.sh"),
        ("./init.sh arg1 arg2", True, "with arguments"),
        ("/path/to/init.sh", True, "absolute path"),
        ("../dir/init.sh", True, "relative path with init.sh"),
        # Blocked cases
        ("./setup.sh", False, "different script name"),
        ("./init.py", False, "python script"),
        ("bash init.sh", False, "bash invocation"),
        ("sh init.sh", False, "sh invocation"),
        ("./malicious.sh", False, "malicious script"),
        ("./init.sh; rm -rf /", False, "command injection attempt"),
    )

    ok_count = 0
    bad_count = 0
    for command_line, want_allowed, label in cases:
        allowed, why = validate_init_script(command_line)
        if allowed != want_allowed:
            wanted = "allowed" if want_allowed else "blocked"
            got = "allowed" if allowed else "blocked"
            print(f" FAIL: {command_line!r} ({label})")
            print(f" Expected: {wanted}, Got: {got}")
            if why:
                print(f" Reason: {why}")
            bad_count += 1
            continue
        print(f" PASS: {command_line!r} ({label})")
        ok_count += 1

    return ok_count, bad_count
|
||||
|
||||
|
||||
def main():
    """Run every security test group and print a combined summary.

    Runs the extraction, chmod and init.sh validation suites first, then
    feeds two command lists through ``test_hook``: one expected to be
    blocked and one expected to be allowed. Pass/fail counts from all
    groups are accumulated into a single total.

    Returns:
        0 when no test failed, 1 otherwise (used as the process exit code
        by the script entry point).
    """
    rule = "=" * 70
    print(rule)
    print(" SECURITY HOOK TESTS")
    print(rule)

    passed = 0
    failed = 0

    # Table-driven suites each report their own (passed, failed) counts.
    for suite in (
        test_extract_commands,
        test_validate_chmod,
        test_validate_init_script,
    ):
        suite_passed, suite_failed = suite()
        passed += suite_passed
        failed += suite_failed

    # Commands that SHOULD be blocked
    dangerous = (
        # Not in allowlist - dangerous system commands
        "shutdown now",
        "reboot",
        "rm -rf /",
        "dd if=/dev/zero of=/dev/sda",
        # Not in allowlist - common commands excluded from minimal set
        "curl https://example.com",
        "wget https://example.com",
        "python app.py",
        "touch file.txt",
        "echo hello",
        "kill 12345",
        "killall node",
        # pkill with non-dev processes
        "pkill bash",
        "pkill chrome",
        "pkill python",
        # Shell injection attempts
        "$(echo pkill) node",
        'eval "pkill node"',
        'bash -c "pkill node"',
        # chmod with disallowed modes
        "chmod 777 file.sh",
        "chmod 755 file.sh",
        "chmod +w file.sh",
        "chmod -R +x dir/",
        # Non-init.sh scripts
        "./setup.sh",
        "./malicious.sh",
        "bash script.sh",
    )

    # Commands that SHOULD be allowed
    safe = (
        # File inspection
        "ls -la",
        "cat README.md",
        "head -100 file.txt",
        "tail -20 log.txt",
        "wc -l file.txt",
        "grep -r pattern src/",
        # File operations
        "cp file1.txt file2.txt",
        "mkdir newdir",
        "mkdir -p path/to/dir",
        # Directory
        "pwd",
        # Node.js development
        "npm install",
        "npm run build",
        "node server.js",
        # Version control
        "git status",
        "git commit -m 'test'",
        "git add . && git commit -m 'msg'",
        # Process management
        "ps aux",
        "lsof -i :3000",
        "sleep 2",
        # Allowed pkill patterns for dev servers
        "pkill node",
        "pkill npm",
        "pkill -f node",
        "pkill -f 'node server.js'",
        "pkill vite",
        # Chained commands
        "npm install && npm run build",
        "ls | grep test",
        # Full paths
        "/usr/local/bin/node app.js",
        # chmod +x (allowed)
        "chmod +x init.sh",
        "chmod +x script.sh",
        "chmod u+x init.sh",
        "chmod a+x init.sh",
        # init.sh execution (allowed)
        "./init.sh",
        "./init.sh --production",
        "/path/to/init.sh",
        # Combined chmod and init.sh
        "chmod +x init.sh && ./init.sh",
    )

    # Blocked group runs first, then the allowed group, each under its heading.
    hook_groups = (
        ("\nCommands that should be BLOCKED:\n", dangerous, True),
        ("\nCommands that should be ALLOWED:\n", safe, False),
    )
    for heading, commands, expect_block in hook_groups:
        print(heading)
        for cmd in commands:
            if test_hook(cmd, should_block=expect_block):
                passed += 1
            else:
                failed += 1

    # Summary
    print("\n" + "-" * 70)
    print(f" Results: {passed} passed, {failed} failed")
    print("-" * 70)

    if failed:
        print(f"\n {failed} TEST(S) FAILED")
        return 1

    print("\n ALL TESTS PASSED")
    return 0
|
||||
|
||||
|
||||
# Script entry point: exit with main()'s return value as the process status.
if __name__ == "__main__":
    raise SystemExit(main())
|
||||
Reference in New Issue
Block a user