Merge branch 'main' of https://github.com/davidblanc347/linear-coding-agent
This commit is contained in:
@@ -6,38 +6,64 @@
|
|||||||
"Bash(netstat:*)",
|
"Bash(netstat:*)",
|
||||||
"Bash(docker-compose:*)",
|
"Bash(docker-compose:*)",
|
||||||
"Bash(ls:*)",
|
"Bash(ls:*)",
|
||||||
|
"Bash(git:*)",
|
||||||
"Bash(rm:*)",
|
"Bash(rm:*)",
|
||||||
"Bash(python autonomous_agent_demo.py:*)",
|
"Bash(python autonomous_agent_demo.py:*)",
|
||||||
"Bash(dir C:GitHublinear_coding_philosophia_raggenerationslibrary_rag*.py)",
|
|
||||||
"Bash(git add:*)",
|
|
||||||
"Bash(git commit -m \"$\\(cat <<''EOF''\nFix import error: rename delete_document_passages to delete_document_chunks\n\nThe function was renamed in weaviate_ingest.py but the import in __init__.py\nwas not updated, causing ImportError when using the library.\n\nChanges:\n- Updated import statement in utils/__init__.py\n- Updated __all__ export list to use correct function name\nEOF\n\\)\")",
|
|
||||||
"Bash(dir \"C:\\\\GitHub\\\\linear_coding_philosophia_rag\\\\generations\\\\library_rag\\\\.env\")",
|
|
||||||
"Bash(git commit:*)",
|
|
||||||
"Bash(tasklist:*)",
|
|
||||||
"Bash(findstr:*)",
|
|
||||||
"Bash(wmic process:*)",
|
|
||||||
"Bash(powershell -Command \"Get-Process python | Select-Object Id,Path,StartTime | Format-Table -AutoSize\")",
|
|
||||||
"Bash(powershell -Command \"Get-WmiObject Win32_Process -Filter \"\"name = ''python.exe''\"\" | Select-Object ProcessId, CommandLine | Format-List\")",
|
|
||||||
"Bash(timeout:*)",
|
|
||||||
"Bash(powershell -Command:*)",
|
|
||||||
"Bash(python:*)",
|
"Bash(python:*)",
|
||||||
"Bash(dir \"C:\\\\GitHub\\\\linear_coding_library_rag\\\\generations\\\\library_rag\")",
|
"Bash(docker logs:*)",
|
||||||
"Bash(docker ps:*)",
|
|
||||||
"Bash(curl:*)",
|
"Bash(curl:*)",
|
||||||
"Bash(dir:*)",
|
"Bash(timeout 30 tail:*)",
|
||||||
|
"Bash(xargs:*)",
|
||||||
|
"Bash(npm run dev:*)",
|
||||||
|
"Bash(npm run build:*)",
|
||||||
|
"Bash(npm install:*)",
|
||||||
|
"WebFetch(domain:docs.anthropic.com)",
|
||||||
|
"WebFetch(domain:docs.claude.com)",
|
||||||
|
"Bash(npm start)",
|
||||||
|
"Bash(node test_extended_thinking.js:*)",
|
||||||
|
"Bash(node test_screenshot.js:*)",
|
||||||
|
"Bash(node test_thinking_badge.js:*)",
|
||||||
|
"Bash(node test_thinking_badge_simple.js:*)",
|
||||||
|
"Bash(node:*)",
|
||||||
|
"Bash(npx sqlite3:*)",
|
||||||
|
"Bash(taskkill:*)",
|
||||||
|
"Bash(findstr:*)",
|
||||||
|
"Bash(tee:*)",
|
||||||
"Bash(grep:*)",
|
"Bash(grep:*)",
|
||||||
"Bash(git push:*)",
|
"Bash(timeout 10 tail:*)",
|
||||||
"Bash(mypy:*)",
|
"Bash(iconv:*)",
|
||||||
"WebSearch",
|
"Bash(pip install:*)",
|
||||||
"Bash(nvidia-smi:*)",
|
"Bash(sqlite3:*)",
|
||||||
"WebFetch(domain:cr.weaviate.io)",
|
"Bash(wmic process where \"name=''python.exe''\" get ProcessId,CommandLine /format:list)",
|
||||||
"Bash(git restore:*)",
|
"Bash(powershell -Command \"Get-Process python | Select-Object Id, Path, StartTime | Format-List\")",
|
||||||
"Bash(git log:*)",
|
"Bash(powershell:*)",
|
||||||
"Bash(done)",
|
"Bash(timeout 5 tail:*)",
|
||||||
"Bash(git remote set-url:*)",
|
"Bash(timeout 8 tail:*)",
|
||||||
"Bash(docker compose:*)",
|
"Bash(find:*)",
|
||||||
"Bash(pytest:*)",
|
"Bash(npm:*)",
|
||||||
"Bash(git pull:*)"
|
"Bash(tasklist:*)",
|
||||||
|
"Bash(chmod:*)",
|
||||||
|
"Bash(restart.bat)",
|
||||||
|
"Bash(npm run dev)",
|
||||||
|
"Bash(pkill:*)",
|
||||||
|
"Bash(claude doctor:*)",
|
||||||
|
"Bash(dir /s /b \".claude_settings.json\")",
|
||||||
|
"Bash(dir /s /b \"settings.local.json\")",
|
||||||
|
"Bash(python -m json.tool:*)",
|
||||||
|
"Bash(del NUL)",
|
||||||
|
"Bash(lsof:*)",
|
||||||
|
"Bash(dir:*)",
|
||||||
|
"Bash(docker ps:*)",
|
||||||
|
"Bash(wmic process where \"name=''node.exe'' OR name=''python.exe''\" get ProcessId,CommandLine /format:list)",
|
||||||
|
"Bash(cmd /c \"cd C:\\\\GitHub\\\\Linear_coding_ikario_body\\\\generations\\\\ikario_body && restart.bat\")",
|
||||||
|
"Bash(cmd /c \"C:\\\\GitHub\\\\linear_coding_library_rag\\\\generations\\\\library_rag\\\\diagnose_wsl.bat\")",
|
||||||
|
"Bash(wsl --status:*)",
|
||||||
|
"Bash(wsl --list:*)",
|
||||||
|
"Bash(docker version:*)",
|
||||||
|
"Bash(docker info:*)",
|
||||||
|
"Bash(docker stats:*)",
|
||||||
|
"Bash(timeout:*)",
|
||||||
|
"Bash(docker inspect:*)"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
4
.gitignore
vendored
4
.gitignore
vendored
@@ -7,3 +7,7 @@ logs/
|
|||||||
.env
|
.env
|
||||||
venv
|
venv
|
||||||
__pycache__
|
__pycache__
|
||||||
|
|
||||||
|
# Node modules (if any)
|
||||||
|
node_modules/
|
||||||
|
package-lock.json
|
||||||
337
GUIDE_NEW_APP.md
337
GUIDE_NEW_APP.md
@@ -1,337 +0,0 @@
|
|||||||
# Guide : Créer une Nouvelle Application avec le Framework Linear Coding
|
|
||||||
|
|
||||||
Ce guide explique comment utiliser ce framework pour créer une **toute nouvelle application** à partir de zéro.
|
|
||||||
|
|
||||||
## Vue d'ensemble
|
|
||||||
|
|
||||||
Ce framework permet de générer automatiquement une application complète en utilisant :
|
|
||||||
- **Linear** pour la gestion de projet (issues, suivi, commentaires)
|
|
||||||
- **Claude Agent SDK** pour le développement autonome
|
|
||||||
- **Spécifications en format XML** pour décrire l'application
|
|
||||||
|
|
||||||
## Structure du Framework
|
|
||||||
|
|
||||||
### Fichiers génériques (à NE PAS modifier)
|
|
||||||
|
|
||||||
Ces fichiers font partie du framework et sont réutilisables pour toutes les applications :
|
|
||||||
|
|
||||||
```
|
|
||||||
linear-coding-agent/
|
|
||||||
├── autonomous_agent_demo.py # Point d'entrée principal
|
|
||||||
├── agent.py # Logique des sessions d'agent
|
|
||||||
├── client.py # Configuration SDK Claude + MCP
|
|
||||||
├── security.py # Validation et whitelist des commandes
|
|
||||||
├── progress.py # Utilitaires de suivi de progression
|
|
||||||
├── prompts.py # Utilitaires de chargement des prompts
|
|
||||||
├── linear_config.py # Constantes de configuration Linear
|
|
||||||
├── requirements.txt # Dépendances Python
|
|
||||||
└── prompts/
|
|
||||||
├── initializer_prompt.md # Prompt pour la session initiale
|
|
||||||
├── initializer_bis_prompt.md # Prompt pour ajouter des features
|
|
||||||
└── coding_prompt.md # Prompt pour les sessions de codage
|
|
||||||
```
|
|
||||||
|
|
||||||
**⚠️ Ne modifiez PAS ces fichiers** - ils sont génériques et fonctionnent pour toutes les applications.
|
|
||||||
|
|
||||||
### Fichiers spécifiques à votre application (à CRÉER)
|
|
||||||
|
|
||||||
Le seul fichier que vous devez créer est :
|
|
||||||
|
|
||||||
```
|
|
||||||
prompts/
|
|
||||||
└── app_spec.txt # Votre spécification d'application (format XML)
|
|
||||||
```
|
|
||||||
|
|
||||||
## Étapes pour Créer une Nouvelle Application
|
|
||||||
|
|
||||||
### Étape 1 : Créer votre fichier de spécification
|
|
||||||
|
|
||||||
Créez un fichier `prompts/app_spec.txt` qui décrit votre application. Utilisez le format XML suivant :
|
|
||||||
|
|
||||||
```xml
|
|
||||||
<project_specification>
|
|
||||||
<project_name>Nom de Votre Application</project_name>
|
|
||||||
|
|
||||||
<overview>
|
|
||||||
Description complète de votre application. Expliquez ce que vous voulez construire,
|
|
||||||
les objectifs principaux, et les fonctionnalités clés.
|
|
||||||
</overview>
|
|
||||||
|
|
||||||
<technology_stack>
|
|
||||||
<frontend>
|
|
||||||
<framework>React avec Vite</framework>
|
|
||||||
<styling>Tailwind CSS</styling>
|
|
||||||
<state_management>React hooks</state_management>
|
|
||||||
<!-- Ajoutez d'autres technologies frontend -->
|
|
||||||
</frontend>
|
|
||||||
<backend>
|
|
||||||
<runtime>Node.js avec Express</runtime>
|
|
||||||
<database>SQLite</database>
|
|
||||||
<!-- Ajoutez d'autres technologies backend -->
|
|
||||||
</backend>
|
|
||||||
</technology_stack>
|
|
||||||
|
|
||||||
<prerequisites>
|
|
||||||
<environment_setup>
|
|
||||||
- Liste des prérequis (dépendances, clés API, etc.)
|
|
||||||
</environment_setup>
|
|
||||||
</prerequisites>
|
|
||||||
|
|
||||||
<core_features>
|
|
||||||
<feature_1>
|
|
||||||
<title>Titre de la fonctionnalité 1</title>
|
|
||||||
<description>Description détaillée</description>
|
|
||||||
<priority>1</priority>
|
|
||||||
<category>frontend</category>
|
|
||||||
<test_steps>
|
|
||||||
1. Étape de test 1
|
|
||||||
2. Étape de test 2
|
|
||||||
</test_steps>
|
|
||||||
</feature_1>
|
|
||||||
|
|
||||||
<feature_2>
|
|
||||||
<!-- Autres fonctionnalités -->
|
|
||||||
</feature_2>
|
|
||||||
</core_features>
|
|
||||||
</project_specification>
|
|
||||||
```
|
|
||||||
|
|
||||||
### Étape 2 : Exemple de structure complète
|
|
||||||
|
|
||||||
Voici un exemple basé sur l'application "Claude Clone" que vous pouvez utiliser comme référence :
|
|
||||||
|
|
||||||
**Structure recommandée de `app_spec.txt` :**
|
|
||||||
|
|
||||||
```xml
|
|
||||||
<project_specification>
|
|
||||||
<project_name>Mon Application</project_name>
|
|
||||||
|
|
||||||
<overview>
|
|
||||||
Description de votre application...
|
|
||||||
</overview>
|
|
||||||
|
|
||||||
<technology_stack>
|
|
||||||
<!-- Stack technique complète -->
|
|
||||||
</technology_stack>
|
|
||||||
|
|
||||||
<prerequisites>
|
|
||||||
<!-- Prérequis -->
|
|
||||||
</prerequisites>
|
|
||||||
|
|
||||||
<core_features>
|
|
||||||
<!-- Liste toutes vos fonctionnalités avec des balises <feature_X> -->
|
|
||||||
</core_features>
|
|
||||||
|
|
||||||
<ui_design>
|
|
||||||
<!-- Spécifications UI si nécessaire -->
|
|
||||||
</ui_design>
|
|
||||||
|
|
||||||
<api_endpoints>
|
|
||||||
<!-- Endpoints API si nécessaire -->
|
|
||||||
</api_endpoints>
|
|
||||||
|
|
||||||
<database_schema>
|
|
||||||
<!-- Schéma de base de données si nécessaire -->
|
|
||||||
</database_schema>
|
|
||||||
</project_specification>
|
|
||||||
```
|
|
||||||
|
|
||||||
### Étape 3 : Lancer l'initialisation
|
|
||||||
|
|
||||||
Une fois votre `app_spec.txt` créé, lancez l'agent initializer :
|
|
||||||
|
|
||||||
```bash
|
|
||||||
python autonomous_agent_demo.py --project-dir ./ma_nouvelle_app
|
|
||||||
```
|
|
||||||
|
|
||||||
L'agent va :
|
|
||||||
1. Lire votre `app_spec.txt`
|
|
||||||
2. Créer un projet Linear
|
|
||||||
3. Créer ~50 issues Linear basées sur votre spécification
|
|
||||||
4. Initialiser la structure du projet
|
|
||||||
|
|
||||||
### Étape 4 : Suivre le développement
|
|
||||||
|
|
||||||
Les agents de codage vont ensuite :
|
|
||||||
- Travailler sur les issues Linear une par une
|
|
||||||
- Implémenter les fonctionnalités
|
|
||||||
- Tester avec Puppeteer
|
|
||||||
- Mettre à jour les issues avec leurs commentaires
|
|
||||||
|
|
||||||
## Exemple : Utiliser l'application "Claude Clone" comme référence
|
|
||||||
|
|
||||||
L'application "Claude Clone" dans `prompts/app_spec.txt` est un excellent exemple à suivre car elle contient :
|
|
||||||
|
|
||||||
### ✅ Éléments à copier/adapter :
|
|
||||||
|
|
||||||
1. **Structure XML** : La structure générale avec `<project_specification>`, `<overview>`, `<technology_stack>`, etc.
|
|
||||||
|
|
||||||
2. **Format des fonctionnalités** : Comment structurer les `<feature_X>` avec :
|
|
||||||
- `<title>`
|
|
||||||
- `<description>`
|
|
||||||
- `<priority>`
|
|
||||||
- `<category>`
|
|
||||||
- `<test_steps>`
|
|
||||||
|
|
||||||
3. **Détails techniques** : Comment décrire :
|
|
||||||
- La stack technologique
|
|
||||||
- Les prérequis
|
|
||||||
- Les endpoints API
|
|
||||||
- Le schéma de base de données
|
|
||||||
- Les spécifications UI
|
|
||||||
|
|
||||||
### ❌ Éléments spécifiques à NE PAS copier :
|
|
||||||
|
|
||||||
1. **Le contenu spécifique** : Les détails sur "Claude API", "artifacts", "conversations", etc. sont spécifiques à cette app
|
|
||||||
2. **Les fonctionnalités métier** : Adaptez-les à votre application
|
|
||||||
|
|
||||||
## Checklist pour une Nouvelle Application
|
|
||||||
|
|
||||||
- [ ] Créer `prompts/app_spec.txt` avec votre spécification
|
|
||||||
- [ ] Définir le `<project_name>` de votre application
|
|
||||||
- [ ] Décrire l'`<overview>` complète
|
|
||||||
- [ ] Spécifier la `<technology_stack>` (frontend + backend)
|
|
||||||
- [ ] Lister les `<prerequisites>` nécessaires
|
|
||||||
- [ ] Définir toutes les `<core_features>` avec des balises `<feature_X>`
|
|
||||||
- [ ] Ajouter des `<test_steps>` pour chaque fonctionnalité
|
|
||||||
- [ ] Lancer : `python autonomous_agent_demo.py --project-dir ./mon_app`
|
|
||||||
- [ ] Vérifier dans Linear que les issues sont créées correctement
|
|
||||||
|
|
||||||
## Conseils pour Rédiger une Bonne Spécification
|
|
||||||
|
|
||||||
### 1. Soyez détaillé mais structuré
|
|
||||||
|
|
||||||
Chaque fonctionnalité doit avoir :
|
|
||||||
- Un titre clair
|
|
||||||
- Une description complète de ce qu'elle fait
|
|
||||||
- Des étapes de test précises
|
|
||||||
- Une priorité (1=urgent, 4=optionnel)
|
|
||||||
|
|
||||||
### 2. Utilisez le format XML cohérent
|
|
||||||
|
|
||||||
```xml
|
|
||||||
<feature_1>
|
|
||||||
<title>Authentification - Connexion utilisateur</title>
|
|
||||||
<description>
|
|
||||||
Implémenter un système d'authentification avec :
|
|
||||||
- Formulaire de connexion (email/mot de passe)
|
|
||||||
- Validation côté client et serveur
|
|
||||||
- Gestion des sessions avec JWT
|
|
||||||
- Page de réinitialisation de mot de passe
|
|
||||||
</description>
|
|
||||||
<priority>1</priority>
|
|
||||||
<category>auth</category>
|
|
||||||
<test_steps>
|
|
||||||
1. Accéder à la page de connexion
|
|
||||||
2. Entrer un email invalide → voir erreur
|
|
||||||
3. Entrer des identifiants valides → redirection vers dashboard
|
|
||||||
4. Vérifier que le token JWT est stocké
|
|
||||||
5. Tester la déconnexion
|
|
||||||
</test_steps>
|
|
||||||
</feature_1>
|
|
||||||
```
|
|
||||||
|
|
||||||
### 3. Organisez par catégories
|
|
||||||
|
|
||||||
Groupez les fonctionnalités par catégorie :
|
|
||||||
- `auth` : Authentification
|
|
||||||
- `frontend` : Interface utilisateur
|
|
||||||
- `backend` : API et logique serveur
|
|
||||||
- `database` : Modèles et migrations
|
|
||||||
- `integration` : Intégrations externes
|
|
||||||
|
|
||||||
### 4. Priorisez les fonctionnalités
|
|
||||||
|
|
||||||
- **Priority 1** : Fonctionnalités critiques (auth, base de données)
|
|
||||||
- **Priority 2** : Fonctionnalités importantes (core features)
|
|
||||||
- **Priority 3** : Fonctionnalités secondaires (améliorations UX)
|
|
||||||
- **Priority 4** : Nice-to-have (polish, optimisations)
|
|
||||||
|
|
||||||
## Exemple Minimal
|
|
||||||
|
|
||||||
Voici un exemple minimal pour démarrer :
|
|
||||||
|
|
||||||
```xml
|
|
||||||
<project_specification>
|
|
||||||
<project_name>Todo App - Gestionnaire de Tâches</project_name>
|
|
||||||
|
|
||||||
<overview>
|
|
||||||
Application web simple pour gérer des listes de tâches.
|
|
||||||
Les utilisateurs peuvent créer, modifier, compléter et supprimer des tâches.
|
|
||||||
</overview>
|
|
||||||
|
|
||||||
<technology_stack>
|
|
||||||
<frontend>
|
|
||||||
<framework>React avec Vite</framework>
|
|
||||||
<styling>Tailwind CSS</styling>
|
|
||||||
</frontend>
|
|
||||||
<backend>
|
|
||||||
<runtime>Node.js avec Express</runtime>
|
|
||||||
<database>SQLite</database>
|
|
||||||
</backend>
|
|
||||||
</technology_stack>
|
|
||||||
|
|
||||||
<core_features>
|
|
||||||
<feature_1>
|
|
||||||
<title>Interface principale - Liste des tâches</title>
|
|
||||||
<description>Afficher une liste de toutes les tâches avec leur statut</description>
|
|
||||||
<priority>1</priority>
|
|
||||||
<category>frontend</category>
|
|
||||||
<test_steps>
|
|
||||||
1. Ouvrir l'application
|
|
||||||
2. Vérifier que la liste des tâches s'affiche
|
|
||||||
</test_steps>
|
|
||||||
</feature_1>
|
|
||||||
|
|
||||||
<feature_2>
|
|
||||||
<title>Créer une nouvelle tâche</title>
|
|
||||||
<description>Formulaire pour ajouter une nouvelle tâche à la liste</description>
|
|
||||||
<priority>1</priority>
|
|
||||||
<category>frontend</category>
|
|
||||||
<test_steps>
|
|
||||||
1. Cliquer sur "Nouvelle tâche"
|
|
||||||
2. Entrer un titre
|
|
||||||
3. Cliquer sur "Ajouter"
|
|
||||||
4. Vérifier que la tâche apparaît dans la liste
|
|
||||||
</test_steps>
|
|
||||||
</feature_2>
|
|
||||||
</core_features>
|
|
||||||
</project_specification>
|
|
||||||
```
|
|
||||||
|
|
||||||
## Fichiers à Conserver du Framework
|
|
||||||
|
|
||||||
Ces fichiers sont **génériques** et fonctionnent pour toutes les applications :
|
|
||||||
|
|
||||||
- ✅ `autonomous_agent_demo.py` - Point d'entrée
|
|
||||||
- ✅ `agent.py` - Logique des agents
|
|
||||||
- ✅ `client.py` - Configuration Claude SDK
|
|
||||||
- ✅ `prompts.py` - Chargement des prompts
|
|
||||||
- ✅ `progress.py` - Suivi de progression
|
|
||||||
- ✅ `security.py` - Sécurité
|
|
||||||
- ✅ `linear_config.py` - Config Linear
|
|
||||||
- ✅ `prompts/initializer_prompt.md` - Template initializer
|
|
||||||
- ✅ `prompts/initializer_bis_prompt.md` - Template initializer bis
|
|
||||||
- ✅ `prompts/coding_prompt.md` - Template coding agent
|
|
||||||
- ✅ `requirements.txt` - Dépendances Python
|
|
||||||
|
|
||||||
## Fichiers à Créer pour Votre Application
|
|
||||||
|
|
||||||
- ✅ `prompts/app_spec.txt` - **Votre spécification (le seul fichier à créer !)**
|
|
||||||
|
|
||||||
## Résumé
|
|
||||||
|
|
||||||
Pour créer une nouvelle application :
|
|
||||||
|
|
||||||
1. **Copiez la structure XML** de `prompts/app_spec.txt` (exemple Claude Clone)
|
|
||||||
2. **Adaptez le contenu** à votre application
|
|
||||||
3. **Définissez toutes vos fonctionnalités** avec des balises `<feature_X>`
|
|
||||||
4. **Lancez** : `python autonomous_agent_demo.py --project-dir ./mon_app`
|
|
||||||
5. **Suivez le progrès** dans Linear
|
|
||||||
|
|
||||||
Le framework s'occupe du reste ! 🚀
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
106
LIBRARY_RAG_SETUP.md
Normal file
106
LIBRARY_RAG_SETUP.md
Normal file
@@ -0,0 +1,106 @@
|
|||||||
|
# Library RAG MCP Setup Guide
|
||||||
|
|
||||||
|
## Quick Setup
|
||||||
|
|
||||||
|
To enable the Library RAG MCP server (document search with Weaviate), you need to configure the following in your `.env` file:
|
||||||
|
|
||||||
|
### Required Environment Variables
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Library RAG MCP Configuration
|
||||||
|
LIBRARY_RAG_ENABLED=true
|
||||||
|
MCP_LIBRARY_RAG_SERVER_PATH=C:/GitHub/linear_coding_library_rag/generations/library_rag/mcp_server.py
|
||||||
|
LIBRARY_RAG_PYTHON_COMMAND=python
|
||||||
|
LIBRARY_RAG_CONNECTION_TIMEOUT=10000
|
||||||
|
LIBRARY_RAG_AUTO_RECONNECT=true
|
||||||
|
LIBRARY_RAG_MAX_RETRIES=3
|
||||||
|
|
||||||
|
# Weaviate Configuration
|
||||||
|
WEAVIATE_URL=http://localhost:8080
|
||||||
|
|
||||||
|
# REQUIRED: Mistral API Key (for OCR functionality)
|
||||||
|
MISTRAL_API_KEY=your_mistral_api_key_here
|
||||||
|
```
|
||||||
|
|
||||||
|
### Why MISTRAL_API_KEY is Required
|
||||||
|
|
||||||
|
The Library RAG MCP server uses Mistral API for:
|
||||||
|
- **OCR with annotations**: Extracting text from PDF images with layout annotations
|
||||||
|
- **LLM processing**: Metadata extraction, table of contents generation, semantic chunking
|
||||||
|
|
||||||
|
Without this key, the MCP server **will fail to start** and the backend connection will time out with the error: `MCP error -32001: Request timed out`.
|
||||||
|
|
||||||
|
### Getting Your Mistral API Key
|
||||||
|
|
||||||
|
1. Go to https://console.mistral.ai/
|
||||||
|
2. Sign up or log in
|
||||||
|
3. Navigate to API Keys section
|
||||||
|
4. Create a new API key
|
||||||
|
5. Copy the key and add it to your `.env` file
|
||||||
|
|
||||||
|
### Troubleshooting
|
||||||
|
|
||||||
|
**Issue**: Library RAG shows `"connected": false` in status endpoint
|
||||||
|
|
||||||
|
**Solution**:
|
||||||
|
1. Check that `MISTRAL_API_KEY` is uncommented in `.env`
|
||||||
|
2. Verify the key is valid
|
||||||
|
3. Restart the backend server: `npm run dev`
|
||||||
|
|
||||||
|
**Issue**: Connection timeout error
|
||||||
|
|
||||||
|
**Cause**: The MCP server subprocess cannot start without the Mistral API key
|
||||||
|
|
||||||
|
**Fix**: Add the key to `.env` and restart
|
||||||
|
|
||||||
|
### Verifying Connection
|
||||||
|
|
||||||
|
Check the connection status:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://localhost:5175/api/library-rag/status | python -m json.tool
|
||||||
|
```
|
||||||
|
|
||||||
|
Expected response when connected:
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"success": true,
|
||||||
|
"data": {
|
||||||
|
"connected": true,
|
||||||
|
"message": "Library RAG MCP server is connected and ready",
|
||||||
|
"tools": [...],
|
||||||
|
"error": null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Available Tools
|
||||||
|
|
||||||
|
Once connected, you'll have access to:
|
||||||
|
|
||||||
|
1. **upload_document** - Upload and index PDF documents with full pipeline (OCR, metadata, chunking, Weaviate ingestion)
|
||||||
|
2. **search_library** - Semantic search over document chunks
|
||||||
|
3. **hybrid_search** - Search summaries and high-level content
|
||||||
|
4. **list_collections** - List all indexed documents
|
||||||
|
5. **get_document** - Retrieve document metadata
|
||||||
|
6. **filter_search** - Filter by author, work, or language
|
||||||
|
7. **extract_text_from_image** - OCR from image URLs
|
||||||
|
|
||||||
|
### Related Configuration
|
||||||
|
|
||||||
|
Make sure Weaviate Docker is running:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd C:\GitHub\linear_coding_library_rag\generations\library_rag
|
||||||
|
docker-compose up -d
|
||||||
|
```
|
||||||
|
|
||||||
|
Check Weaviate status:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://localhost:8080/v1/meta
|
||||||
|
```
|
||||||
|
|
||||||
|
## Changelog
|
||||||
|
|
||||||
|
**2025-12-26**: Fixed the MCP connection issue by adding the `MISTRAL_API_KEY` requirement to the documentation. The key must be present in `.env` for the MCP server to start successfully.
|
||||||
294
README.md
294
README.md
@@ -64,19 +64,25 @@ pip show claude-code-sdk # Check SDK is installed
|
|||||||
|
|
||||||
## Quick Start
|
## Quick Start
|
||||||
|
|
||||||
|
### Option 1: Use the Example (Claude Clone)
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Initialize a new project
|
# Initialize the Claude Clone example project
|
||||||
python autonomous_agent_demo.py --project-dir ./my_project
|
python autonomous_agent_demo.py --project-dir ./ikario_body
|
||||||
|
|
||||||
# Add new features to an existing project
|
# Add new features to an existing project
|
||||||
python autonomous_agent_demo.py --project-dir ./my_project --new-spec app_spec_theme_customization.txt
|
python autonomous_agent_demo.py --project-dir ./ikario_body --new-spec app_spec_theme_customization.txt
|
||||||
```
|
```
|
||||||
|
|
||||||
For testing with limited iterations:
|
For testing with limited iterations:
|
||||||
```bash
|
```bash
|
||||||
python autonomous_agent_demo.py --project-dir ./my_project --max-iterations 3
|
python autonomous_agent_demo.py --project-dir ./ikario_body --max-iterations 3
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Option 2: Create Your Own Application
|
||||||
|
|
||||||
|
See the [Creating a New Application](#creating-a-new-application) section below for detailed instructions on creating a custom application from scratch.
|
||||||
|
|
||||||
## How It Works
|
## How It Works
|
||||||
|
|
||||||
### Linear-Centric Workflow
|
### Linear-Centric Workflow
|
||||||
@@ -142,7 +148,7 @@ The **Initializer Bis** agent allows you to add new features to an existing proj
|
|||||||
**Example:**
|
**Example:**
|
||||||
```bash
|
```bash
|
||||||
# Add theme customization features to an existing project
|
# Add theme customization features to an existing project
|
||||||
python autonomous_agent_demo.py --project-dir ./my_project --new-spec app_spec_theme_customization.txt
|
python autonomous_agent_demo.py --project-dir ./ikario_body --new-spec app_spec_theme_customization.txt
|
||||||
```
|
```
|
||||||
|
|
||||||
This will create multiple Linear issues (one per `<feature>` tag) that will be worked on by subsequent coding agent sessions.
|
This will create multiple Linear issues (one per `<feature>` tag) that will be worked on by subsequent coding agent sessions.
|
||||||
@@ -192,7 +198,6 @@ linear-agent-harness/
|
|||||||
│ ├── initializer_prompt.md # First session prompt (creates Linear issues)
|
│ ├── initializer_prompt.md # First session prompt (creates Linear issues)
|
||||||
│ ├── initializer_bis_prompt.md # Prompt for adding new features
|
│ ├── initializer_bis_prompt.md # Prompt for adding new features
|
||||||
│ └── coding_prompt.md # Continuation session prompt (works issues)
|
│ └── coding_prompt.md # Continuation session prompt (works issues)
|
||||||
├── GUIDE_NEW_APP.md # Guide pour créer une nouvelle application
|
|
||||||
└── requirements.txt # Python dependencies
|
└── requirements.txt # Python dependencies
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -201,7 +206,7 @@ linear-agent-harness/
|
|||||||
After running, your project directory will contain:
|
After running, your project directory will contain:
|
||||||
|
|
||||||
```
|
```
|
||||||
my_project/
|
ikario_body/
|
||||||
├── .linear_project.json # Linear project state (marker file)
|
├── .linear_project.json # Linear project state (marker file)
|
||||||
├── app_spec.txt # Copied specification
|
├── app_spec.txt # Copied specification
|
||||||
├── app_spec_theme_customization.txt # New spec file (if using --new-spec)
|
├── app_spec_theme_customization.txt # New spec file (if using --new-spec)
|
||||||
@@ -241,35 +246,268 @@ The initializer agent will create:
|
|||||||
|
|
||||||
All subsequent coding agents will work from this Linear project.
|
All subsequent coding agents will work from this Linear project.
|
||||||
|
|
||||||
|
## Creating a New Application
|
||||||
|
|
||||||
|
This framework is designed to be **generic and reusable** for any web application. Here's how to create your own application from scratch.
|
||||||
|
|
||||||
|
### Understanding the Framework Structure
|
||||||
|
|
||||||
|
#### Generic Framework Files (DO NOT MODIFY)
|
||||||
|
|
||||||
|
These files work for all applications and should remain unchanged:
|
||||||
|
|
||||||
|
```
|
||||||
|
linear-coding-agent/
|
||||||
|
├── autonomous_agent_demo.py # Main entry point
|
||||||
|
├── agent.py # Agent session logic
|
||||||
|
├── client.py # Claude SDK + MCP client configuration
|
||||||
|
├── security.py # Bash command allowlist and validation
|
||||||
|
├── progress.py # Progress tracking utilities
|
||||||
|
├── prompts.py # Prompt loading utilities
|
||||||
|
├── linear_config.py # Linear configuration constants
|
||||||
|
├── requirements.txt # Python dependencies
|
||||||
|
└── prompts/
|
||||||
|
├── initializer_prompt.md # First session prompt template
|
||||||
|
├── initializer_bis_prompt.md # New features prompt template
|
||||||
|
└── coding_prompt.md # Continuation session prompt template
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Application-Specific Files (CREATE THESE)
|
||||||
|
|
||||||
|
The **only file you need to create** is your application specification:
|
||||||
|
|
||||||
|
```
|
||||||
|
prompts/
|
||||||
|
└── app_spec.txt # Your application specification (XML format)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Step-by-Step Guide
|
||||||
|
|
||||||
|
#### Step 1: Create Your Specification File
|
||||||
|
|
||||||
|
Create `prompts/app_spec.txt` using this XML structure:
|
||||||
|
|
||||||
|
```xml
|
||||||
|
<project_specification>
|
||||||
|
<project_name>Your Application Name</project_name>
|
||||||
|
|
||||||
|
<overview>
|
||||||
|
Complete description of your application. Explain what you want to build,
|
||||||
|
main objectives, and key features.
|
||||||
|
</overview>
|
||||||
|
|
||||||
|
<technology_stack>
|
||||||
|
<frontend>
|
||||||
|
<framework>React with Vite</framework>
|
||||||
|
<styling>Tailwind CSS</styling>
|
||||||
|
<state_management>React hooks</state_management>
|
||||||
|
</frontend>
|
||||||
|
<backend>
|
||||||
|
<runtime>Node.js with Express</runtime>
|
||||||
|
<database>SQLite</database>
|
||||||
|
</backend>
|
||||||
|
</technology_stack>
|
||||||
|
|
||||||
|
<prerequisites>
|
||||||
|
<environment_setup>
|
||||||
|
- List of prerequisites (dependencies, API keys, etc.)
|
||||||
|
</environment_setup>
|
||||||
|
</prerequisites>
|
||||||
|
|
||||||
|
<core_features>
|
||||||
|
<feature_1>
|
||||||
|
<title>Feature 1 Title</title>
|
||||||
|
<description>Detailed description</description>
|
||||||
|
<priority>1</priority>
|
||||||
|
<category>frontend</category>
|
||||||
|
<test_steps>
|
||||||
|
1. Test step 1
|
||||||
|
2. Test step 2
|
||||||
|
</test_steps>
|
||||||
|
</feature_1>
|
||||||
|
|
||||||
|
<feature_2>
|
||||||
|
<!-- More features -->
|
||||||
|
</feature_2>
|
||||||
|
</core_features>
|
||||||
|
</project_specification>
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Step 2: Define Your Features
|
||||||
|
|
||||||
|
Each feature should have:
|
||||||
|
|
||||||
|
- **Title**: Clear, descriptive title
|
||||||
|
- **Description**: Complete explanation of what it does
|
||||||
|
- **Priority**: 1 (urgent) to 4 (optional)
|
||||||
|
- **Category**: `frontend`, `backend`, `database`, `auth`, `integration`, etc.
|
||||||
|
- **Test Steps**: Precise verification steps
|
||||||
|
|
||||||
|
Example feature:
|
||||||
|
|
||||||
|
```xml
|
||||||
|
<feature_1>
|
||||||
|
<title>User Authentication - Login Flow</title>
|
||||||
|
<description>
|
||||||
|
Implement authentication system with:
|
||||||
|
- Login form (email/password)
|
||||||
|
- Client and server-side validation
|
||||||
|
- JWT session management
|
||||||
|
- Password reset page
|
||||||
|
</description>
|
||||||
|
<priority>1</priority>
|
||||||
|
<category>auth</category>
|
||||||
|
<test_steps>
|
||||||
|
1. Access login page
|
||||||
|
2. Enter invalid email → see error
|
||||||
|
3. Enter valid credentials → redirect to dashboard
|
||||||
|
4. Verify JWT token is stored
|
||||||
|
5. Test logout functionality
|
||||||
|
</test_steps>
|
||||||
|
</feature_1>
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Step 3: Launch Initialization
|
||||||
|
|
||||||
|
Once your `app_spec.txt` is ready:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python autonomous_agent_demo.py --project-dir ./my_new_app
|
||||||
|
```
|
||||||
|
|
||||||
|
The initializer agent will:
|
||||||
|
1. Read your `app_spec.txt`
|
||||||
|
2. Create a Linear project
|
||||||
|
3. Create ~50 Linear issues based on your spec
|
||||||
|
4. Initialize project structure, `init.sh`, and git
|
||||||
|
|
||||||
|
#### Step 4: Monitor Development
|
||||||
|
|
||||||
|
Coding agents will then:
|
||||||
|
- Work on Linear issues one by one
|
||||||
|
- Implement features
|
||||||
|
- Test with Puppeteer browser automation
|
||||||
|
- Update issues with implementation comments
|
||||||
|
- Mark issues as complete
|
||||||
|
|
||||||
|
### Minimal Example
|
||||||
|
|
||||||
|
Here's a minimal Todo App example to get started:
|
||||||
|
|
||||||
|
```xml
|
||||||
|
<project_specification>
|
||||||
|
<project_name>Todo App - Task Manager</project_name>
|
||||||
|
|
||||||
|
<overview>
|
||||||
|
Simple web application for managing task lists.
|
||||||
|
Users can create, edit, complete, and delete tasks.
|
||||||
|
</overview>
|
||||||
|
|
||||||
|
<technology_stack>
|
||||||
|
<frontend>
|
||||||
|
<framework>React with Vite</framework>
|
||||||
|
<styling>Tailwind CSS</styling>
|
||||||
|
</frontend>
|
||||||
|
<backend>
|
||||||
|
<runtime>Node.js with Express</runtime>
|
||||||
|
<database>SQLite</database>
|
||||||
|
</backend>
|
||||||
|
</technology_stack>
|
||||||
|
|
||||||
|
<core_features>
|
||||||
|
<feature_1>
|
||||||
|
<title>Main Interface - Task List</title>
|
||||||
|
<description>Display a list of all tasks with their status</description>
|
||||||
|
<priority>1</priority>
|
||||||
|
<category>frontend</category>
|
||||||
|
<test_steps>
|
||||||
|
1. Open application
|
||||||
|
2. Verify task list displays
|
||||||
|
</test_steps>
|
||||||
|
</feature_1>
|
||||||
|
|
||||||
|
<feature_2>
|
||||||
|
<title>Create New Task</title>
|
||||||
|
<description>Form to add a new task to the list</description>
|
||||||
|
<priority>1</priority>
|
||||||
|
<category>frontend</category>
|
||||||
|
<test_steps>
|
||||||
|
1. Click "New Task"
|
||||||
|
2. Enter a title
|
||||||
|
3. Click "Add"
|
||||||
|
4. Verify task appears in list
|
||||||
|
</test_steps>
|
||||||
|
</feature_2>
|
||||||
|
</core_features>
|
||||||
|
</project_specification>
|
||||||
|
```
|
||||||
|
|
||||||
|
### Best Practices
|
||||||
|
|
||||||
|
#### 1. Be Detailed but Structured
|
||||||
|
|
||||||
|
Each feature must have:
|
||||||
|
- Clear title
|
||||||
|
- Complete description of functionality
|
||||||
|
- Precise test steps
|
||||||
|
- Priority (1=urgent, 4=optional)
|
||||||
|
|
||||||
|
#### 2. Use Consistent XML Format
|
||||||
|
|
||||||
|
Follow the structure shown above for all features using `<feature_X>` tags.
|
||||||
|
|
||||||
|
#### 3. Organize by Categories
|
||||||
|
|
||||||
|
Group features by category:
|
||||||
|
- `auth`: Authentication
|
||||||
|
- `frontend`: User interface
|
||||||
|
- `backend`: API and server logic
|
||||||
|
- `database`: Models and migrations
|
||||||
|
- `integration`: External integrations
|
||||||
|
|
||||||
|
#### 4. Prioritize Features
|
||||||
|
|
||||||
|
- **Priority 1**: Critical features (auth, database)
|
||||||
|
- **Priority 2**: Important features (core functionality)
|
||||||
|
- **Priority 3**: Secondary features (UX improvements)
|
||||||
|
- **Priority 4**: Nice-to-have (polish, optimizations)
|
||||||
|
|
||||||
|
### Using the Claude Clone as Reference
|
||||||
|
|
||||||
|
The Claude Clone example in `prompts/app_spec.txt` is excellent reference material:
|
||||||
|
|
||||||
|
#### ✅ Elements to Copy/Adapt:
|
||||||
|
|
||||||
|
1. **XML Structure**: Overall structure with `<project_specification>`, `<overview>`, `<technology_stack>`, etc.
|
||||||
|
2. **Feature Format**: How to structure `<feature_X>` tags with all required fields
|
||||||
|
3. **Technical Details**: How to describe technology stack, prerequisites, API endpoints, database schema, UI specs
|
||||||
|
|
||||||
|
#### ❌ Elements NOT to Copy:
|
||||||
|
|
||||||
|
1. **Specific Content**: Details about "Claude API", "artifacts", "conversations" are app-specific
|
||||||
|
2. **Business Features**: Adapt features to your application's needs
|
||||||
|
|
||||||
|
### Checklist for New Application
|
||||||
|
|
||||||
|
- [ ] Create `prompts/app_spec.txt` with your specification
|
||||||
|
- [ ] Define `<project_name>` for your application
|
||||||
|
- [ ] Write complete `<overview>`
|
||||||
|
- [ ] Specify `<technology_stack>` (frontend + backend)
|
||||||
|
- [ ] List all `<prerequisites>`
|
||||||
|
- [ ] Define all `<core_features>` with `<feature_X>` tags
|
||||||
|
- [ ] Add `<test_steps>` for each feature
|
||||||
|
- [ ] Launch: `python autonomous_agent_demo.py --project-dir ./my_app`
|
||||||
|
- [ ] Verify in Linear that issues are created correctly
|
||||||
|
|
||||||
## Customization
|
## Customization
|
||||||
|
|
||||||
### Creating a New Application from Scratch
|
|
||||||
|
|
||||||
To create a **completely new application** (not based on the Claude Clone example):
|
|
||||||
|
|
||||||
1. **Read the guide**: See [GUIDE_NEW_APP.md](GUIDE_NEW_APP.md) for detailed instructions
|
|
||||||
2. **Use the template**: Copy `prompts/app_spec_template.txt` as a starting point
|
|
||||||
3. **Reference the example**: Use `prompts/app_spec.txt` (Claude Clone) as a reference for structure and detail level
|
|
||||||
4. **Create your spec**: Write your `prompts/app_spec.txt` with your application specification
|
|
||||||
5. **Launch**: Run `python autonomous_agent_demo.py --project-dir ./my_new_app`
|
|
||||||
|
|
||||||
**Key points:**
|
|
||||||
- Keep the framework files unchanged (they're generic and reusable)
|
|
||||||
- Only create/modify `prompts/app_spec.txt` for your new application
|
|
||||||
- Use the XML structure from the Claude Clone example as a template
|
|
||||||
- Define features with `<feature_X>` tags - each will become a Linear issue
|
|
||||||
|
|
||||||
### Changing the Application
|
|
||||||
|
|
||||||
Edit `prompts/app_spec.txt` to specify a different application to build.
|
|
||||||
|
|
||||||
### Adding New Features to Existing Projects
|
### Adding New Features to Existing Projects
|
||||||
|
|
||||||
1. Create a new specification file in `prompts/` directory (e.g., `app_spec_new_feature.txt`)
|
1. Create a new specification file in `prompts/` directory (e.g., `app_spec_new_feature.txt`)
|
||||||
2. Format it with `<feature>` tags following the same structure as `app_spec.txt`
|
2. Format it with `<feature>` tags following the same structure as `app_spec.txt`
|
||||||
3. Run with `--new-spec` flag:
|
3. Run with `--new-spec` flag:
|
||||||
```bash
|
```bash
|
||||||
python autonomous_agent_demo.py --project-dir ./my_project --new-spec app_spec_new_feature.txt
|
python autonomous_agent_demo.py --project-dir ./ikario_body --new-spec app_spec_new_feature.txt
|
||||||
```
|
```
|
||||||
4. The Initializer Bis agent will create new Linear issues for each feature in the spec file
|
4. The Initializer Bis agent will create new Linear issues for each feature in the spec file
|
||||||
|
|
||||||
|
|||||||
@@ -15,8 +15,15 @@ Example Usage:
|
|||||||
import argparse
|
import argparse
|
||||||
import asyncio
|
import asyncio
|
||||||
import os
|
import os
|
||||||
|
import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Fix Windows encoding issues with emojis and Unicode characters
|
||||||
|
if sys.platform == 'win32':
|
||||||
|
import io
|
||||||
|
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='replace')
|
||||||
|
sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8', errors='replace')
|
||||||
|
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from agent import run_autonomous_agent
|
from agent import run_autonomous_agent
|
||||||
|
|
||||||
|
|||||||
@@ -1,9 +1,9 @@
|
|||||||
services:
|
services:
|
||||||
my_project_frontend:
|
ikario_body_frontend:
|
||||||
image: node:20
|
image: node:20
|
||||||
working_dir: /app
|
working_dir: /app
|
||||||
volumes:
|
volumes:
|
||||||
- ./generations/my_project:/app
|
- ./generations/ikario_body:/app
|
||||||
# Eviter de réutiliser les node_modules Windows dans le conteneur Linux
|
# Eviter de réutiliser les node_modules Windows dans le conteneur Linux
|
||||||
- /app/node_modules
|
- /app/node_modules
|
||||||
command: ["sh", "-c", "npm install && npm run dev -- --host 0.0.0.0 --port 3000"]
|
command: ["sh", "-c", "npm install && npm run dev -- --host 0.0.0.0 --port 3000"]
|
||||||
@@ -11,12 +11,13 @@ services:
|
|||||||
- "4300:3000"
|
- "4300:3000"
|
||||||
environment:
|
environment:
|
||||||
- NODE_ENV=development
|
- NODE_ENV=development
|
||||||
|
- DOCKER_ENV=true
|
||||||
|
|
||||||
my_project_server:
|
ikario_body_server:
|
||||||
image: node:20
|
image: node:20
|
||||||
working_dir: /app/server
|
working_dir: /app/server
|
||||||
volumes:
|
volumes:
|
||||||
- ./generations/my_project:/app
|
- ./generations/ikario_body:/app
|
||||||
# Eviter de réutiliser les node_modules Windows dans le conteneur Linux
|
# Eviter de réutiliser les node_modules Windows dans le conteneur Linux
|
||||||
- /app/server/node_modules
|
- /app/server/node_modules
|
||||||
command: ["sh", "-c", "npm install && npm start"]
|
command: ["sh", "-c", "npm install && npm start"]
|
||||||
@@ -25,5 +26,5 @@ services:
|
|||||||
environment:
|
environment:
|
||||||
- NODE_ENV=development
|
- NODE_ENV=development
|
||||||
depends_on:
|
depends_on:
|
||||||
- my_project_frontend
|
- ikario_body_frontend
|
||||||
|
|
||||||
@@ -1,24 +1,24 @@
|
|||||||
"""
|
"""
|
||||||
Dockerization helper for my_project
|
Dockerization helper for ikario_body
|
||||||
===================================
|
===================================
|
||||||
|
|
||||||
Ce script crée les fichiers Docker nécessaires pour exécuter l'application
|
Ce script crée les fichiers Docker nécessaires pour exécuter l'application
|
||||||
`generations/my_project` (frontend + serveur + base SQLite) dans Docker,
|
`generations/ikario_body` (frontend + serveur + base SQLite) dans Docker,
|
||||||
SANS modifier aucun fichier existant.
|
SANS modifier aucun fichier existant.
|
||||||
|
|
||||||
Il génère un fichier de composition :
|
Il génère un fichier de composition :
|
||||||
- docker-compose.my_project.yml (à la racine du repo)
|
- docker-compose.ikario_body.yml (à la racine du repo)
|
||||||
|
|
||||||
Ce fichier utilise l'image officielle Node et monte le code existant
|
Ce fichier utilise l'image officielle Node et monte le code existant
|
||||||
ainsi que la base SQLite dans les conteneurs (mode développement).
|
ainsi que la base SQLite dans les conteneurs (mode développement).
|
||||||
|
|
||||||
Utilisation :
|
Utilisation :
|
||||||
1) Depuis la racine du repo :
|
1) Depuis la racine du repo :
|
||||||
python dockerize_my_project.py
|
python dockerize_ikario_body.py
|
||||||
2) Puis pour lancer l'appli dans Docker :
|
2) Puis pour lancer l'appli dans Docker :
|
||||||
docker compose -f docker-compose.my_project.yml up
|
docker compose -f docker-compose.ikario_body.yml up
|
||||||
ou, selon votre installation :
|
ou, selon votre installation :
|
||||||
docker-compose -f docker-compose.my_project.yml up
|
docker-compose -f docker-compose.ikario_body.yml up
|
||||||
|
|
||||||
- Frontend accessible sur: http://localhost:3000
|
- Frontend accessible sur: http://localhost:3000
|
||||||
- API backend (server) sur : http://localhost:3001
|
- API backend (server) sur : http://localhost:3001
|
||||||
@@ -28,13 +28,13 @@ from pathlib import Path
|
|||||||
|
|
||||||
|
|
||||||
def generate_docker_compose(root: Path) -> None:
|
def generate_docker_compose(root: Path) -> None:
|
||||||
"""Génère le fichier docker-compose.my_project.yml sans toucher au code existant."""
|
"""Génère le fichier docker-compose.ikario_body.yml sans toucher au code existant."""
|
||||||
project_dir = root / "generations" / "my_project"
|
project_dir = root / "generations" / "ikario_body"
|
||||||
|
|
||||||
if not project_dir.exists():
|
if not project_dir.exists():
|
||||||
raise SystemExit(f"Project directory not found: {project_dir}")
|
raise SystemExit(f"Project directory not found: {project_dir}")
|
||||||
|
|
||||||
compose_path = root / "docker-compose.my_project.yml"
|
compose_path = root / "docker-compose.ikario_body.yml"
|
||||||
|
|
||||||
# On utilise les scripts npm déjà définis :
|
# On utilise les scripts npm déjà définis :
|
||||||
# - frontend: npm run dev (Vite) en écoutant sur 0.0.0.0:3000 (dans le conteneur)
|
# - frontend: npm run dev (Vite) en écoutant sur 0.0.0.0:3000 (dans le conteneur)
|
||||||
@@ -45,14 +45,14 @@ def generate_docker_compose(root: Path) -> None:
|
|||||||
# - frontend : host 4300 -> container 3000
|
# - frontend : host 4300 -> container 3000
|
||||||
# - backend : host 4301 -> container 3001
|
# - backend : host 4301 -> container 3001
|
||||||
#
|
#
|
||||||
# Le volume ./generations/my_project est monté dans /app,
|
# Le volume ./generations/ikario_body est monté dans /app,
|
||||||
# ce qui inclut aussi la base SQLite dans server/data/claude-clone.db.
|
# ce qui inclut aussi la base SQLite dans server/data/claude-clone.db.
|
||||||
compose_content = f"""services:
|
compose_content = f"""services:
|
||||||
my_project_frontend:
|
ikario_body_frontend:
|
||||||
image: node:20
|
image: node:20
|
||||||
working_dir: /app
|
working_dir: /app
|
||||||
volumes:
|
volumes:
|
||||||
- ./generations/my_project:/app
|
- ./generations/ikario_body:/app
|
||||||
# Eviter de réutiliser les node_modules Windows dans le conteneur Linux
|
# Eviter de réutiliser les node_modules Windows dans le conteneur Linux
|
||||||
- /app/node_modules
|
- /app/node_modules
|
||||||
command: ["sh", "-c", "npm install && npm run dev -- --host 0.0.0.0 --port 3000"]
|
command: ["sh", "-c", "npm install && npm run dev -- --host 0.0.0.0 --port 3000"]
|
||||||
@@ -61,11 +61,11 @@ def generate_docker_compose(root: Path) -> None:
|
|||||||
environment:
|
environment:
|
||||||
- NODE_ENV=development
|
- NODE_ENV=development
|
||||||
|
|
||||||
my_project_server:
|
ikario_body_server:
|
||||||
image: node:20
|
image: node:20
|
||||||
working_dir: /app/server
|
working_dir: /app/server
|
||||||
volumes:
|
volumes:
|
||||||
- ./generations/my_project:/app
|
- ./generations/ikario_body:/app
|
||||||
# Eviter de réutiliser les node_modules Windows dans le conteneur Linux
|
# Eviter de réutiliser les node_modules Windows dans le conteneur Linux
|
||||||
- /app/server/node_modules
|
- /app/server/node_modules
|
||||||
command: ["sh", "-c", "npm install && npm start"]
|
command: ["sh", "-c", "npm install && npm start"]
|
||||||
@@ -74,7 +74,7 @@ def generate_docker_compose(root: Path) -> None:
|
|||||||
environment:
|
environment:
|
||||||
- NODE_ENV=development
|
- NODE_ENV=development
|
||||||
depends_on:
|
depends_on:
|
||||||
- my_project_frontend
|
- ikario_body_frontend
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
26021
ikario_memories_export.md
Normal file
26021
ikario_memories_export.md
Normal file
File diff suppressed because it is too large
Load Diff
2510
navette.txt
Normal file
2510
navette.txt
Normal file
File diff suppressed because it is too large
Load Diff
5
package.json
Normal file
5
package.json
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
{
|
||||||
|
"dependencies": {
|
||||||
|
"puppeteer": "^24.33.1"
|
||||||
|
}
|
||||||
|
}
|
||||||
151
patch_stats.py
Normal file
151
patch_stats.py
Normal file
@@ -0,0 +1,151 @@
|
|||||||
|
#!/usr/bin/env python3
"""
Patch getMemoryStats to count thoughts and conversations separately.

Replaces the ``getMemoryStats`` function of memoryService.js, together with
the comment block directly above it (if any), by the implementation stored in
``new_function`` preceded by a fresh JSDoc block.

Usage:
    python patch_stats.py [path/to/memoryService.js]

When no path is given, the hard-coded ``file_path`` below is patched.
"""

import sys

# Default target; can be overridden by the first command-line argument.
file_path = "C:/GitHub/Linear_coding/generations/ikario_body/server/services/memoryService.js"

# Text that identifies the first line of the function to replace.
FUNCTION_SIGNATURE = 'export async function getMemoryStats()'

# Replacement implementation (JavaScript), written to the file verbatim.
new_function = '''export async function getMemoryStats() {
  const status = getMCPStatus();

  if (!isMCPConnected()) {
    return {
      connected: false,
      enabled: status.enabled,
      configured: status.configured,
      total_memories: 0,
      thoughts_count: 0,
      conversations_count: 0,
      last_save: null,
      error: status.error,
      serverPath: status.serverPath,
    };
  }

  try {
    // Count thoughts using search_thoughts with broad query
    let thoughtsCount = 0;
    try {
      const thoughtsResult = await callMCPTool('search_thoughts', {
        query: 'a',  // Simple query that will match most thoughts
        n_results: 100
      });

      // Parse the text response to count thoughts
      const thoughtsText = thoughtsResult.content?.[0]?.text || '';
      const thoughtMatches = thoughtsText.match(/\\[Pertinence:/g);
      thoughtsCount = thoughtMatches ? thoughtMatches.length : 0;
    } catch (err) {
      console.log('[getMemoryStats] Could not count thoughts:', err.message);
    }

    // Count conversations using search_conversations with search_level="full"
    let conversationsCount = 0;
    try {
      const convsResult = await callMCPTool('search_conversations', {
        query: 'a',  // Simple query that will match most conversations
        n_results: 100,
        search_level: 'full'
      });

      // Parse the text response to count conversations
      const convsText = convsResult.content?.[0]?.text || '';
      const convMatches = convsText.match(/\\[Pertinence:/g);
      conversationsCount = convMatches ? convMatches.length : 0;
    } catch (err) {
      console.log('[getMemoryStats] Could not count conversations:', err.message);
    }

    const totalMemories = thoughtsCount + conversationsCount;

    return {
      connected: true,
      enabled: status.enabled,
      configured: status.configured,
      total_memories: totalMemories,
      thoughts_count: thoughtsCount,
      conversations_count: conversationsCount,
      last_save: new Date().toISOString(), // Would need to track this separately
      error: null,
      serverPath: status.serverPath,
    };
  } catch (error) {
    return {
      connected: true,
      enabled: status.enabled,
      configured: status.configured,
      total_memories: 0,
      thoughts_count: 0,
      conversations_count: 0,
      last_save: null,
      error: error.message,
      serverPath: status.serverPath,
    };
  }
}
'''

# JSDoc block written in front of the replacement function.
NEW_JSDOC = [
    '/**\n',
    ' * Get basic statistics about the memory store\n',
    ' * Counts thoughts and conversations separately using dedicated search tools\n',
    ' *\n',
    ' * @returns {Promise<Object>} Statistics about the memory store\n',
    ' */\n',
]


def _find_function_start(lines):
    """Return the index of the first line containing the signature, or None."""
    return next(
        (i for i, line in enumerate(lines) if FUNCTION_SIGNATURE in line),
        None,
    )


def _find_function_end(lines, start_line):
    """Return the index of the line that closes the function body, or None.

    Braces are counted from ``start_line`` onwards; the function ends on the
    line where the depth returns to zero *after* at least one ``{`` has been
    seen. Waiting for the first ``{`` keeps brace-less lines between the
    signature and the body from being mistaken for the end, and lets a
    one-line function end on its own line.

    NOTE: this is plain character counting; braces inside string literals,
    template literals, regexes or comments are counted too.
    """
    depth = 0
    seen_open = False
    for i, line in enumerate(lines[start_line:], start=start_line):
        opened = line.count('{')
        depth += opened - line.count('}')
        seen_open = seen_open or opened > 0
        if seen_open and depth == 0:
            return i
    return None


def _find_comment_start(lines, start_line):
    """Return the index of the comment opener directly above the function.

    Walks upwards over blank lines and comment continuation lines (``*``)
    until a line starting with ``/*`` is found. If other content is reached
    first, ``start_line`` is returned so that nothing above the function
    (including blank separator lines) is removed.
    """
    i = start_line - 1
    while i >= 0:
        stripped = lines[i].strip()
        if stripped.startswith('/*'):
            return i
        if stripped and not stripped.startswith('*'):
            break
        i -= 1
    return start_line


def main(argv=None):
    """Patch the target file in place.

    Args:
        argv: Command-line arguments without the program name; defaults to
            ``sys.argv[1:]``. The first argument, if present, is the path of
            the file to patch; otherwise ``file_path`` is used.

    Returns:
        0 on success, 1 if the function or its end could not be located
        (the file is left untouched in that case).
    """
    args = sys.argv[1:] if argv is None else list(argv)
    target = args[0] if args else file_path

    # The status messages contain a check mark; on a stdout that cannot encode
    # it, print() would raise after the file has already been rewritten.
    # Best effort only: streams that cannot be reconfigured are left as they are.
    for stream in (sys.stdout, sys.stderr):
        try:
            stream.reconfigure(errors='replace')
        except (AttributeError, ValueError, OSError):
            pass

    # Read the file
    with open(target, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    # Locate the line containing "export async function getMemoryStats"
    start_line = _find_function_start(lines)
    if start_line is None:
        print("ERROR: Could not find getMemoryStats function")
        return 1

    # Locate the end of the function by brace counting
    end_line = _find_function_end(lines, start_line)
    if end_line is None:
        print("ERROR: Could not find end of getMemoryStats function")
        return 1

    print(f"Found getMemoryStats from line {start_line+1} to {end_line+1}")

    # The existing comment block above the function is replaced as well
    comment_start = _find_comment_start(lines, start_line)

    # Build the new file: untouched head, new JSDoc, new function, untouched tail
    new_lines = (
        lines[:comment_start]
        + NEW_JSDOC
        + [new_function, '\n']
        + lines[end_line+1:]
    )

    # Write the file
    with open(target, 'w', encoding='utf-8') as f:
        f.writelines(new_lines)

    print(f"✓ Successfully patched getMemoryStats (lines {comment_start+1} to {end_line+1})")
    print(f"✓ File saved: {target}")
    return 0


if __name__ == "__main__":
    sys.exit(main())
|
||||||
275
project_progress.md
Normal file
275
project_progress.md
Normal file
@@ -0,0 +1,275 @@
|
|||||||
|
# Linear Coding Project - Progress Tracking
|
||||||
|
|
||||||
|
**Last Updated**: 2025-12-18 16:45 CET
|
||||||
|
**Project**: Claude.ai Clone with Extended Thinking
|
||||||
|
**Linear Team**: TEAMPHI
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🎯 Current Sprint: Extended Thinking Implementation (TEAMPHI-190-204)
|
||||||
|
|
||||||
|
### 📊 Overall Progress: 80% Complete
|
||||||
|
|
||||||
|
| Issue | Feature | Status | Notes |
|
||||||
|
|-------|---------|--------|-------|
|
||||||
|
| TEAMPHI-190 | Extended Thinking Spec | ✅ Done | Spec document created |
|
||||||
|
| TEAMPHI-191 | Database Schema | ✅ Done | Migrations applied |
|
||||||
|
| TEAMPHI-192 | Backend API | ✅ Done | Routes updated |
|
||||||
|
| TEAMPHI-193 | Frontend State | ✅ Done | State management complete |
|
||||||
|
| TEAMPHI-194 | ThinkingBlock Component | ✅ Done | Tested and validated |
|
||||||
|
| TEAMPHI-195 | ThinkingBlock Integration | ✅ Done | Fully functional |
|
||||||
|
| TEAMPHI-196 | Settings Panel | ✅ Done | Tested with Puppeteer |
|
||||||
|
| TEAMPHI-197 | Budget Slider | ✅ Done | Tested with Puppeteer |
|
||||||
|
| TEAMPHI-198 | Thinking Badge | ✅ Done | Tested with Puppeteer |
|
||||||
|
| TEAMPHI-199 | Streaming Handler | ✅ Done | Fixed data structure mapping |
|
||||||
|
| TEAMPHI-200 | Tool Use Preservation | 🔄 Pending | Not started |
|
||||||
|
| TEAMPHI-201 | Token Tracking | 🔄 Pending | Not started |
|
||||||
|
| TEAMPHI-202 | Usage Stats | 🔄 Pending | Not started |
|
||||||
|
| TEAMPHI-203 | Error Handling | 🔄 Pending | Not started |
|
||||||
|
| TEAMPHI-204 | Documentation | 🔄 Pending | Not started |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## ✅ CRITICAL BUG RESOLVED (2025-12-18)
|
||||||
|
|
||||||
|
### Bug: max_tokens vs thinking_budget_tokens Conflict
|
||||||
|
|
||||||
|
**Status**: ✅ **FIXED**
|
||||||
|
|
||||||
|
**Solution Implemented:**
|
||||||
|
```javascript
|
||||||
|
// App.jsx line 4747-4749
|
||||||
|
const [maxTokens, setMaxTokens] = useState(8192)
|
||||||
|
const [enableThinking, setEnableThinking] = useState(false)
|
||||||
|
const [thinkingBudgetTokens, setThinkingBudgetTokens] = useState(6144) // 6K tokens
|
||||||
|
|
||||||
|
// server/db/index.js line 243
|
||||||
|
db.exec(`ALTER TABLE conversations ADD COLUMN thinking_budget_tokens INTEGER DEFAULT 6144;`)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Result**: `max_tokens` (8192) > `thinking_budget_tokens` (6144) ✅ **API Constraint Satisfied**
|
||||||
|
|
||||||
|
**Additional Fixes:**
|
||||||
|
- Frontend now correctly reads `data.thinking.content` and `data.thinking.signature` from SSE events
|
||||||
|
- Database updated: `thinking_budget_tokens` set to 4096 for all existing conversations; new conversations default to 6144
|
||||||
|
- Extended Thinking disabled by default (users must enable manually)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## ✅ Completed Features
|
||||||
|
|
||||||
|
### Backend Implementation
|
||||||
|
|
||||||
|
**Database Schema** (`server/db/index.js`):
|
||||||
|
- ✅ `conversations.enable_thinking` (INTEGER, default 0)
|
||||||
|
- ✅ `conversations.thinking_budget_tokens` (INTEGER, default 6144)
|
||||||
|
- ✅ `messages.thinking_content` (TEXT)
|
||||||
|
- ✅ `messages.thinking_signature` (TEXT)
|
||||||
|
|
||||||
|
**API Endpoints** (`server/routes/conversations.js`):
|
||||||
|
- ✅ PUT `/api/conversations/:id` accepts `enableThinking` and `thinkingBudgetTokens`
|
||||||
|
- ✅ Validation: budget range 1024-200000 tokens
|
||||||
|
|
||||||
|
**Message Streaming** (`server/routes/messages.js`):
|
||||||
|
- ✅ Read `enable_thinking` from conversations table (line 321)
|
||||||
|
- ✅ Build thinking parameters for Claude API (lines 365-374)
|
||||||
|
- ✅ Handle `thinking_delta` events during streaming (lines 416-423)
|
||||||
|
- ✅ Handle `signature_delta` events (lines 425-427)
|
||||||
|
- ✅ Save `thinking_content` and `thinking_signature` to DB (lines 509-511)
|
||||||
|
- ✅ Return thinking data in SSE `done` event (lines 559-563)
|
||||||
|
|
||||||
|
### Frontend Implementation
|
||||||
|
|
||||||
|
**ThinkingBlock Component** (`src/components/ThinkingBlock.jsx`):
|
||||||
|
- ✅ Collapsible UI with brain icon
|
||||||
|
- ✅ Header shows "Thinking..." during streaming
|
||||||
|
- ✅ Header shows "Claude's reasoning" after completion
|
||||||
|
- ✅ Token count estimate display
|
||||||
|
- ✅ Animated dots during streaming
|
||||||
|
- ✅ Expand/collapse functionality
|
||||||
|
- ✅ Monospace font for thinking content
|
||||||
|
- ✅ Blue color scheme (border-blue-200, bg-blue-50)
|
||||||
|
- ✅ Signature verification indicator
|
||||||
|
|
||||||
|
**Settings Panel** (`src/App.jsx` lines 4236-4316):
|
||||||
|
- ✅ Extended Thinking checkbox with brain icon
|
||||||
|
- ✅ Label and tooltip
|
||||||
|
- ✅ Conditional budget slider (visible when enabled)
|
||||||
|
- ✅ Budget range: 1K-32K tokens
|
||||||
|
- ✅ Visual indicator (shows "5K", "10K", etc.)
|
||||||
|
|
||||||
|
**State Management** (`src/App.jsx`):
|
||||||
|
- ✅ `enableThinking` state (line 4748)
|
||||||
|
- ✅ `thinkingBudgetTokens` state (line 4749)
|
||||||
|
- ✅ `streamingThinkingContent` state (line 4742)
|
||||||
|
- ✅ `handleEnableThinkingChange` with DB persistence (lines 5210-5235)
|
||||||
|
- ✅ `handleThinkingBudgetChange` with DB persistence (lines 5237-5251)
|
||||||
|
- ✅ Load settings from conversation on select (lines 4835-4841)
|
||||||
|
|
||||||
|
**UI Integration**:
|
||||||
|
- ✅ ThinkingBlock in Message component (line 3174)
|
||||||
|
- ✅ All props passed to ChatArea (line 5695)
|
||||||
|
- ✅ Thinking badge in sidebar (lines 2392-2399)
|
||||||
|
|
||||||
|
### Testing
|
||||||
|
|
||||||
|
**Automated Tests Created**:
|
||||||
|
- ✅ `test_extended_thinking.js` - Settings panel tests (PASSED)
|
||||||
|
- ✅ `test_thinking_badge.js` - Badge visibility tests (PASSED)
|
||||||
|
- ✅ `test_thinking_badge_simple.js` - Simplified badge test (PASSED)
|
||||||
|
|
||||||
|
**Manual Testing (2025-12-18)**:
|
||||||
|
- ✅ Settings panel visible and functional
|
||||||
|
- ✅ Budget slider appears when Extended Thinking enabled
|
||||||
|
- ✅ Badge appears in sidebar for conversations with Extended Thinking
|
||||||
|
- ✅ ThinkingBlock displays correctly with blue UI
|
||||||
|
- ✅ Thinking content persists after streaming
|
||||||
|
- ✅ Expand/collapse functionality works
|
||||||
|
- ✅ Signature verification indicator shows
|
||||||
|
- ✅ Real API test successful with Whitehead philosophy question
|
||||||
|
|
||||||
|
**Test Configuration Used**:
|
||||||
|
- max_tokens: 8192
|
||||||
|
- thinking_budget_tokens: 6000 (user-tested, now default 6144)
|
||||||
|
- Extended Thinking: Manually enabled via checkbox
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🐛 Known Bugs and Issues
|
||||||
|
|
||||||
|
### 1. ✅ FIXED: max_tokens vs budget conflict
|
||||||
|
**Status**: ✅ Fixed on 2025-12-18
|
||||||
|
**Solution**: Set max_tokens=8192, thinking_budget_tokens=6144
|
||||||
|
**Location**: `src/App.jsx` lines 4747-4749, `server/db/index.js` line 243
|
||||||
|
|
||||||
|
### 2. ✅ FIXED: Frontend SSE data mapping
|
||||||
|
**Status**: ✅ Fixed on 2025-12-18
|
||||||
|
**Solution**: Changed from `data.thinking_signature` to `data.thinking.signature`
|
||||||
|
**Location**: `src/App.jsx` line 5566
|
||||||
|
|
||||||
|
### 3. ✅ FIXED: streamingThinkingContent not passed to ChatArea
|
||||||
|
**Status**: Fixed in commit 91ea3ec
|
||||||
|
**Issue**: ReferenceError caused interface crash
|
||||||
|
**Fix**: Added `streamingThinkingContent` to ChatArea props
|
||||||
|
|
||||||
|
### 4. ✅ FIXED: Vite proxy wrong port
|
||||||
|
**Status**: Fixed in commit 0a4072d
|
||||||
|
**Issue**: Frontend couldn't connect to backend
|
||||||
|
**Fix**: Changed proxy from localhost:3004 to localhost:3001
|
||||||
|
|
||||||
|
### 5. ✅ FIXED: Extended Thinking props not passed to ChatArea
|
||||||
|
**Status**: Fixed in commit d447e69
|
||||||
|
**Issue**: enableThinking undefined in ChatArea
|
||||||
|
**Fix**: Added props to ChatArea signature and render call
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 📝 Commits History
|
||||||
|
|
||||||
|
| Commit | Message | Files Changed |
|
||||||
|
|--------|---------|---------------|
|
||||||
|
| 91ea3ec | Fix critical bug: pass streamingThinkingContent to ChatArea | src/App.jsx |
|
||||||
|
| 8864bdc | Add Thinking badge to conversation list | src/App.jsx |
|
||||||
|
| 0a4072d | Fix Vite proxy configuration | vite.config.js |
|
||||||
|
| d447e69 | Fix Extended Thinking props not passed to ChatArea | src/App.jsx |
|
||||||
|
| 1091f65 | Add Extended Thinking settings panel and budget slider | src/App.jsx |
|
||||||
|
| 530e54b | Integrate ThinkingBlock into message display | src/App.jsx, src/components/ThinkingBlock.jsx |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🔄 Database State
|
||||||
|
|
||||||
|
**Extended Thinking Status** (as of 2025-12-18 16:45):
|
||||||
|
- ✅ 10+ conversations with `enable_thinking = 1`, `thinking_budget_tokens = 4096`
|
||||||
|
- ✅ New conversations default to `enable_thinking = 0`, `thinking_budget_tokens = 6144`
|
||||||
|
- ✅ Messages with thinking_content successfully saved (tested with Whitehead question)
|
||||||
|
- ✅ Thinking content persists and displays correctly on reload
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🎯 Next Steps
|
||||||
|
|
||||||
|
### ✅ Completed Actions (2025-12-18)
|
||||||
|
|
||||||
|
1. ✅ **FIXED CRITICAL BUG** - max_tokens vs budget conflict resolved
|
||||||
|
2. ✅ **TESTED Extended Thinking End-to-End** - All tests passed
|
||||||
|
3. ✅ **VALIDATED and MARKED DONE** - TEAMPHI-194, 195, 199 completed
|
||||||
|
4. ✅ **Fixed UX Issues** - Extended Thinking disabled by default, optimal defaults set
|
||||||
|
|
||||||
|
### Remaining Work (20% of Sprint)
|
||||||
|
|
||||||
|
**Priority: Medium**
|
||||||
|
- TEAMPHI-200: Tool use preservation during Extended Thinking
|
||||||
|
- TEAMPHI-201: Token tracking for thinking vs output
|
||||||
|
- TEAMPHI-202: Usage stats display
|
||||||
|
- TEAMPHI-203: Error handling improvements
|
||||||
|
- TEAMPHI-204: User documentation
|
||||||
|
|
||||||
|
**Notes:**
|
||||||
|
- Core Extended Thinking feature is **fully functional**
|
||||||
|
- Remaining issues are enhancements and polish
|
||||||
|
- Can be completed incrementally without blocking usage
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 📚 Key Files Reference
|
||||||
|
|
||||||
|
### Backend
|
||||||
|
- `server/routes/messages.js` - Main Extended Thinking logic (lines 320-574)
|
||||||
|
- `server/routes/conversations.js` - Settings update endpoints (lines 143-199)
|
||||||
|
- `server/db/index.js` - Database migrations (lines 234-258)
|
||||||
|
|
||||||
|
### Frontend
|
||||||
|
- `src/App.jsx` - Main application file
|
||||||
|
- State: lines 4742, 4748-4749
|
||||||
|
- Handlers: lines 5210-5251
|
||||||
|
- Settings UI: lines 4236-4316
|
||||||
|
- Message integration: line 3174
|
||||||
|
- ChatArea props: line 5695
|
||||||
|
- `src/components/ThinkingBlock.jsx` - ThinkingBlock component (complete file)
|
||||||
|
|
||||||
|
### Tests
|
||||||
|
- `test_extended_thinking.js` - Settings panel tests
|
||||||
|
- `test_thinking_badge.js` - Badge tests
|
||||||
|
- `test_thinkingblock_real.js` - Real API test (blocked)
|
||||||
|
|
||||||
|
### Utilities
|
||||||
|
- `activate_thinking.py` - Script to enable Extended Thinking in DB
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🎓 Lessons Learned
|
||||||
|
|
||||||
|
### Protocol Violations Caught
|
||||||
|
1. **Not testing before moving on** - User reminded: "toujours tester chaque feature avant de passer à la suivante" ("always test each feature before moving on to the next one")
|
||||||
|
2. **Fixed by**: Creating tests for each feature before marking Done
|
||||||
|
|
||||||
|
### Technical Challenges
|
||||||
|
1. **Puppeteer interaction issues** - Browser rendering problems in headless mode
|
||||||
|
2. **API parameter conflicts** - max_tokens vs thinking_budget validation
|
||||||
|
3. **State propagation** - Props not passed through component hierarchy
|
||||||
|
4. **Database sync** - Frontend state vs DB state mismatch
|
||||||
|
|
||||||
|
### Best Practices Reinforced
|
||||||
|
1. Always test each feature before moving on to the next one
|
||||||
|
2. Add logging to debug state propagation issues
|
||||||
|
3. Verify API constraints before setting defaults
|
||||||
|
4. Use database scripts to validate state changes
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 📞 Support Information
|
||||||
|
|
||||||
|
**Project Repository**: C:\GitHub\Linear_coding
|
||||||
|
**Application Type**: Claude.ai Clone (React + Node.js)
|
||||||
|
**Tech Stack**: React, Vite, Express, better-sqlite3, Anthropic SDK
|
||||||
|
**Servers**:
|
||||||
|
- Backend: http://localhost:3001 (or 3004 if port occupied)
|
||||||
|
- Frontend: http://localhost:5178 (Vite auto-selects available port)
|
||||||
|
|
||||||
|
**Database**: `generations/ikario_body/server/data/claude-clone.db`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🔖 Tags
|
||||||
|
`#extended-thinking` `#claude-api` `#thinking-blocks` `#linear-integration` `#react` `#nodejs`
|
||||||
@@ -1,663 +0,0 @@
|
|||||||
<project_specification>
|
|
||||||
<project_name>Extension MCP Ikario Memory - Claude.ai Clone</project_name>
|
|
||||||
|
|
||||||
<overview>
|
|
||||||
Extension du projet "Claude.ai Clone" existant pour intégrer la mémoire conversationnelle via le protocole MCP (Model Context Protocol) avec le serveur Ikario RAG.
|
|
||||||
|
|
||||||
Cette extension ajoute au clone Claude.ai existant la capacité pour le LLM de :
|
|
||||||
- Sauvegarder automatiquement les conversations importantes dans une mémoire vectorielle (ChromaDB)
|
|
||||||
- Rechercher sémantiquement dans ses souvenirs passés pour enrichir les réponses
|
|
||||||
- Tracer l'évolution de concepts discutés au fil du temps
|
|
||||||
- Vérifier la cohérence de nouvelles affirmations avec l'historique des conversations
|
|
||||||
|
|
||||||
Le projet Claude.ai Clone dispose déjà de :
|
|
||||||
- Interface de chat complète avec streaming SSE
|
|
||||||
- Gestion de conversations et messages (base SQLite)
|
|
||||||
- Intégration Claude API avec Anthropic SDK
|
|
||||||
- Frontend React + Vite + Tailwind
|
|
||||||
- Backend Node.js + Express
|
|
||||||
|
|
||||||
Cette extension ajoute simplement l'intégration du client MCP Ikario RAG pour donner une mémoire sémantique au LLM.
|
|
||||||
</overview>
|
|
||||||
|
|
||||||
<technology_stack>
|
|
||||||
<existing_stack>
|
|
||||||
Le projet dispose déjà de :
|
|
||||||
- Frontend : React + Vite + Tailwind CSS (port 4300)
|
|
||||||
- Backend : Node.js + Express + SQLite (port 4301)
|
|
||||||
- API Claude : Anthropic SDK avec streaming SSE
|
|
||||||
- Gestion de conversations : base SQLite avec tables conversations et messages
|
|
||||||
</existing_stack>
|
|
||||||
|
|
||||||
<new_integration>
|
|
||||||
<mcp_client>
|
|
||||||
- Package : @modelcontextprotocol/sdk (client MCP Node.js)
|
|
||||||
- Serveur MCP : ikario_rag/server.py (Python)
|
|
||||||
- Communication : stdio (stdin/stdout avec JSON-RPC 2.0)
|
|
||||||
- Localisation : chemin configuré dans .env (MCP_IKARIO_SERVER_PATH)
|
|
||||||
</mcp_client>
|
|
||||||
|
|
||||||
<mcp_tools>
|
|
||||||
Les 4 outils MCP Ikario disponibles :
|
|
||||||
1. search_memories : Recherche sémantique dans les souvenirs
|
|
||||||
2. add_thought : Sauvegarde une pensée/conversation avec métadonnées
|
|
||||||
3. trace_concept_evolution : Trace l'évolution temporelle d'un concept
|
|
||||||
4. check_consistency : Vérifie la cohérence d'une affirmation avec l'historique
|
|
||||||
</mcp_tools>
|
|
||||||
|
|
||||||
<memory_database>
|
|
||||||
- Base vectorielle : ChromaDB (gérée par serveur MCP)
|
|
||||||
- Embeddings : SentenceTransformer all-MiniLM-L6-v2 (384 dimensions)
|
|
||||||
- Stockage : ./ikario_rag/index/ (persistance sur disque)
|
|
||||||
- Métadonnées : category, tags, emotions, concepts, date
|
|
||||||
</memory_database>
|
|
||||||
</new_integration>
|
|
||||||
</technology_stack>
|
|
||||||
|
|
||||||
<prerequisites>
|
|
||||||
<existing_project>
|
|
||||||
- Projet Claude.ai Clone déjà fonctionnel
|
|
||||||
- Base SQLite avec tables conversations et messages opérationnelles
|
|
||||||
- API endpoints Claude existants (/api/claude/chat, /api/conversations/*)
|
|
||||||
- Frontend React avec composants de chat déjà en place
|
|
||||||
</existing_project>
|
|
||||||
|
|
||||||
<mcp_server_setup>
|
|
||||||
- Serveur MCP Ikario RAG installé dans ./ikario_rag/
|
|
||||||
- Python 3.11+ avec dépendances : chromadb, sentence-transformers, mcp
|
|
||||||
- Serveur testé et fonctionnel (peut être lancé manuellement via python server.py)
|
|
||||||
- Base ChromaDB initialisée dans ./ikario_rag/index/
|
|
||||||
</mcp_server_setup>
|
|
||||||
|
|
||||||
<backend_dependencies>
|
|
||||||
- Installer @modelcontextprotocol/sdk dans le backend
|
|
||||||
- Ajouter variables d'environnement au .env :
|
|
||||||
* MCP_IKARIO_SERVER_PATH=path/to/ikario_rag/server.py
|
|
||||||
* MCP_MEMORY_ENABLED=true (pour activer/désactiver la fonctionnalité)
|
|
||||||
</backend_dependencies>
|
|
||||||
</prerequisites>
|
|
||||||
|
|
||||||
<core_features>
|
|
||||||
<feature_1>
|
|
||||||
<title>Module backend de connexion MCP Ikario</title>
|
|
||||||
<description>
|
|
||||||
Créer un module backend qui initialise et gère la connexion au serveur MCP Ikario RAG.
|
|
||||||
|
|
||||||
Fonctionnalités :
|
|
||||||
- Module server/services/mcpClient.js qui encapsule le client MCP
|
|
||||||
- Initialisation au démarrage du serveur Express
|
|
||||||
- Lecture de la configuration depuis .env (MCP_IKARIO_SERVER_PATH)
|
|
||||||
- Gestion du cycle de vie de la connexion (connect, disconnect, reconnect)
|
|
||||||
- Pool de connexions ou singleton pour éviter les multiples connexions
|
|
||||||
- Gestion des erreurs et timeout
|
|
||||||
- Logging détaillé des appels MCP
|
|
||||||
|
|
||||||
Technique :
|
|
||||||
- Import de @modelcontextprotocol/sdk
|
|
||||||
- StdioClientTransport (SDK Node.js) avec command="python" et args=[server_path]
|
|
||||||
- client.connect(transport) pour créer la connexion
|
|
||||||
- Instance Client du SDK pour gérer les appels
|
|
||||||
- Export de fonctions : initMCP(), getMCPClient(), closeMCP()
|
|
||||||
- Initialiser dans server/index.js au démarrage
|
|
||||||
|
|
||||||
Intégration :
|
|
||||||
- S'intègre dans server/index.js existant
|
|
||||||
- Nouveau fichier server/services/mcpClient.js
|
|
||||||
- Pas de modification de la base SQLite
|
|
||||||
</description>
|
|
||||||
<priority>1</priority>
|
|
||||||
<category>backend</category>
|
|
||||||
<test_steps>
|
|
||||||
1. Configurer MCP_IKARIO_SERVER_PATH dans .env
|
|
||||||
2. Démarrer le serveur backend
|
|
||||||
3. Vérifier les logs : "MCP Ikario client connected"
|
|
||||||
4. Vérifier qu'aucune erreur n'est levée
|
|
||||||
5. Arrêter le serveur et vérifier la déconnexion propre
|
|
||||||
6. Tester avec un mauvais chemin et vérifier la gestion d'erreur
|
|
||||||
</test_steps>
|
|
||||||
</feature_1>
|
|
||||||
|
|
||||||
<feature_2>
|
|
||||||
<title>Service wrapper pour les 4 outils MCP</title>
|
|
||||||
<description>
|
|
||||||
Créer un service backend qui expose les 4 outils MCP Ikario sous forme de fonctions JavaScript utilisables dans l'application.
|
|
||||||
|
|
||||||
Fonctionnalités :
|
|
||||||
- Module server/services/memoryService.js
|
|
||||||
- 4 fonctions async qui wrappent les appels MCP :
|
|
||||||
* searchMemories(query, n_results, filter_category)
|
|
||||||
* addThought(content, context)
|
|
||||||
* traceConceptEvolution(concept, limit)
|
|
||||||
* checkConsistency(statement)
|
|
||||||
- Gestion des erreurs spécifiques à chaque outil
|
|
||||||
- Parsing et formatage des réponses MCP
|
|
||||||
- Validation des paramètres avant appel
|
|
||||||
- Logging des appels et résultats
|
|
||||||
|
|
||||||
Technique :
|
|
||||||
- Import de mcpClient.getMCPClient()
|
|
||||||
- Appels via client.callTool({ name: tool_name, arguments })
|
|
||||||
- Parsing des réponses (format TextContent)
|
|
||||||
- Conversion en objets JavaScript utilisables
|
|
||||||
- Export des 4 fonctions
|
|
||||||
|
|
||||||
Intégration :
|
|
||||||
- Nouveau fichier server/services/memoryService.js
|
|
||||||
- Utilisé par les routes API
|
|
||||||
- Pas de modification de l'existant
|
|
||||||
</description>
|
|
||||||
<priority>1</priority>
|
|
||||||
<category>backend</category>
|
|
||||||
<test_steps>
|
|
||||||
1. Créer un script de test qui importe memoryService
|
|
||||||
2. Tester searchMemories("test", 5) et vérifier le retour
|
|
||||||
3. Tester addThought() avec un contenu simple
|
|
||||||
4. Vérifier que les erreurs sont bien catchées
|
|
||||||
5. Vérifier les logs des appels MCP
|
|
||||||
6. Tester avec MCP désactivé et vérifier le fallback
|
|
||||||
</test_steps>
|
|
||||||
</feature_2>
|
|
||||||
|
|
||||||
<feature_3>
|
|
||||||
<title>Routes API pour les outils MCP</title>
|
|
||||||
<description>
|
|
||||||
Créer les routes Express qui exposent les 4 outils MCP via l'API REST existante.
|
|
||||||
|
|
||||||
Fonctionnalités :
|
|
||||||
- Nouveau routeur server/routes/memory.js
|
|
||||||
- 4 routes POST :
|
|
||||||
* /api/memory/search (search_memories)
|
|
||||||
* /api/memory/add (add_thought)
|
|
||||||
* /api/memory/evolution (trace_concept_evolution)
|
|
||||||
* /api/memory/consistency (check_consistency)
|
|
||||||
- Validation des paramètres req.body
|
|
||||||
- Appel des fonctions de memoryService
|
|
||||||
- Réponses JSON standardisées
|
|
||||||
- Gestion d'erreurs avec codes HTTP appropriés (400, 500)
|
|
||||||
- Middleware optionnel d'authentification
|
|
||||||
|
|
||||||
Technique :
|
|
||||||
- express.Router() dans server/routes/memory.js
|
|
||||||
- Import de memoryService
|
|
||||||
- try/catch pour chaque route
|
|
||||||
- Validation avec express-validator ou manuelle
|
|
||||||
- Monter le routeur dans server/index.js : app.use('/api/memory', memoryRoutes)
|
|
||||||
|
|
||||||
Intégration :
|
|
||||||
- Nouveau fichier server/routes/memory.js
|
|
||||||
- Import dans server/index.js existant
|
|
||||||
- S'ajoute aux routes existantes (/api/conversations, /api/claude/*)
|
|
||||||
</description>
|
|
||||||
<priority>1</priority>
|
|
||||||
<category>backend</category>
|
|
||||||
<test_steps>
|
|
||||||
1. Tester POST /api/memory/search avec Postman ou curl
|
|
||||||
2. Vérifier la réponse JSON avec résultats de recherche
|
|
||||||
3. Tester POST /api/memory/add avec un contenu simple
|
|
||||||
4. Vérifier que la pensée est ajoutée dans ChromaDB
|
|
||||||
5. Tester les erreurs (paramètres manquants)
|
|
||||||
6. Vérifier les logs et codes HTTP
|
|
||||||
</test_steps>
|
|
||||||
</feature_3>
|
|
||||||
|
|
||||||
<feature_4>
|
|
||||||
<title>Bouton "Sauvegarder dans la mémoire" dans le chat</title>
|
|
||||||
<description>
|
|
||||||
Ajouter un bouton discret dans l'interface de chat pour sauvegarder manuellement une conversation dans la mémoire.
|
|
||||||
|
|
||||||
Fonctionnalités :
|
|
||||||
- Bouton icône "cerveau" ou "étoile" près de chaque message assistant
|
|
||||||
- Au clic : ouvre un petit modal/popover
|
|
||||||
- Formulaire rapide : Catégorie (select) + Tags (input, optionnel)
|
|
||||||
- Bouton "Sauvegarder"
|
|
||||||
- Sauvegarde la conversation complète (tous les messages de la conversation active)
|
|
||||||
- Notification toast de confirmation
|
|
||||||
- Bouton devient "Déjà sauvegardé" après sauvegarde (state local)
|
|
||||||
|
|
||||||
Technique :
|
|
||||||
- Ajout de bouton dans le composant Message existant
|
|
||||||
- State local pour modal (useState)
|
|
||||||
- Appel API POST /api/memory/add au submit
|
|
||||||
- Utilisation de conversation_id pour récupérer tous les messages
|
|
||||||
- Toast notification avec react-hot-toast ou équivalent
|
|
||||||
|
|
||||||
Intégration :
|
|
||||||
- Modifier le composant Message/ChatMessage existant
|
|
||||||
- Ajouter une icône Lucide React (Brain, Star, ou Bookmark)
|
|
||||||
- Modal/Popover avec Headless UI ou simple div conditionnelle
|
|
||||||
</description>
|
|
||||||
<priority>2</priority>
|
|
||||||
<category>frontend</category>
|
|
||||||
<test_steps>
|
|
||||||
1. Avoir une conversation dans le chat
|
|
||||||
2. Voir les boutons "Sauvegarder" apparaître
|
|
||||||
3. Cliquer sur un bouton et voir le modal
|
|
||||||
4. Sélectionner catégorie "thematique" et ajouter des tags
|
|
||||||
5. Cliquer "Sauvegarder" et voir la notification
|
|
||||||
6. Vérifier que le bouton devient "Déjà sauvegardé"
|
|
||||||
7. Rechercher la conversation sauvegardée via l'API
|
|
||||||
</test_steps>
|
|
||||||
</feature_4>
|
|
||||||
|
|
||||||
<feature_5>
|
|
||||||
<title>Panel de recherche de souvenirs dans la sidebar</title>
|
|
||||||
<description>
|
|
||||||
Ajouter une section de recherche de souvenirs dans la sidebar gauche existante.
|
|
||||||
|
|
||||||
Fonctionnalités :
|
|
||||||
- Nouvel onglet/section "Mémoire" dans la sidebar existante (après Conversations)
|
|
||||||
- Champ de recherche avec placeholder "Rechercher dans mes souvenirs..."
|
|
||||||
- Liste des résultats affichés en dessous
|
|
||||||
- Pour chaque résultat : Score | Extrait (2 lignes) | Date | Tags
|
|
||||||
- Clic sur un résultat : affiche le détail complet dans un modal
|
|
||||||
- Filtre par catégorie (4 boutons radio : Toutes | Fondatrice | Thématique | Contextuelle)
|
|
||||||
- Maximum 10 résultats affichés
|
|
||||||
|
|
||||||
Technique :
|
|
||||||
- Ajouter section dans Sidebar.jsx existant
|
|
||||||
- Nouvel état pour searchQuery et searchResults
|
|
||||||
- Debounce sur l'input (useDebounce hook)
|
|
||||||
- Appel API POST /api/memory/search avec body {query, filter_category}
|
|
||||||
- Affichage avec Tailwind, style similaire à la liste de conversations
|
|
||||||
- Modal pour détail (réutiliser un modal existant si possible)
|
|
||||||
|
|
||||||
Intégration :
|
|
||||||
- Modification de src/components/Sidebar.jsx existant
|
|
||||||
- Ajouter un toggle pour afficher/masquer la section Mémoire
|
|
||||||
- S'intègre visuellement avec le design existant
|
|
||||||
</description>
|
|
||||||
<priority>2</priority>
|
|
||||||
<category>frontend</category>
|
|
||||||
<test_steps>
|
|
||||||
1. Ouvrir l'application et voir la section "Mémoire" dans la sidebar
|
|
||||||
2. Taper une requête dans le champ de recherche
|
|
||||||
3. Voir les résultats apparaître avec scores et tags
|
|
||||||
4. Cliquer sur un résultat et voir le modal de détail
|
|
||||||
5. Tester les filtres par catégorie
|
|
||||||
6. Vérifier que la recherche est debounced (pas d'appel à chaque lettre)
|
|
||||||
7. Vérifier le style cohérent avec le design existant
|
|
||||||
</test_steps>
|
|
||||||
</feature_5>
|
|
||||||
|
|
||||||
<feature_6>
|
|
||||||
<title>Sauvegarde automatique des conversations importantes</title>
|
|
||||||
<description>
|
|
||||||
Implémenter la sauvegarde automatique des conversations marquées comme "importantes" ou après un certain nombre de messages.
|
|
||||||
|
|
||||||
Fonctionnalités :
|
|
||||||
- Détection automatique : conversations de >10 messages OU marquées "épinglées"
|
|
||||||
- Déclenchement : en background après chaque nouveau message assistant
|
|
||||||
- Extraction automatique basique :
|
|
||||||
* Catégorie : "contextuelle" par défaut
|
|
||||||
* Tags : mots en majuscules, mots >8 caractères, mots répétés
|
|
||||||
* Date : timestamp du message
|
|
||||||
- Sauvegarde silencieuse (pas de modal, juste log backend)
|
|
||||||
- Flag dans table conversations : has_memory_backup INTEGER DEFAULT 0 (booléen 0/1)
|
|
||||||
|
|
||||||
Technique :
|
|
||||||
- Hook/middleware dans l'endpoint POST /api/conversations/:id/messages
|
|
||||||
- Après insertion du message assistant : vérifier conditions
|
|
||||||
- Si conditions remplies : appel async memoryService.addThought()
|
|
||||||
- Extraction tags basique avec regex/split
|
|
||||||
- UPDATE conversations SET has_memory_backup = 1 WHERE id = ?
|
|
||||||
|
|
||||||
Intégration :
|
|
||||||
- Modification de server/routes/conversations.js (endpoint POST messages)
|
|
||||||
- Ajout colonne has_memory_backup dans table conversations (migration)
|
|
||||||
- Appel non-bloquant (Promise.then, pas de await)
|
|
||||||
</description>
|
|
||||||
<priority>3</priority>
|
|
||||||
<category>backend</category>
|
|
||||||
<test_steps>
|
|
||||||
1. Créer une nouvelle conversation
|
|
||||||
2. Envoyer >10 messages
|
|
||||||
3. Vérifier dans les logs qu'une sauvegarde automatique est déclenchée
|
|
||||||
4. Rechercher la conversation dans l'API /api/memory/search
|
|
||||||
5. Vérifier que has_memory_backup = 1 dans la DB
|
|
||||||
6. Tester avec une conversation épinglée (<10 messages)
|
|
||||||
7. Vérifier que les tags sont extraits correctement
|
|
||||||
</test_steps>
|
|
||||||
</feature_6>
|
|
||||||
|
|
||||||
<feature_7>
|
|
||||||
<title>Indicateur visuel de mémoire active</title>
|
|
||||||
<description>
|
|
||||||
Afficher un indicateur visuel dans l'interface pour montrer que la mémoire est active et utilisée.
|
|
||||||
|
|
||||||
Fonctionnalités :
|
|
||||||
- Icône "cerveau" ou "mémoire" dans le header de l'application
|
|
||||||
- État : Vert (connecté) | Orange (déconnecté) | Gris (désactivé)
|
|
||||||
- Tooltip au survol : "Mémoire active - X souvenirs" ou "Mémoire déconnectée"
|
|
||||||
- Clic sur l'icône : ouvre un mini dashboard avec stats rapides
|
|
||||||
* Nombre total de souvenirs
|
|
||||||
* Dernière sauvegarde (timestamp)
|
|
||||||
* Connexion MCP : OK/KO
|
|
||||||
- Animation discrète lors d'une sauvegarde (pulse)
|
|
||||||
|
|
||||||
Technique :
|
|
||||||
- Composant React MemoryIndicator dans Header.jsx
|
|
||||||
- Appel API GET /api/memory/stats toutes les 30 secondes (setInterval)
|
|
||||||
- État pour connectionStatus : 'connected' | 'disconnected' | 'disabled'
|
|
||||||
- Icône Brain de Lucide React avec couleurs conditionnelles
|
|
||||||
- Popover Headless UI pour le mini dashboard
|
|
||||||
|
|
||||||
Intégration :
|
|
||||||
- Ajout dans src/components/Header.jsx existant
|
|
||||||
- S'intègre à côté du model selector
|
|
||||||
- Style cohérent avec le design claude.ai
|
|
||||||
</description>
|
|
||||||
<priority>3</priority>
|
|
||||||
<category>frontend</category>
|
|
||||||
<test_steps>
|
|
||||||
1. Voir l'icône mémoire dans le header (vert si connecté)
|
|
||||||
2. Survoler l'icône et lire le tooltip
|
|
||||||
3. Cliquer sur l'icône et voir le mini dashboard
|
|
||||||
4. Vérifier les stats (nombre de souvenirs, dernière sauvegarde)
|
|
||||||
5. Arrêter le serveur MCP et voir l'icône devenir orange
|
|
||||||
6. Sauvegarder une conversation et voir l'animation pulse
|
|
||||||
7. Vérifier le polling des stats (console network toutes les 30s)
|
|
||||||
</test_steps>
|
|
||||||
</feature_7>
|
|
||||||
|
|
||||||
<feature_8>
|
|
||||||
<title>Configuration des Tools Claude API pour la mémoire</title>
|
|
||||||
<description>
|
|
||||||
Exposer les outils MCP comme Tools dans l'API Claude pour que le LLM puisse décider de façon autonome d'utiliser sa mémoire.
|
|
||||||
|
|
||||||
Fonctionnalités :
|
|
||||||
- Définir 2 tools principaux pour l'API Claude :
|
|
||||||
1. save_memory : Sauvegarder une conversation ou pensée
|
|
||||||
2. search_memories : Rechercher dans les souvenirs passés
|
|
||||||
- Chaque tool avec description claire et JSON schema
|
|
||||||
- Tools toujours disponibles dans les appels Claude (sauf si désactivé)
|
|
||||||
- Gestion du cycle tool_use → tool_result
|
|
||||||
- Le LLM décide de façon autonome quand sauvegarder/rechercher
|
|
||||||
|
|
||||||
Définition des tools :
|
|
||||||
- save_memory :
|
|
||||||
* description: "Sauvegarde cette conversation ou une pensée importante dans ta mémoire à long terme"
|
|
||||||
* paramètres: content (requis), category (requis: fondatrice|thematique|contextuelle), tags (array), concepts (array), emotions (array optionnel)
|
|
||||||
- search_memories :
|
|
||||||
* description: "Recherche dans tes souvenirs de conversations passées"
|
|
||||||
* paramètres: query (requis), n_results (optionnel, default 5), filter_category (optionnel)
|
|
||||||
|
|
||||||
Technique :
|
|
||||||
- Modification de server/routes/claude.js
|
|
||||||
- Définir const MEMORY_TOOLS = [{...}, {...}]
|
|
||||||
- Ajouter tools: MEMORY_TOOLS dans anthropic.messages.create()
|
|
||||||
- Gérer response.stop_reason === 'tool_use'
|
|
||||||
- Extraire tool calls, exécuter via memoryService, renvoyer tool_result
|
|
||||||
- Boucle jusqu'à response finale (stop_reason === 'end_turn')
|
|
||||||
|
|
||||||
Intégration :
|
|
||||||
- Modification de server/routes/claude.js existant
|
|
||||||
- Nouveau fichier server/config/memoryTools.js pour définitions tools
|
|
||||||
- Handler de tool execution dans le streaming SSE
|
|
||||||
</description>
|
|
||||||
<priority>1</priority>
|
|
||||||
<category>backend</category>
|
|
||||||
<test_steps>
|
|
||||||
1. Dire à Claude "Cette conversation est importante, sauvegarde-la"
|
|
||||||
2. Vérifier dans les logs que Claude appelle tool save_memory
|
|
||||||
3. Vérifier que le backend exécute add_thought via MCP
|
|
||||||
4. Vérifier que Claude confirme la sauvegarde
|
|
||||||
5. Demander "Que sais-tu sur X ?" et voir Claude appeler search_memories
|
|
||||||
6. Vérifier que les souvenirs sont retournés et utilisés dans la réponse
|
|
||||||
7. Tester avec MCP_MEMORY_ENABLED=false
|
|
||||||
</test_steps>
|
|
||||||
</feature_8>
|
|
||||||
|
|
||||||
<feature_9>
|
|
||||||
<title>Handler de Tool Execution pour les outils mémoire</title>
|
|
||||||
<description>
|
|
||||||
Implémenter le handler qui exécute les tool calls de Claude et retourne les résultats.
|
|
||||||
|
|
||||||
Fonctionnalités :
|
|
||||||
- Fonction executeTool(tool_name, tool_input) qui route vers memoryService
|
|
||||||
- Support de save_memory → memoryService.addThought()
|
|
||||||
- Support de search_memories → memoryService.searchMemories()
|
|
||||||
- Formatage des résultats en tool_result compatible Claude API
|
|
||||||
- Gestion des erreurs (retourner error dans tool_result)
|
|
||||||
- Logging de chaque exécution de tool
|
|
||||||
- Timeout de 10 secondes par tool call
|
|
||||||
|
|
||||||
Workflow :
|
|
||||||
1. Claude retourne stop_reason='tool_use' + content avec tool_use block
|
|
||||||
2. Backend extrait tool_name et tool_input
|
|
||||||
3. executeTool() appelle la fonction MCP correspondante
|
|
||||||
4. Résultat formaté en tool_result
|
|
||||||
5. Nouvelle requête à Claude avec tool_result
|
|
||||||
6. Claude utilise le résultat pour sa réponse finale
|
|
||||||
|
|
||||||
Technique :
|
|
||||||
- Fonction async executeTool(tool_name, tool_input)
|
|
||||||
- Switch sur tool_name pour router
|
|
||||||
- Appel des fonctions memoryService
|
|
||||||
- Formatage : { type: "tool_result", tool_use_id, content }
|
|
||||||
- Gestion try/catch avec error reporting
|
|
||||||
|
|
||||||
Intégration :
|
|
||||||
- Nouveau fichier server/services/toolExecutor.js
|
|
||||||
- Appelé depuis server/routes/claude.js dans la boucle de streaming
|
|
||||||
</description>
|
|
||||||
<priority>1</priority>
|
|
||||||
<category>backend</category>
|
|
||||||
<test_steps>
|
|
||||||
1. Tester save_memory avec tous les paramètres
|
|
||||||
2. Vérifier que le tool_result est retourné correctement
|
|
||||||
3. Tester search_memories avec query simple
|
|
||||||
4. Vérifier le formatage des résultats
|
|
||||||
5. Tester avec des paramètres invalides (gestion erreur)
|
|
||||||
6. Vérifier les logs d'exécution
|
|
||||||
7. Tester le timeout avec un MCP server lent
|
|
||||||
</test_steps>
|
|
||||||
</feature_9>
|
|
||||||
|
|
||||||
<feature_10>
|
|
||||||
<title>System Prompt enrichi pour guider l'utilisation de la mémoire</title>
|
|
||||||
<description>
|
|
||||||
Ajouter des instructions au system prompt pour que Claude utilise intelligemment sa mémoire.
|
|
||||||
|
|
||||||
Fonctionnalités :
|
|
||||||
- Instructions claires sur quand sauvegarder :
|
|
||||||
* Conversations fondatrices (premières interactions, moments clés)
|
|
||||||
* Discussions thématiques approfondies (philosophie, science, art)
|
|
||||||
* Sur demande explicite de l'utilisateur
|
|
||||||
- Instructions sur quand rechercher :
|
|
||||||
* Questions qui font référence au passé
|
|
||||||
* Sujets déjà discutés
|
|
||||||
* Pour enrichir les réponses avec du contexte
|
|
||||||
- Guidance sur les métadonnées :
|
|
||||||
* Catégories : quand utiliser fondatrice vs thematique vs contextuelle
|
|
||||||
* Tags : extraire les concepts clés
|
|
||||||
* Émotions : optionnelles mais enrichissantes
|
|
||||||
- Ton naturel : ne pas annoncer systématiquement les sauvegardes
|
|
||||||
|
|
||||||
System Prompt ajouté :
|
|
||||||
```
|
|
||||||
Tu as accès à une mémoire persistante via deux outils :
|
|
||||||
|
|
||||||
1. save_memory : Sauvegarde les conversations importantes
|
|
||||||
- Utilise category='fondatrice' pour les moments clés et premières interactions
|
|
||||||
- Utilise category='thematique' pour les discussions approfondies sur des concepts
|
|
||||||
- Utilise category='contextuelle' pour les échanges quotidiens intéressants
|
|
||||||
- Extrais des tags pertinents (concepts clés, mots importants)
|
|
||||||
|
|
||||||
2. search_memories : Recherche dans tes souvenirs
|
|
||||||
- Utilise-le quand l'utilisateur fait référence à des discussions passées
|
|
||||||
- Ou pour enrichir ta réponse avec du contexte antérieur
|
|
||||||
|
|
||||||
Utilise ces outils de façon naturelle et autonome. Tu n'es pas obligé d'annoncer
|
|
||||||
chaque sauvegarde, sauf si c'est pertinent dans la conversation.
|
|
||||||
```
|
|
||||||
|
|
||||||
Technique :
|
|
||||||
- Ajout au system prompt existant dans server/routes/claude.js
|
|
||||||
- Concaténation avec custom instructions si présentes
|
|
||||||
- Paramètre enable_memory_tools (default: true) pour activer/désactiver
|
|
||||||
|
|
||||||
Intégration :
|
|
||||||
- Modification du system prompt dans server/routes/claude.js
|
|
||||||
- S'ajoute aux instructions existantes
|
|
||||||
</description>
|
|
||||||
<priority>2</priority>
|
|
||||||
<category>backend</category>
|
|
||||||
<test_steps>
|
|
||||||
1. Avoir une conversation initiale et voir si Claude la sauvegarde
|
|
||||||
2. Vérifier qu'il utilise la bonne catégorie
|
|
||||||
3. Faire référence à une discussion passée et voir s'il recherche
|
|
||||||
4. Vérifier que les tags extraits sont pertinents
|
|
||||||
5. Tester que Claude ne sur-annonce pas les sauvegardes
|
|
||||||
6. Demander explicitement une sauvegarde et vérifier
|
|
||||||
7. Vérifier que les émotions sont ajoutées quand pertinent
|
|
||||||
</test_steps>
|
|
||||||
</feature_10>
|
|
||||||
</core_features>
|
|
||||||
|
|
||||||
<ui_design>
|
|
||||||
<integration_notes>
|
|
||||||
Cette extension s'intègre dans le design existant du clone Claude.ai.
|
|
||||||
Tous les nouveaux composants doivent respecter le design system existant.
|
|
||||||
</integration_notes>
|
|
||||||
|
|
||||||
<new_components>
|
|
||||||
- MemoryIndicator : Icône Brain dans le header avec état coloré (vert/orange/gris)
|
|
||||||
- MemorySaveButton : Bouton discret dans chaque message assistant pour sauvegarde manuelle
|
|
||||||
- MemorySearchPanel : Section dans la sidebar pour rechercher dans les souvenirs
|
|
||||||
- MemoryDetailModal : Modal pour afficher le détail complet d'un souvenir
|
|
||||||
- SaveMemoryModal : Petit modal/popover pour choisir catégorie et tags avant sauvegarde
|
|
||||||
</new_components>
|
|
||||||
|
|
||||||
<color_scheme>
|
|
||||||
Utiliser les couleurs existantes du clone Claude.ai, avec ajouts pour la mémoire :
|
|
||||||
- Indicateur mémoire : Vert (#10B981) connecté | Orange (#F59E0B) déconnecté | Gris (#6B7280) désactivé
|
|
||||||
- Badges catégories :
|
|
||||||
* Fondatrice: Or/Jaune (#F59E0B)
|
|
||||||
* Thématique: Bleu (#3B82F6)
|
|
||||||
* Contextuelle: Gris (#6B7280)
|
|
||||||
- Scores de pertinence : Gradient vert (#10B981) à rouge (#EF4444)
|
|
||||||
</color_scheme>
|
|
||||||
|
|
||||||
<design_consistency>
|
|
||||||
- Respecter la palette claude.ai existante (orange/amber #CC785C comme accent)
|
|
||||||
- Utiliser les mêmes composants Tailwind que l'existant
|
|
||||||
- Polices : même font stack que le projet (Inter/SF Pro)
|
|
||||||
- Boutons : même style que les boutons existants
|
|
||||||
- Modals : réutiliser les modals Headless UI existants si possible
|
|
||||||
</design_consistency>
|
|
||||||
</ui_design>
|
|
||||||
|
|
||||||
<api_endpoints>
|
|
||||||
<note>
|
|
||||||
Ces endpoints s'ajoutent aux endpoints existants du clone Claude.ai.
|
|
||||||
Tous les endpoints mémoire sont sous le préfixe /api/memory.
|
|
||||||
</note>
|
|
||||||
|
|
||||||
<endpoint>
|
|
||||||
<method>POST</method>
|
|
||||||
<path>/api/memory/search</path>
|
|
||||||
<description>Rechercher sémantiquement dans les souvenirs (wrapper de search_memories MCP)</description>
|
|
||||||
<request_body>{"query": "string", "n_results": number (opt, default 5), "filter_category": "string (opt)"}</request_body>
|
|
||||||
<response>{"results": [{content, metadata, relevance_score, distance}]}</response>
|
|
||||||
</endpoint>
|
|
||||||
|
|
||||||
<endpoint>
|
|
||||||
<method>POST</method>
|
|
||||||
<path>/api/memory/add</path>
|
|
||||||
<description>Ajouter une pensée/conversation manuellement (wrapper de add_thought MCP)</description>
|
|
||||||
<request_body>{"content": "string", "context": {category, tags, emotions, concepts}}</request_body>
|
|
||||||
<response>{"id": "string", "message": "string"}</response>
|
|
||||||
</endpoint>
|
|
||||||
|
|
||||||
<endpoint>
|
|
||||||
<method>POST</method>
|
|
||||||
<path>/api/memory/evolution</path>
|
|
||||||
<description>Tracer l'évolution d'un concept (wrapper de trace_concept_evolution MCP)</description>
|
|
||||||
<request_body>{"concept": "string", "limit": number (opt, default 10)}</request_body>
|
|
||||||
<response>{"timeline": [{date, content, evolution}]}</response>
|
|
||||||
</endpoint>
|
|
||||||
|
|
||||||
<endpoint>
|
|
||||||
<method>POST</method>
|
|
||||||
<path>/api/memory/consistency</path>
|
|
||||||
<description>Vérifier cohérence d'une affirmation (wrapper de check_consistency MCP)</description>
|
|
||||||
<request_body>{"statement": "string"}</request_body>
|
|
||||||
<response>{"consistency_score": number, "contradictions": [...]}</response>
|
|
||||||
</endpoint>
|
|
||||||
|
|
||||||
<endpoint>
|
|
||||||
<method>GET</method>
|
|
||||||
<path>/api/memory/stats</path>
|
|
||||||
<description>Obtenir statistiques basiques sur la mémoire</description>
|
|
||||||
<request_body>N/A</request_body>
|
|
||||||
<response>{"connected": boolean, "total_memories": number (approx), "last_save": timestamp}</response>
|
|
||||||
</endpoint>
|
|
||||||
|
|
||||||
<existing_endpoints_modified>
|
|
||||||
<endpoint>
|
|
||||||
<method>POST</method>
|
|
||||||
<path>/api/claude/chat</path>
|
|
||||||
<modification>Ajouter logique d'enrichissement automatique avec souvenirs avant appel Claude</modification>
|
|
||||||
</endpoint>
|
|
||||||
|
|
||||||
<endpoint>
|
|
||||||
<method>POST</method>
|
|
||||||
<path>/api/conversations/:id/messages</path>
|
|
||||||
<modification>Ajouter logique de sauvegarde automatique après insertion message assistant</modification>
|
|
||||||
</endpoint>
|
|
||||||
</existing_endpoints_modified>
|
|
||||||
</api_endpoints>
|
|
||||||
|
|
||||||
<database_schema>
|
|
||||||
<chromadb>
|
|
||||||
La base vectorielle ChromaDB est entièrement gérée par le serveur MCP Ikario.
|
|
||||||
Le backend Node.js n'y accède pas directement, seulement via les outils MCP.
|
|
||||||
Emplacement : ./ikario_rag/index/
|
|
||||||
</chromadb>
|
|
||||||
|
|
||||||
<sqlite_modifications>
|
|
||||||
Modifications mineures à la base SQLite existante du clone Claude.ai :
|
|
||||||
|
|
||||||
<table>
|
|
||||||
<name>conversations (table existante - ajouter colonne)</name>
|
|
||||||
<new_column>has_memory_backup INTEGER DEFAULT 0</new_column>
|
|
||||||
<description>Flag indiquant si la conversation a été sauvegardée dans la mémoire</description>
|
|
||||||
</table>
|
|
||||||
|
|
||||||
<table>
|
|
||||||
<name>conversation_settings (table existante - optionnel)</name>
|
|
||||||
<new_column>enable_memory INTEGER DEFAULT 1</new_column>
|
|
||||||
<description>Active/désactive l'enrichissement automatique avec souvenirs pour cette conversation</description>
|
|
||||||
</table>
|
|
||||||
</sqlite_modifications>
|
|
||||||
|
|
||||||
<no_new_tables>
|
|
||||||
Cette extension ne crée pas de nouvelles tables SQLite.
|
|
||||||
Toute la mémoire sémantique est dans ChromaDB (géré par MCP).
|
|
||||||
</no_new_tables>
|
|
||||||
</database_schema>
|
|
||||||
|
|
||||||
<deployment>
|
|
||||||
<development>
|
|
||||||
Le projet Claude.ai Clone est déjà configuré et déployé.
|
|
||||||
Cette extension ajoute simplement :
|
|
||||||
- Le serveur MCP Ikario RAG (Python) qui doit tourner en background ou être lancé par le backend
|
|
||||||
- Variables d'environnement pour la connexion MCP
|
|
||||||
</development>
|
|
||||||
|
|
||||||
<environment_variables>
|
|
||||||
Ajouter au .env existant :
|
|
||||||
- MCP_IKARIO_SERVER_PATH : Chemin absolu vers ikario_rag/server.py
|
|
||||||
- MCP_MEMORY_ENABLED : true | false (pour activer/désactiver la feature)
|
|
||||||
</environment_variables>
|
|
||||||
|
|
||||||
<dependencies>
|
|
||||||
Backend (package.json) :
|
|
||||||
- Ajouter : @modelcontextprotocol/sdk (client MCP Node.js)
|
|
||||||
|
|
||||||
MCP Server (déjà installé) :
|
|
||||||
- Python 3.11+
|
|
||||||
- chromadb, sentence-transformers, numpy, mcp
|
|
||||||
</dependencies>
|
|
||||||
</deployment>
|
|
||||||
</project_specification>
|
|
||||||
498
prompts/app_spec_tavily_mcp.txt
Normal file
498
prompts/app_spec_tavily_mcp.txt
Normal file
@@ -0,0 +1,498 @@
|
|||||||
|
<project_specification>
|
||||||
|
<project_name>ikario - Tavily MCP Integration for Internet Access</project_name>
|
||||||
|
|
||||||
|
<overview>
|
||||||
|
This specification adds Tavily search capabilities via MCP (Model Context Protocol) to give Ikario
|
||||||
|
internet access for real-time web searches. Tavily provides high-quality search results optimized
|
||||||
|
for AI agents, making it ideal for research, fact-checking, and accessing current information.
|
||||||
|
|
||||||
|
This integration adds a new MCP server connection to the existing architecture (alongside the
|
||||||
|
ikario-memory MCP server) and exposes Tavily search tools to Ikario during conversations.
|
||||||
|
|
||||||
|
All changes are additive and backward-compatible. Existing functionality remains unchanged.
|
||||||
|
</overview>
|
||||||
|
|
||||||
|
<architecture_design>
|
||||||
|
<mcp_integration>
|
||||||
|
Tavily MCP Server Connection:
|
||||||
|
- Uses @modelcontextprotocol/sdk Client to connect to Tavily MCP server
|
||||||
|
- Connection can be stdio-based (local MCP server) or HTTP-based (remote)
|
||||||
|
- Tavily MCP server provides search tools that are exposed to Claude via Tool Use API
|
||||||
|
- Backend routes handle tool execution and return results to Claude
|
||||||
|
</mcp_integration>
|
||||||
|
|
||||||
|
<benefits>
|
||||||
|
- Real-time internet access for Ikario
|
||||||
|
- High-quality search results optimized for LLMs
|
||||||
|
- Fact-checking and verification capabilities
|
||||||
|
- Access to current events and news
|
||||||
|
- Research assistance with cited sources
|
||||||
|
- Seamless integration with existing memory tools
|
||||||
|
</benefits>
|
||||||
|
</architecture_design>
|
||||||
|
|
||||||
|
<technology_stack>
|
||||||
|
<mcp_server>
|
||||||
|
<name>Tavily MCP Server</name>
|
||||||
|
<protocol>Model Context Protocol (MCP)</protocol>
|
||||||
|
<connection>stdio or HTTP transport</connection>
|
||||||
|
<sdk>@modelcontextprotocol/sdk</sdk>
|
||||||
|
<api_key>Tavily API key (from https://tavily.com)</api_key>
|
||||||
|
</mcp_server>
|
||||||
|
<backend>
|
||||||
|
<runtime>Node.js with Express (existing)</runtime>
|
||||||
|
<mcp_client>MCP Client for Tavily server connection</mcp_client>
|
||||||
|
<tool_executor>Existing toolExecutor service extended with Tavily tools</tool_executor>
|
||||||
|
</backend>
|
||||||
|
<api_endpoints>
|
||||||
|
<tavily_routes>GET/POST /api/tavily/* for Tavily-specific operations</tavily_routes>
|
||||||
|
<existing_routes>Existing /api/claude/chat routes support Tavily tools automatically</existing_routes>
|
||||||
|
</api_endpoints>
|
||||||
|
</technology_stack>
|
||||||
|
|
||||||
|
<prerequisites>
|
||||||
|
<environment_setup>
|
||||||
|
- Tavily API key obtained from https://tavily.com (free tier available)
|
||||||
|
- API key stored in environment variable TAVILY_API_KEY or configuration file
|
||||||
|
- MCP SDK already installed (@modelcontextprotocol/sdk exists for ikario-memory)
|
||||||
|
- Tavily MCP server installed (npm package or Python package)
|
||||||
|
</environment_setup>
|
||||||
|
<configuration>
|
||||||
|
- Add Tavily MCP server config to server/.claude_settings.json or similar
|
||||||
|
- Configure connection parameters (stdio vs HTTP)
|
||||||
|
- Set API key securely
|
||||||
|
</configuration>
|
||||||
|
</prerequisites>
|
||||||
|
|
||||||
|
<core_features>
|
||||||
|
<feature_1>
|
||||||
|
<title>Tavily MCP Client Setup</title>
|
||||||
|
<description>
|
||||||
|
Create MCP client connection to Tavily search server. This is similar to the existing
|
||||||
|
ikario-memory MCP client but connects to Tavily instead.
|
||||||
|
|
||||||
|
Implementation:
|
||||||
|
- Create server/services/tavilyMcpClient.js
|
||||||
|
- Initialize MCP client with Tavily server connection
|
||||||
|
- Handle connection lifecycle (connect, disconnect, reconnect)
|
||||||
|
- Implement health checks and connection status
|
||||||
|
- Export client instance and helper functions
|
||||||
|
|
||||||
|
Configuration:
|
||||||
|
- Read Tavily API key from environment or config file
|
||||||
|
- Configure transport (stdio or HTTP)
|
||||||
|
- Set connection timeout and retry logic
|
||||||
|
- Log connection status for debugging
|
||||||
|
|
||||||
|
Error Handling:
|
||||||
|
- Graceful degradation if Tavily is unavailable
|
||||||
|
- Connection retry with exponential backoff
|
||||||
|
- Clear error messages for configuration issues
|
||||||
|
</description>
|
||||||
|
<priority>1</priority>
|
||||||
|
<category>backend</category>
|
||||||
|
<test_steps>
|
||||||
|
1. Verify MCP client can connect to Tavily server on startup
|
||||||
|
2. Test connection health check endpoint returns correct status
|
||||||
|
3. Verify graceful handling when Tavily API key is missing
|
||||||
|
4. Test reconnection logic when connection drops
|
||||||
|
5. Verify connection status is logged correctly
|
||||||
|
6. Test that server starts even if Tavily is unavailable
|
||||||
|
</test_steps>
|
||||||
|
</feature_1>
|
||||||
|
|
||||||
|
<feature_2>
|
||||||
|
<title>Tavily Tool Configuration</title>
|
||||||
|
<description>
|
||||||
|
Configure Tavily search tools to be available to Claude during conversations.
|
||||||
|
This integrates with the existing tool system (like memory tools).
|
||||||
|
|
||||||
|
Implementation:
|
||||||
|
- Create server/config/tavilyTools.js
|
||||||
|
- Define tool schemas for Tavily search capabilities
|
||||||
|
- Integrate with existing toolExecutor service
|
||||||
|
- Add Tavily tools to system prompt alongside memory tools
|
||||||
|
|
||||||
|
Tavily Tools to Expose:
|
||||||
|
- tavily_search: General web search with AI-optimized results
|
||||||
|
- Parameters: query (string), max_results (number), search_depth (basic/advanced)
|
||||||
|
- Returns: Array of search results with title, url, content, score
|
||||||
|
|
||||||
|
- tavily_search_news: News-specific search for current events
|
||||||
|
- Parameters: query (string), max_results (number), days (number)
|
||||||
|
- Returns: Recent news articles with metadata
|
||||||
|
|
||||||
|
Tool Schema:
|
||||||
|
- Follow Claude Tool Use API format
|
||||||
|
- Clear descriptions for each tool
|
||||||
|
- Well-defined input schemas with validation
|
||||||
|
- Proper error handling in tool execution
|
||||||
|
</description>
|
||||||
|
<priority>1</priority>
|
||||||
|
<category>backend</category>
|
||||||
|
<test_steps>
|
||||||
|
1. Verify Tavily tools are listed in available tools
|
||||||
|
2. Test tool schema validation with valid inputs
|
||||||
|
3. Test tool schema validation rejects invalid inputs
|
||||||
|
4. Verify tools appear in Claude's system prompt
|
||||||
|
5. Test that tool descriptions are clear and accurate
|
||||||
|
6. Verify tools can be called without errors
|
||||||
|
</test_steps>
|
||||||
|
</feature_2>
|
||||||
|
|
||||||
|
<feature_3>
|
||||||
|
<title>Tavily Tool Executor Integration</title>
|
||||||
|
<description>
|
||||||
|
Integrate Tavily tools into the existing toolExecutor service so Claude can
|
||||||
|
use them during conversations.
|
||||||
|
|
||||||
|
Implementation:
|
||||||
|
- Extend server/services/toolExecutor.js to handle Tavily tools
|
||||||
|
- Add tool detection for tavily_search and tavily_search_news
|
||||||
|
- Implement tool execution logic using Tavily MCP client
|
||||||
|
- Format Tavily results for Claude consumption
|
||||||
|
- Handle errors and timeouts gracefully
|
||||||
|
|
||||||
|
Tool Execution Flow:
|
||||||
|
1. Claude requests tool use (e.g., tavily_search)
|
||||||
|
2. toolExecutor detects Tavily tool request
|
||||||
|
3. Call Tavily MCP client with tool parameters
|
||||||
|
4. Receive and format search results
|
||||||
|
5. Return formatted results to Claude
|
||||||
|
6. Claude incorporates results into response
|
||||||
|
|
||||||
|
Result Formatting:
|
||||||
|
- Convert Tavily results to Claude-friendly format
|
||||||
|
- Include source URLs for citation
|
||||||
|
- Add relevance scores
|
||||||
|
- Truncate content if too long
|
||||||
|
- Handle empty results gracefully
|
||||||
|
</description>
|
||||||
|
<priority>1</priority>
|
||||||
|
<category>backend</category>
|
||||||
|
<test_steps>
|
||||||
|
1. Test tavily_search tool execution with valid query
|
||||||
|
2. Verify results are properly formatted
|
||||||
|
3. Test tavily_search_news tool execution
|
||||||
|
4. Verify error handling when Tavily API fails
|
||||||
|
5. Test timeout handling for slow searches
|
||||||
|
6. Verify results include proper citations and URLs
|
||||||
|
7. Test with empty search results
|
||||||
|
8. Test with very long search queries
|
||||||
|
</test_steps>
|
||||||
|
</feature_3>
|
||||||
|
|
||||||
|
<feature_4>
|
||||||
|
<title>System Prompt Enhancement for Internet Access</title>
|
||||||
|
<description>
|
||||||
|
Update the system prompt to inform Ikario about internet access capabilities.
|
||||||
|
This should be added alongside existing memory tools instructions.
|
||||||
|
|
||||||
|
Implementation:
|
||||||
|
- Update MEMORY_SYSTEM_PROMPT in server/routes/messages.js and claude.js
|
||||||
|
- Add Tavily tools documentation
|
||||||
|
- Provide usage guidelines for when to search the internet
|
||||||
|
- Include examples of good search queries
|
||||||
|
|
||||||
|
Prompt Addition:
|
||||||
|
"## Internet Access via Tavily
|
||||||
|
|
||||||
|
Tu as accès à internet en temps réel via deux outils de recherche :
|
||||||
|
|
||||||
|
1. tavily_search : Recherche web générale optimisée pour l'IA
|
||||||
|
- Utilise-le pour : rechercher des informations actuelles, vérifier des faits,
|
||||||
|
trouver des sources fiables
|
||||||
|
- Paramètres : query (ta question), max_results (nombre de résultats, défaut: 5),
|
||||||
|
search_depth ('basic' ou 'advanced')
|
||||||
|
- Retourne : Résultats avec titre, URL, contenu et score de pertinence
|
||||||
|
|
||||||
|
2. tavily_search_news : Recherche d'actualités récentes
|
||||||
|
- Utilise-le pour : événements actuels, nouvelles, actualités
|
||||||
|
- Paramètres : query, max_results, days (nombre de jours en arrière, défaut: 7)
|
||||||
|
|
||||||
|
Quand utiliser la recherche internet :
|
||||||
|
- Quand l'utilisateur demande des informations récentes ou actuelles
|
||||||
|
- Pour vérifier des faits ou données que tu n'es pas sûr de connaître
|
||||||
|
- Quand l'information demandée est postérieure à ta date limite de connaissances (janvier 2025)
|
||||||
|
- Pour trouver des sources et citations spécifiques
|
||||||
|
- Pour des requêtes nécessitant des données en temps réel
|
||||||
|
|
||||||
|
N'utilise PAS la recherche pour :
|
||||||
|
- Des questions sur ta propre identité ou tes capacités
|
||||||
|
- Des concepts généraux que tu connais déjà bien
|
||||||
|
- Des questions purement créatives ou d'opinion
|
||||||
|
|
||||||
|
Utilise ces outils de façon autonome selon les besoins de la conversation.
|
||||||
|
Cite toujours tes sources quand tu utilises des informations de Tavily."
|
||||||
|
</description>
|
||||||
|
<priority>2</priority>
|
||||||
|
<category>backend</category>
|
||||||
|
<test_steps>
|
||||||
|
1. Verify system prompt includes Tavily instructions
|
||||||
|
2. Test that Claude understands when to use Tavily search
|
||||||
|
3. Verify Claude cites sources from Tavily results
|
||||||
|
4. Test that Claude uses appropriate search queries
|
||||||
|
5. Verify Claude chooses between tavily_search and tavily_search_news correctly
|
||||||
|
6. Test that Claude doesn't over-use search for simple questions
|
||||||
|
</test_steps>
|
||||||
|
</feature_4>
|
||||||
|
|
||||||
|
<feature_5>
|
||||||
|
<title>Tavily Status API Endpoint</title>
|
||||||
|
<description>
|
||||||
|
Create API endpoint to check Tavily MCP connection status and search capabilities.
|
||||||
|
Similar to /api/memory/status endpoint.
|
||||||
|
|
||||||
|
Implementation:
|
||||||
|
- Create GET /api/tavily/status endpoint
|
||||||
|
- Return connection status, available tools, and configuration
|
||||||
|
- Create GET /api/tavily/health endpoint for health checks
|
||||||
|
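- Expose combined tool statistics (memory + Tavily) at a new /api/tools/stats endpoint, keeping the existing /api/memory/stats route unchanged so the change stays additive and backward-compatible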
|
||||||
|
|
||||||
|
Response Format:
|
||||||
|
{
|
||||||
|
"success": true,
|
||||||
|
"data": {
|
||||||
|
"connected": true,
|
||||||
|
"message": "Tavily MCP server is connected",
|
||||||
|
"tools": ["tavily_search", "tavily_search_news"],
|
||||||
|
"apiKeyConfigured": true,
|
||||||
|
"transport": "stdio"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
</description>
|
||||||
|
<priority>2</priority>
|
||||||
|
<category>backend</category>
|
||||||
|
<test_steps>
|
||||||
|
1. Test GET /api/tavily/status returns correct status
|
||||||
|
2. Verify status shows "connected" when Tavily is available
|
||||||
|
3. Verify status shows "disconnected" when Tavily is unavailable
|
||||||
|
4. Test health endpoint returns proper status code
|
||||||
|
5. Verify tools list is accurate
|
||||||
|
6. Test with missing API key shows proper error
|
||||||
|
</test_steps>
|
||||||
|
</feature_5>
|
||||||
|
|
||||||
|
<feature_6>
|
||||||
|
<title>Frontend UI Indicator for Internet Access</title>
|
||||||
|
<description>
|
||||||
|
Add visual indicator in the UI to show when Ikario has internet access via Tavily.
|
||||||
|
This can be displayed alongside the existing memory status indicator.
|
||||||
|
|
||||||
|
Implementation:
|
||||||
|
- Add Tavily status indicator in header or sidebar
|
||||||
|
- Show online/offline status for Tavily connection
|
||||||
|
- Optional: Show when Tavily is being used during a conversation
|
||||||
|
- Optional: Add tooltip explaining internet access capabilities
|
||||||
|
|
||||||
|
Visual Design:
|
||||||
|
- Globe or wifi icon to represent internet access
|
||||||
|
- Green when connected, gray when disconnected
|
||||||
|
- Subtle animation when search is in progress
|
||||||
|
- Tooltip: "Internet access via Tavily" or similar
|
||||||
|
|
||||||
|
Integration:
|
||||||
|
- Use existing useMemory hook pattern or create useTavily hook
|
||||||
|
- Poll /api/tavily/status periodically (every 60s)
|
||||||
|
- Update status in real-time during searches
|
||||||
|
</description>
|
||||||
|
<priority>3</priority>
|
||||||
|
<category>frontend</category>
|
||||||
|
<test_steps>
|
||||||
|
1. Verify internet access indicator appears in UI
|
||||||
|
2. Test status updates when Tavily connects/disconnects
|
||||||
|
3. Verify tooltip shows correct information
|
||||||
|
4. Test that indicator shows activity during searches
|
||||||
|
5. Verify status polling doesn't impact performance
|
||||||
|
6. Test with Tavily disabled shows offline status
|
||||||
|
</test_steps>
|
||||||
|
</feature_6>
|
||||||
|
|
||||||
|
<feature_7>
|
||||||
|
<title>Manual Search UI (Optional Enhancement)</title>
|
||||||
|
<description>
|
||||||
|
Optional: Add manual search interface to allow users to trigger Tavily searches directly,
|
||||||
|
similar to the memory search panel.
|
||||||
|
|
||||||
|
Implementation:
|
||||||
|
- Add "Internet Search" panel in sidebar (alongside Memory panel)
|
||||||
|
- Search input for manual Tavily queries
|
||||||
|
- Display search results with title, snippet, URL
|
||||||
|
- Click to insert results into conversation
|
||||||
|
- Filter by search type (general vs news)
|
||||||
|
|
||||||
|
This is OPTIONAL and lower priority. The primary use case is autonomous search by Claude.
|
||||||
|
</description>
|
||||||
|
<priority>4</priority>
|
||||||
|
<category>frontend</category>
|
||||||
|
<test_steps>
|
||||||
|
1. Verify search panel appears in sidebar
|
||||||
|
2. Test manual search returns results
|
||||||
|
3. Verify results display properly with links
|
||||||
|
4. Test inserting results into conversation
|
||||||
|
5. Test news search filter works correctly
|
||||||
|
6. Verify search history is saved (optional)
|
||||||
|
</test_steps>
|
||||||
|
</feature_7>
|
||||||
|
|
||||||
|
<feature_8>
|
||||||
|
<title>Configuration and Settings</title>
|
||||||
|
<description>
|
||||||
|
Add Tavily configuration options to settings and environment.
|
||||||
|
|
||||||
|
Implementation:
|
||||||
|
- Add TAVILY_API_KEY to environment variables
|
||||||
|
- Add Tavily settings to .claude_settings.json or similar config file
|
||||||
|
- Create server/config/tavilyConfig.js for configuration management
|
||||||
|
- Document configuration options in README
|
||||||
|
|
||||||
|
Configuration Options:
|
||||||
|
- API key
|
||||||
|
- Max results per search (default: 5)
|
||||||
|
- Search depth (basic/advanced)
|
||||||
|
- Timeout duration
|
||||||
|
- Enable/disable Tavily globally
|
||||||
|
- Rate limiting settings
|
||||||
|
|
||||||
|
Security:
|
||||||
|
- API key should NOT be exposed to frontend
|
||||||
|
- Use environment variable or secure config file
|
||||||
|
- Validate API key on startup
|
||||||
|
- Log warnings if API key is missing
|
||||||
|
</description>
|
||||||
|
<priority>2</priority>
|
||||||
|
<category>backend</category>
|
||||||
|
<test_steps>
|
||||||
|
1. Verify API key is read from environment variable
|
||||||
|
2. Test fallback to config file if env var not set
|
||||||
|
3. Verify API key validation on startup
|
||||||
|
4. Test configuration options are applied correctly
|
||||||
|
5. Verify API key is never exposed in API responses
|
||||||
|
6. Test enabling/disabling Tavily via config
|
||||||
|
</test_steps>
|
||||||
|
</feature_8>
|
||||||
|
|
||||||
|
<feature_9>
|
||||||
|
<title>Error Handling and Rate Limiting</title>
|
||||||
|
<description>
|
||||||
|
Implement robust error handling and rate limiting for Tavily API calls.
|
||||||
|
|
||||||
|
Implementation:
|
||||||
|
- Detect and handle Tavily API errors (rate limits, invalid API key, etc.)
|
||||||
|
- Implement client-side rate limiting to avoid hitting Tavily limits
|
||||||
|
- Cache search results for duplicate queries (optional)
|
||||||
|
- Provide clear error messages to Claude when searches fail
|
||||||
|
|
||||||
|
Error Types:
|
||||||
|
- 401: Invalid API key
|
||||||
|
- 429: Rate limit exceeded
|
||||||
|
- 500: Tavily server error
|
||||||
|
- Timeout: Search took too long
|
||||||
|
- Network: Connection failed
|
||||||
|
|
||||||
|
Rate Limiting:
|
||||||
|
- Track searches per minute/hour
|
||||||
|
- Queue requests if limit reached
|
||||||
|
- Return cached results for duplicate queries within 5 minutes
|
||||||
|
- Log rate limit warnings
|
||||||
|
</description>
|
||||||
|
<priority>2</priority>
|
||||||
|
<category>backend</category>
|
||||||
|
<test_steps>
|
||||||
|
1. Test error handling for invalid API key
|
||||||
|
2. Verify rate limit detection and handling
|
||||||
|
3. Test timeout handling for slow searches
|
||||||
|
4. Verify error messages are clear to Claude
|
||||||
|
5. Test rate limiting prevents API abuse
|
||||||
|
6. Verify caching works for duplicate queries
|
||||||
|
</test_steps>
|
||||||
|
</feature_9>
|
||||||
|
|
||||||
|
<feature_10>
|
||||||
|
<title>Documentation and README Updates</title>
|
||||||
|
<description>
|
||||||
|
Update project documentation to explain Tavily integration.
|
||||||
|
|
||||||
|
Implementation:
|
||||||
|
- Update main README.md with Tavily setup instructions
|
||||||
|
- Add TAVILY_SETUP.md with detailed configuration guide
|
||||||
|
- Document API endpoints in README
|
||||||
|
- Add examples of using Tavily with Ikario
|
||||||
|
- Document troubleshooting steps
|
||||||
|
|
||||||
|
Documentation Sections:
|
||||||
|
- Prerequisites (Tavily API key)
|
||||||
|
- Installation steps
|
||||||
|
- Configuration options
|
||||||
|
- Testing Tavily connection
|
||||||
|
- Example conversations using internet search
|
||||||
|
- Troubleshooting common issues
|
||||||
|
- API reference for Tavily endpoints
|
||||||
|
</description>
|
||||||
|
<priority>3</priority>
|
||||||
|
<category>documentation</category>
|
||||||
|
<test_steps>
|
||||||
|
1. Verify README has Tavily setup section
|
||||||
|
2. Test that setup instructions are clear and complete
|
||||||
|
3. Verify all configuration options are documented
|
||||||
|
4. Test examples work as described
|
||||||
|
5. Verify troubleshooting section covers common issues
|
||||||
|
</test_steps>
|
||||||
|
</feature_10>
|
||||||
|
</core_features>
|
||||||
|
|
||||||
|
<implementation_notes>
|
||||||
|
<order>
|
||||||
|
Recommended implementation order:
|
||||||
|
1. Feature 1 (MCP Client Setup) - Foundation
|
||||||
|
2. Feature 2 (Tool Configuration) - Core functionality
|
||||||
|
3. Feature 3 (Tool Executor Integration) - Core functionality
|
||||||
|
4. Feature 8 (Configuration) - Required for testing
|
||||||
|
5. Feature 4 (System Prompt) - Makes tools accessible to Claude
|
||||||
|
6. Feature 9 (Error Handling) - Production readiness
|
||||||
|
7. Feature 5 (Status API) - Monitoring
|
||||||
|
8. Feature 10 (Documentation) - User onboarding
|
||||||
|
9. Feature 6 (UI Indicator) - Nice to have
|
||||||
|
10. Feature 7 (Manual Search UI) - Optional enhancement
|
||||||
|
</order>
|
||||||
|
|
||||||
|
<testing>
|
||||||
|
After completing steps 1-5 of the recommended order above (Features 1, 2, 3, 8 and 4), you should be able to:
|
||||||
|
- Ask Ikario: "Quelle est l'actualité aujourd'hui ?"
|
||||||
|
- Ask Ikario: "Recherche des informations sur [topic actuel]"
|
||||||
|
- Ask Ikario: "Vérifie cette information : [claim]"
|
||||||
|
|
||||||
|
Ikario should autonomously use Tavily search and cite sources.
|
||||||
|
</testing>
|
||||||
|
|
||||||
|
<compatibility>
|
||||||
|
- This specification is fully compatible with existing ikario-memory MCP integration
|
||||||
|
- Ikario will have both memory tools AND internet search tools
|
||||||
|
- Tools can be used together in the same conversation
|
||||||
|
- No conflicts expected between tool systems
|
||||||
|
</compatibility>
|
||||||
|
</implementation_notes>
|
||||||
|
|
||||||
|
<safety_requirements>
|
||||||
|
<critical>
|
||||||
|
- DO NOT expose Tavily API key to frontend or in API responses
|
||||||
|
- DO NOT modify existing MCP memory integration
|
||||||
|
- DO NOT break existing conversation functionality
|
||||||
|
- Tavily should gracefully degrade if unavailable (don't crash the app)
|
||||||
|
- Implement proper rate limiting to avoid API abuse
|
||||||
|
- Validate all user inputs before passing to Tavily
|
||||||
|
- Sanitize search results before displaying (XSS prevention)
|
||||||
|
- Log all Tavily API calls for monitoring and debugging
|
||||||
|
</critical>
|
||||||
|
</safety_requirements>
|
||||||
|
|
||||||
|
<success_metrics>
|
||||||
|
- Ikario can successfully perform internet searches when asked
|
||||||
|
- Search results are relevant and well-formatted
|
||||||
|
- Sources are properly cited
|
||||||
|
- Tavily integration doesn't slow down conversations
|
||||||
|
- Error handling is robust and user-friendly
|
||||||
|
- Configuration is straightforward
|
||||||
|
- Documentation is clear and complete
|
||||||
|
</success_metrics>
|
||||||
|
</project_specification>
|
||||||
290
test_security.py
290
test_security.py
@@ -1,290 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
"""
|
|
||||||
Security Hook Tests
|
|
||||||
===================
|
|
||||||
|
|
||||||
Tests for the bash command security validation logic.
|
|
||||||
Run with: python test_security.py
|
|
||||||
"""
|
|
||||||
|
|
||||||
import asyncio
|
|
||||||
import sys
|
|
||||||
|
|
||||||
from security import (
|
|
||||||
bash_security_hook,
|
|
||||||
extract_commands,
|
|
||||||
validate_chmod_command,
|
|
||||||
validate_init_script,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def test_hook(command: str, should_block: bool) -> bool:
    """Run one Bash command through the security hook and report the outcome.

    Builds a ``Bash`` tool-call payload for ``command``, awaits
    ``bash_security_hook`` on it, and compares whether the hook's
    ``"decision"`` is ``"block"`` with the expectation.

    Args:
        command: The shell command line to submit to the hook.
        should_block: True if the hook is expected to block the command.

    Returns:
        True when the hook's decision matches ``should_block``; False
        otherwise (after printing the expected/actual decisions and the
        hook's reason, if any).
    """
    payload = {"tool_name": "Bash", "tool_input": {"command": command}}
    verdict = asyncio.run(bash_security_hook(payload))
    blocked = verdict.get("decision") == "block"

    matches = blocked == should_block
    status = "PASS" if matches else "FAIL"

    # Passing case: a single status line is all that is printed.
    if matches:
        print(f" {status}: {command!r}")
        return True

    # Failing case: show what was expected versus what the hook decided.
    expected = "blocked" if should_block else "allowed"
    actual = "blocked" if blocked else "allowed"
    reason = verdict.get("reason", "")
    print(f" {status}: {command!r}")
    print(f" Expected: {expected}, Got: {actual}")
    if reason:
        print(f" Reason: {reason}")
    return False
|
|
||||||
|
|
||||||
|
|
||||||
def test_extract_commands():
    """Exercise ``extract_commands`` on a fixed table of command lines.

    Each case pairs a shell command string with the list of command names
    that ``extract_commands`` is expected to return for it. One PASS/FAIL
    line is printed per case.

    Returns:
        A ``(passed, failed)`` tuple of case counts.
    """
    print("\nTesting command extraction:\n")

    # (command line, expected extracted command names)
    cases = [
        ("ls -la", ["ls"]),
        ("npm install && npm run build", ["npm", "npm"]),
        ("cat file.txt | grep pattern", ["cat", "grep"]),
        ("/usr/bin/node script.js", ["node"]),
        ("VAR=value ls", ["ls"]),
        ("git status || git init", ["git", "git"]),
    ]

    failed = 0
    for cmd, expected in cases:
        result = extract_commands(cmd)
        if result != expected:
            print(f" FAIL: {cmd!r}")
            print(f" Expected: {expected}, Got: {result}")
            failed += 1
            continue
        print(f" PASS: {cmd!r} -> {result}")

    # Every case is either a pass or a failure.
    passed = len(cases) - failed
    return passed, failed
|
|
||||||
|
|
||||||
|
|
||||||
def test_validate_chmod():
    """Check ``validate_chmod_command`` against allowed and rejected forms.

    Each case gives a ``chmod`` command line, whether the validator is
    expected to allow it, and a short description used in the output.
    One PASS/FAIL line is printed per case; failures also print the
    expected/actual verdicts and the validator's reason when non-empty.

    Returns:
        A ``(passed, failed)`` tuple of case counts.
    """
    print("\nTesting chmod validation:\n")

    # Test cases: (command, should_be_allowed, description)
    cases = [
        # Allowed cases
        ("chmod +x init.sh", True, "basic +x"),
        ("chmod +x script.sh", True, "+x on any script"),
        ("chmod u+x init.sh", True, "user +x"),
        ("chmod a+x init.sh", True, "all +x"),
        ("chmod ug+x init.sh", True, "user+group +x"),
        ("chmod +x file1.sh file2.sh", True, "multiple files"),
        # Blocked cases
        ("chmod 777 init.sh", False, "numeric mode"),
        ("chmod 755 init.sh", False, "numeric mode 755"),
        ("chmod +w init.sh", False, "write permission"),
        ("chmod +r init.sh", False, "read permission"),
        ("chmod -x init.sh", False, "remove execute"),
        ("chmod -R +x dir/", False, "recursive flag"),
        ("chmod --recursive +x dir/", False, "long recursive flag"),
        ("chmod +x", False, "missing file"),
    ]

    passed = 0
    failed = 0
    for cmd, should_allow, description in cases:
        allowed, reason = validate_chmod_command(cmd)

        if allowed == should_allow:
            print(f" PASS: {cmd!r} ({description})")
            passed += 1
            continue

        # Mismatch: report both verdicts and the validator's explanation.
        expected = "allowed" if should_allow else "blocked"
        actual = "allowed" if allowed else "blocked"
        print(f" FAIL: {cmd!r} ({description})")
        print(f" Expected: {expected}, Got: {actual}")
        if reason:
            print(f" Reason: {reason}")
        failed += 1

    return passed, failed
|
|
||||||
|
|
||||||
|
|
||||||
def test_validate_init_script():
    """Check ``validate_init_script`` against allowed and rejected invocations.

    Each case gives a command line, whether the validator is expected to
    allow it, and a short description used in the output. One PASS/FAIL
    line is printed per case; failures also print the expected/actual
    verdicts and the validator's reason when non-empty.

    Returns:
        A ``(passed, failed)`` tuple of case counts.
    """
    print("\nTesting init.sh validation:\n")

    # Test cases: (command, should_be_allowed, description)
    cases = [
        # Allowed cases
        ("./init.sh", True, "basic ./init.sh"),
        ("./init.sh arg1 arg2", True, "with arguments"),
        ("/path/to/init.sh", True, "absolute path"),
        ("../dir/init.sh", True, "relative path with init.sh"),
        # Blocked cases
        ("./setup.sh", False, "different script name"),
        ("./init.py", False, "python script"),
        ("bash init.sh", False, "bash invocation"),
        ("sh init.sh", False, "sh invocation"),
        ("./malicious.sh", False, "malicious script"),
        ("./init.sh; rm -rf /", False, "command injection attempt"),
    ]

    passed = 0
    failed = 0
    for cmd, should_allow, description in cases:
        allowed, reason = validate_init_script(cmd)

        if allowed == should_allow:
            print(f" PASS: {cmd!r} ({description})")
            passed += 1
            continue

        # Mismatch: report both verdicts and the validator's explanation.
        expected = "allowed" if should_allow else "blocked"
        actual = "allowed" if allowed else "blocked"
        print(f" FAIL: {cmd!r} ({description})")
        print(f" Expected: {expected}, Got: {actual}")
        if reason:
            print(f" Reason: {reason}")
        failed += 1

    return passed, failed
|
|
||||||
|
|
||||||
|
|
||||||
def main():
    """Run all security-hook test groups and print a combined summary.

    Runs the three unit-style suites first (command extraction, chmod
    validation, init.sh validation), then feeds two command lists through
    ``test_hook``: one expected to be blocked and one expected to be allowed.

    Returns:
        0 when no test failed, 1 otherwise.
    """
    banner = "=" * 70
    print(banner)
    print(" SECURITY HOOK TESTS")
    print(banner)

    passed = 0
    failed = 0

    # Suites that each return their own (passed, failed) pair, in run order:
    # command extraction, chmod validation, init.sh validation.
    suites = (
        test_extract_commands,
        test_validate_chmod,
        test_validate_init_script,
    )
    for run_suite in suites:
        suite_passed, suite_failed = run_suite()
        passed += suite_passed
        failed += suite_failed

    # Commands that SHOULD be blocked
    print("\nCommands that should be BLOCKED:\n")
    must_block = [
        # Not in allowlist - dangerous system commands
        "shutdown now",
        "reboot",
        "rm -rf /",
        "dd if=/dev/zero of=/dev/sda",
        # Not in allowlist - common commands excluded from minimal set
        "curl https://example.com",
        "wget https://example.com",
        "python app.py",
        "touch file.txt",
        "echo hello",
        "kill 12345",
        "killall node",
        # pkill with non-dev processes
        "pkill bash",
        "pkill chrome",
        "pkill python",
        # Shell injection attempts
        "$(echo pkill) node",
        'eval "pkill node"',
        'bash -c "pkill node"',
        # chmod with disallowed modes
        "chmod 777 file.sh",
        "chmod 755 file.sh",
        "chmod +w file.sh",
        "chmod -R +x dir/",
        # Non-init.sh scripts
        "./setup.sh",
        "./malicious.sh",
        "bash script.sh",
    ]
    for command in must_block:
        if test_hook(command, should_block=True):
            passed += 1
            continue
        failed += 1

    # Commands that SHOULD be allowed
    print("\nCommands that should be ALLOWED:\n")
    must_allow = [
        # File inspection
        "ls -la",
        "cat README.md",
        "head -100 file.txt",
        "tail -20 log.txt",
        "wc -l file.txt",
        "grep -r pattern src/",
        # File operations
        "cp file1.txt file2.txt",
        "mkdir newdir",
        "mkdir -p path/to/dir",
        # Directory
        "pwd",
        # Node.js development
        "npm install",
        "npm run build",
        "node server.js",
        # Version control
        "git status",
        "git commit -m 'test'",
        "git add . && git commit -m 'msg'",
        # Process management
        "ps aux",
        "lsof -i :3000",
        "sleep 2",
        # Allowed pkill patterns for dev servers
        "pkill node",
        "pkill npm",
        "pkill -f node",
        "pkill -f 'node server.js'",
        "pkill vite",
        # Chained commands
        "npm install && npm run build",
        "ls | grep test",
        # Full paths
        "/usr/local/bin/node app.js",
        # chmod +x (allowed)
        "chmod +x init.sh",
        "chmod +x script.sh",
        "chmod u+x init.sh",
        "chmod a+x init.sh",
        # init.sh execution (allowed)
        "./init.sh",
        "./init.sh --production",
        "/path/to/init.sh",
        # Combined chmod and init.sh
        "chmod +x init.sh && ./init.sh",
    ]
    for command in must_allow:
        if test_hook(command, should_block=False):
            passed += 1
            continue
        failed += 1

    # Summary
    divider = "-" * 70
    print("\n" + divider)
    print(f" Results: {passed} passed, {failed} failed")
    print(divider)

    if failed != 0:
        print(f"\n {failed} TEST(S) FAILED")
        return 1

    print("\n ALL TESTS PASSED")
    return 0
|
|
||||||
|
|
||||||
|
|
||||||
# Script entry point: run the test suite and use main()'s return value
# as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|
|
||||||
Reference in New Issue
Block a user