Add backup system documentation and utility scripts

Documentation: - MODIFICATIONS_BACKUP_SYSTEM.md: Complete documentation of the new backup system - Problem analysis (old system truncated to 200 chars) - New architecture using append_to_conversation - ChromaDB structure (1 principal + N individual message docs) - Coverage comparison (1.2% → 100% for long conversations) - Migration guide and test procedures Utility Scripts: - test_backup_python.py: Direct Python test of backup system - Bypasses Node.js MCP layer - Tests append_to_conversation with complete messages - Displays embedding coverage statistics - fix_stats.mjs: JavaScript patch for getMemoryStats() - patch_stats.py: Python patch for getMemoryStats() function Key Documentation Sections: - Old vs New system comparison table - ChromaDB document structure explanation - Step-by-step migration instructions - Test procedures with expected outputs - Troubleshooting guide 🤖 Generated with Claude Code Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2025-12-20 20:45:15 +01:00
parent 9af5da620f
commit 51983a5240
4 changed files with 798 additions and 0 deletions
--- a/test_backup_python.py
+++ b/test_backup_python.py
@@ -0,0 +1,186 @@
+#!/usr/bin/env python3
+"""
+Test direct du backup - utilise append_to_conversation depuis my_project SQLite vers ikario_rag ChromaDB
+"""
+
+import sqlite3
+import sys
+import os
+
+# Ajouter le chemin vers ikario_rag
+sys.path.insert(0, 'C:/Users/david/SynologyDrive/ikario/ikario_rag')
+
+from mcp_ikario_memory import IkarioMemoryMCP
+import asyncio
+from datetime import datetime
+
+async def test_backup():
+    print("=" * 80)
+    print("TEST BACKUP CONVERSATION - PYTHON DIRECT")
+    print("=" * 80)
+    print()
+
+    # Connexion à la base SQLite de my_project
+    db_path = "C:/GitHub/Linear_coding/generations/my_project/server/data/claude-clone.db"
+    conn = sqlite3.connect(db_path)
+    cursor = conn.cursor()
+
+    # Trouver la conversation "test tes mémoires"
+    cursor.execute("""
+        SELECT id, title, message_count, is_pinned, has_memory_backup, created_at
+        FROM conversations
+        WHERE title LIKE '%test tes mémoires%'
+        LIMIT 1
+    """)
+
+    conv = cursor.fetchone()
+
+    if not conv:
+        print("ERROR: Conversation 'test tes mémoires' not found")
+        return
+
+    conv_id, title, msg_count, is_pinned, has_backup, created_at = conv
+
+    print(f"FOUND: '{title}'")
+    print(f"ID: {conv_id}")
+    print(f"Messages: {msg_count}")
+    print(f"Pinned: {'Yes' if is_pinned else 'No'}")
+    print(f"Already backed up: {'Yes' if has_backup else 'No'}")
+    print(f"Created: {created_at}")
+    print("=" * 80)
+    print()
+
+    # Récupérer TOUS les messages COMPLETS
+    cursor.execute("""
+        SELECT role, content, thinking_content, created_at
+        FROM messages
+        WHERE conversation_id = ?
+        ORDER BY created_at ASC
+    """, (conv_id,))
+
+    messages = cursor.fetchall()
+
+    print(f"Retrieved {len(messages)} messages from SQLite:")
+    print()
+
+    total_chars = 0
+    formatted_messages = []
+
+    for i, (role, content, thinking, msg_created_at) in enumerate(messages, 1):
+        char_len = len(content)
+        total_chars += char_len
+
+        thinking_note = " [+ thinking]" if thinking else ""
+        print(f"  {i}. {role}: {char_len} chars{thinking_note}")
+
+        # Formater pour MCP append_to_conversation
+        msg = {
+            "author": role,
+            "content": content,  # COMPLET, pas de truncation!
+            "timestamp": msg_created_at or datetime.now().isoformat()
+        }
+
+        # Ajouter thinking si présent
+        if thinking:
+            msg["thinking"] = thinking
+
+        formatted_messages.append(msg)
+
+    total_words = total_chars // 5
+    print(f"\nTotal: {total_chars} chars (~{total_words} words)")
+    print()
+
+    # Calcul couverture
+    old_coverage = min(100, (256 * 4 / total_chars) * 100)
+    new_coverage = min(100, (8192 * 4 / total_chars) * 100)
+
+    print("Embedding coverage estimation:")
+    print(f"  OLD (all-MiniLM-L6-v2, 256 tokens): {old_coverage:.1f}%")
+    print(f"  NEW (BAAI/bge-m3, 8192 tokens):     {new_coverage:.1f}%")
+    print(f"  Improvement: +{(new_coverage - old_coverage):.1f}%")
+    print()
+
+    # Initialiser Ikario Memory MCP
+    print("Initializing Ikario RAG (ChromaDB + BAAI/bge-m3)...")
+    ikario_db_path = "C:/Users/david/SynologyDrive/ikario/ikario_rag/index"
+    memory = IkarioMemoryMCP(db_path=ikario_db_path)
+    print("OK Ikario Memory initialized")
+    print()
+
+    # Préparer les participants et le contexte
+    participants = ["user", "assistant"]
+
+    context = {
+        "category": "fondatrice" if is_pinned else "thematique",
+        "tags": ["test", "mémoire", "conversation"],
+        "summary": f"{title} ({msg_count} messages)",
+        "date": created_at,
+        "title": title,
+        "key_insights": []
+    }
+
+    print("Starting backup with append_to_conversation...")
+    print(f"  - Conversation ID: {conv_id}")
+    print(f"  - Messages: {len(formatted_messages)} COMPLETE messages")
+    print(f"  - Participants: {participants}")
+    print(f"  - Category: {context['category']}")
+    print()
+
+    try:
+        # Appeler append_to_conversation (auto-create si n'existe pas)
+        result = await memory.append_to_conversation(
+            conversation_id=conv_id,
+            new_messages=formatted_messages,
+            participants=participants,
+            context=context
+        )
+
+        print("=" * 80)
+        print("BACKUP RESULT:")
+        print("=" * 80)
+        print(f"Status: {result}")
+        print()
+
+        if "updated" in result or "ajoutée" in result or "added" in result.lower():
+            print("SUCCESS! Conversation backed up to ChromaDB")
+            print()
+            print("What was saved:")
+            print(f"  - {len(formatted_messages)} COMPLETE messages (no truncation!)")
+            print(f"  - Each message has its own embedding (BAAI/bge-m3)")
+            print(f"  - Max tokens per message: 8192 (vs 256 old)")
+            print(f"  - Category: {context['category']}")
+            print()
+            print("ChromaDB structure created:")
+            print(f"  - 1 document principal (full conversation)")
+            print(f"  - {len(formatted_messages)} documents individuels (one per message)")
+            print(f"  - Total: {len(formatted_messages) + 1} documents with embeddings")
+            print()
+
+            # Marquer comme backupé dans SQLite
+            cursor.execute("""
+                UPDATE conversations
+                SET has_memory_backup = 1
+                WHERE id = ?
+            """, (conv_id,))
+            conn.commit()
+            print("✓ Marked as backed up in SQLite")
+
+        else:
+            print("WARNING: Unexpected result format")
+
+    except Exception as e:
+        print(f"ERROR during backup: {e}")
+        import traceback
+        traceback.print_exc()
+
+    finally:
+        conn.close()
+
+    print()
+    print("=" * 80)
+    print("TEST COMPLETED")
+    print("=" * 80)
+
+
+if __name__ == "__main__":
+    asyncio.run(test_backup())