diff --git a/.gitignore b/.gitignore index 0da9dd7..74ac56a 100644 --- a/.gitignore +++ b/.gitignore @@ -2,12 +2,18 @@ generations/* !generations/library_rag/ +# Python cache and compiled files +__pycache__/ +*.pyc +*.pyo +*.pyd + # Log files logs/ +*.log .env venv -__pycache__ # Node modules (if any) node_modules/ @@ -18,4 +24,7 @@ backup_migration_*/ restoration_log.txt restoration_remaining_log.txt summary_generation_progress.json -nul \ No newline at end of file +nul + +# Archives (migration scripts moved here) +archive/ \ No newline at end of file diff --git a/08_fix_summaries_properties.py b/08_fix_summaries_properties.py deleted file mode 100644 index e74d8a0..0000000 --- a/08_fix_summaries_properties.py +++ /dev/null @@ -1,157 +0,0 @@ -"""Correctif: Ajouter workAuthor, year, language aux Summary_v2.""" - -import weaviate -import sys - -if sys.stdout.encoding != 'utf-8': - sys.stdout.reconfigure(encoding='utf-8') - -# Try to import tqdm -try: - from tqdm import tqdm - HAS_TQDM = True -except ImportError: - HAS_TQDM = False - -client = weaviate.connect_to_local() - -try: - print("=" * 80) - print("CORRECTIF: AJOUTER workAuthor, year, language À SUMMARY_V2") - print("=" * 80) - print() - - summary_v2 = client.collections.get("Summary_v2") - work_collection = client.collections.get("Work") - - # Build workTitle → Work metadata map - print("Étape 1: Mapping workTitle → Work metadata") - print("-" * 80) - - work_map = {} - - for work in work_collection.iterator(include_vector=False): - props = work.properties - title = props.get("title") - if title: - work_map[title] = { - "author": props.get("author", "Unknown"), - "year": props.get("year", 0), - "language": props.get("language", "en"), - } - - print(f"✓ {len(work_map)} mappings workTitle → metadata") - print() - - # Count total summaries - print("Étape 2: Comptage summaries") - print("-" * 80) - - print("Comptage en cours...") - total_summaries = sum(1 for _ in summary_v2.iterator(include_vector=False)) - - 
print(f"✓ {total_summaries} summaries à corriger") - print() - - # Update summaries - print("Étape 3: Mise à jour des propriétés") - print("-" * 80) - print() - - updated = 0 - skipped = 0 - errors = [] - - # Create iterator with or without tqdm - if HAS_TQDM: - iterator = tqdm( - summary_v2.iterator(include_vector=False), - total=total_summaries, - desc="Mise à jour", - unit="summaries" - ) - else: - iterator = summary_v2.iterator(include_vector=False) - print("Mise à jour en cours...") - - for idx, summary in enumerate(iterator, 1): - props = summary.properties - - try: - work_title = props.get("workTitle") - - if not work_title: - errors.append(f"Summary {summary.uuid}: pas de workTitle") - skipped += 1 - continue - - # Get work metadata - work_metadata = work_map.get(work_title) - if not work_metadata: - errors.append(f"Summary {summary.uuid}: Work '{work_title}' introuvable") - skipped += 1 - continue - - # Check if already updated (workAuthor exists) - if props.get("workAuthor") is not None: - skipped += 1 - continue - - # Update properties - summary_v2.data.update( - uuid=summary.uuid, - properties={ - "workAuthor": work_metadata["author"], - "year": work_metadata["year"], - "language": work_metadata["language"], - } - ) - - updated += 1 - - # Progress without tqdm - if not HAS_TQDM and idx % 10 == 0: - print(f" {idx}/{total_summaries} summaries traités...") - - except Exception as e: - errors.append(f"Summary {summary.uuid}: {e}") - - print() - print("-" * 80) - print(f"✓ Total mis à jour: {updated}/{total_summaries}") - print(f" Déjà à jour: {skipped}") - - if errors: - print(f"⚠️ Erreurs rencontrées: {len(errors)}") - print() - print("Premières erreurs:") - for err in errors[:10]: - print(f" - {err}") - if len(errors) > 10: - print(f" ... 
et {len(errors) - 10} autres") - - print() - print("=" * 80) - print("CORRECTIF TERMINÉ") - print("=" * 80) - print() - - if updated == total_summaries: - print("✅ Tous les summaries ont été mis à jour") - print() - print("Propriétés ajoutées:") - print(" ✓ workAuthor (auteur de l'œuvre)") - print(" ✓ year (année de publication)") - print(" ✓ language (langue du texte)") - print() - print("VÉRIFICATION:") - print(" python -c \"from verify_summaries import verify; verify()\"") - elif updated > 0: - print(f"⚠️ {updated}/{total_summaries} summaries mis à jour") - print(" Vérifier les erreurs") - else: - print("❌ Aucun summary mis à jour") - print(" Corriger les erreurs et relancer") - -finally: - client.close() diff --git a/09_rechunk_oversized.py b/09_rechunk_oversized.py deleted file mode 100644 index 5083b9e..0000000 --- a/09_rechunk_oversized.py +++ /dev/null @@ -1,267 +0,0 @@ -"""Script to re-chunk oversized chunks (> 2000 tokens) in Chunk_v2. - -This script identifies chunks that are too large (> 2000 tokens) and splits them -into smaller chunks with overlap (max 1000 words, overlap 100 words). - -Steps: - 1. Identify all chunks > 2000 tokens in Chunk_v2 - 2. Re-chunk using simple_chunk_with_overlap (1000 words max, 100 overlap) - 3. Delete the original oversized chunk - 4. Insert new smaller chunks with preserved metadata - 5. 
Update Summary_v2 chunksCount if needed -""" - -import weaviate -import sys -from pathlib import Path - -# Add utils to path -sys.path.insert(0, str(Path(__file__).parent / "generations" / "library_rag")) - -from utils.llm_chunker_improved import simple_chunk_with_overlap, estimate_tokens - -if sys.stdout.encoding != 'utf-8': - sys.stdout.reconfigure(encoding='utf-8') - -# Try to import tqdm -try: - from tqdm import tqdm - HAS_TQDM = True -except ImportError: - HAS_TQDM = False - -# Constants -TOKEN_THRESHOLD = 2000 # Chunks > 2000 tokens will be re-chunked -MAX_WORDS = 1000 -OVERLAP_WORDS = 100 - -client = weaviate.connect_to_local() - -try: - print("=" * 80) - print("RE-CHUNKING DES CHUNKS SURDIMENSIONNÉS") - print("=" * 80) - print() - - chunk_v2 = client.collections.get("Chunk_v2") - work_collection = client.collections.get("Work") - - # ========== 1. IDENTIFIER LES CHUNKS PROBLÉMATIQUES ========== - print("1. IDENTIFICATION DES CHUNKS > 2000 TOKENS") - print("-" * 80) - print() - - oversized_chunks = [] - - print("Analyse en cours...") - for chunk in chunk_v2.iterator(include_vector=False): - props = chunk.properties - text = props.get('text', '') - tokens = estimate_tokens(text) - - if tokens > TOKEN_THRESHOLD: - oversized_chunks.append({ - 'uuid': str(chunk.uuid), - 'tokens': tokens, - 'chars': len(text), - 'text': text, - 'workTitle': props.get('workTitle', ''), - 'workAuthor': props.get('workAuthor', ''), - 'year': props.get('year', 0), - 'language': props.get('language', 'en'), - 'sectionPath': props.get('sectionPath', ''), - 'chapterTitle': props.get('chapterTitle', ''), - 'canonicalReference': props.get('canonicalReference', ''), - 'unitType': props.get('unitType', 'main_content'), - 'keywords': props.get('keywords', []), - 'orderIndex': props.get('orderIndex', 0), - }) - - print(f"✓ {len(oversized_chunks)} chunks > {TOKEN_THRESHOLD} tokens trouvés") - print() - - if not oversized_chunks: - print("✅ Aucun chunk surdimensionné à traiter") - print() - 
print("=" * 80) - print("SCRIPT TERMINÉ - RIEN À FAIRE") - print("=" * 80) - sys.exit(0) - - # Trier par taille - oversized_chunks.sort(key=lambda x: x['tokens'], reverse=True) - - print("Top 5 plus gros chunks:") - for i, chunk in enumerate(oversized_chunks[:5], 1): - print(f"{i}. {chunk['tokens']:,} tokens ({chunk['chars']:,} chars)") - print(f" Œuvre: {chunk['workTitle']}") - print(f" Section: {chunk['sectionPath'][:60]}...") - print() - - if len(oversized_chunks) > 5: - print(f"... et {len(oversized_chunks) - 5} autres") - - print() - - # ========== 2. RE-CHUNKING ========== - print("2. RE-CHUNKING AVEC OVERLAP") - print("-" * 80) - print() - - # Build work_title -> work_uuid map for references - work_map = {} - for work in work_collection.iterator(include_vector=False): - props = work.properties - title = props.get("title") - if title: - work_map[title] = str(work.uuid) - - print(f"✓ {len(work_map)} Works mappés") - print() - - deleted_count = 0 - inserted_count = 0 - errors = [] - - # Create iterator with or without tqdm - if HAS_TQDM: - iterator = tqdm( - oversized_chunks, - desc="Re-chunking", - unit="chunks" - ) - else: - iterator = oversized_chunks - print("Re-chunking en cours...") - - for idx, old_chunk in enumerate(iterator, 1): - try: - # Re-chunk text - new_texts = simple_chunk_with_overlap( - old_chunk['text'], - max_words=MAX_WORDS, - overlap_words=OVERLAP_WORDS - ) - - # Get work reference - work_uuid = work_map.get(old_chunk['workTitle']) - if not work_uuid: - errors.append(f"Chunk {old_chunk['uuid'][:8]}: Work '{old_chunk['workTitle']}' introuvable") - continue - - # Insert new chunks - for i, new_text in enumerate(new_texts): - # Sub-ordering: multiply base index by 100 and add part index - # Example: orderIndex=5 becomes 500, 501, 502, etc. 
- new_order_index = (old_chunk['orderIndex'] * 100) + i - - new_props = { - "text": new_text, - "summary": "", # Empty summary for simple chunks - "keywords": old_chunk['keywords'], - "workTitle": old_chunk['workTitle'], - "workAuthor": old_chunk['workAuthor'], - "year": old_chunk['year'], - "language": old_chunk['language'], - "sectionPath": old_chunk['sectionPath'], - "chapterTitle": old_chunk['chapterTitle'], - "canonicalReference": old_chunk['canonicalReference'], - "unitType": old_chunk['unitType'], - "orderIndex": new_order_index, - } - - chunk_v2.data.insert( - properties=new_props, - references={"work": work_uuid} - ) - inserted_count += 1 - - # Delete old chunk - chunk_v2.data.delete_by_id(old_chunk['uuid']) - deleted_count += 1 - - # Progress without tqdm - if not HAS_TQDM and idx % 5 == 0: - print(f" {idx}/{len(oversized_chunks)} chunks traités...") - - except Exception as e: - errors.append(f"Chunk {old_chunk['uuid'][:8]}: {e}") - - print() - print("-" * 80) - print(f"✓ Chunks supprimés: {deleted_count}") - print(f"✓ Nouveaux chunks créés: {inserted_count}") - if deleted_count > 0: - print(f" Expansion moyenne: {inserted_count / deleted_count:.1f}x") - else: - print(f" ⚠️ Aucun chunk supprimé - vérifier les erreurs") - - if errors: - print() - print(f"⚠️ Erreurs rencontrées: {len(errors)}") - for err in errors[:10]: - print(f" - {err}") - if len(errors) > 10: - print(f" ... et {len(errors) - 10} autres") - - print() - - # ========== 3. VÉRIFICATION ========== - print("3. 
VÉRIFICATION POST-RECHUNKING") - print("-" * 80) - print() - - print("Comptage des nouveaux chunks...") - remaining_oversized = 0 - total_chunks = 0 - - for chunk in chunk_v2.iterator(include_vector=False): - total_chunks += 1 - text = chunk.properties.get('text', '') - tokens = estimate_tokens(text) - if tokens > TOKEN_THRESHOLD: - remaining_oversized += 1 - - print(f"✓ Total chunks: {total_chunks:,}") - print(f"✓ Chunks > {TOKEN_THRESHOLD} tokens: {remaining_oversized}") - - if remaining_oversized == 0: - print() - print("✅ Aucun chunk surdimensionné restant!") - else: - print() - print(f"⚠️ {remaining_oversized} chunks encore > {TOKEN_THRESHOLD} tokens") - print(" Relancer le script si nécessaire") - - print() - print("=" * 80) - print("RE-CHUNKING TERMINÉ") - print("=" * 80) - print() - - print("RÉSULTATS:") - print(f" • Chunks supprimés: {deleted_count}") - print(f" • Nouveaux chunks créés: {inserted_count}") - if deleted_count > 0: - print(f" • Expansion: {inserted_count / deleted_count:.1f}x") - print(f" • Chunks restants > {TOKEN_THRESHOLD} tokens: {remaining_oversized}") - print() - - if remaining_oversized == 0 and deleted_count > 0: - print("✅ RE-CHUNKING RÉUSSI") - print() - print("AMÉLIORATIONS:") - print(f" • {deleted_count} chunks géants éliminés") - print(f" • {inserted_count} chunks optimaux créés") - print(f" • Taille max: {MAX_WORDS} mots (~{MAX_WORDS * 2.5:.0f} tokens)") - print(f" • Overlap: {OVERLAP_WORDS} mots (contexte préservé)") - print() - print("PROCHAINES ÉTAPES:") - print(" 1. Tester la recherche sémantique") - print(" 2. Vérifier la qualité des vecteurs") - print(" 3. 
Optionnel: Mettre à jour Summary_v2.chunksCount si nécessaire") - elif deleted_count == 0: - print("ℹ️ Aucun chunk n'a nécessité de re-chunking") - -finally: - client.close() diff --git a/10_test_search_quality.py b/10_test_search_quality.py deleted file mode 100644 index 86a4e28..0000000 --- a/10_test_search_quality.py +++ /dev/null @@ -1,402 +0,0 @@ -"""Test search quality with re-chunked data. - -This script tests semantic search to verify that the re-chunking improved -search quality and relevance. - -Tests: - 1. Chunk size distribution after re-chunking - 2. Overlap verification between consecutive chunks - 3. Semantic search quality on various queries - 4. Comparison of results from giant chunks vs optimized chunks -""" - -import weaviate -import sys -import requests -from pathlib import Path - -# Add utils to path -sys.path.insert(0, str(Path(__file__).parent / "generations" / "library_rag")) - -from utils.llm_chunker_improved import estimate_tokens - -if sys.stdout.encoding != 'utf-8': - sys.stdout.reconfigure(encoding='utf-8') - -# Vectorizer URL (same as in 11_vectorize_missing_chunks.py) -VECTORIZER_URL = "http://localhost:8090/vectors" - -def vectorize_query(query: str) -> list[float]: - """Manually vectorize a query using text2vec-transformers service. 
- - Args: - query: Query text to vectorize - - Returns: - Vector as list of floats (1024 dimensions for BGE-M3) - """ - response = requests.post( - VECTORIZER_URL, - json={"text": query}, - headers={"Content-Type": "application/json"}, - timeout=30 - ) - if response.status_code != 200: - raise RuntimeError(f"Vectorization failed: HTTP {response.status_code}") - - result = response.json() - vector = result.get('vector') - if not vector: - raise RuntimeError("No vector in response") - - return vector - -client = weaviate.connect_to_local() - -try: - print("=" * 80) - print("TEST DE LA QUALITÉ DE RECHERCHE APRÈS RE-CHUNKING") - print("=" * 80) - print() - - chunk_v2 = client.collections.get("Chunk_v2") - - # ========== 1. DISTRIBUTION DES TAILLES ========== - print("1. DISTRIBUTION DES TAILLES DE CHUNKS") - print("-" * 80) - print() - - print("Analyse en cours...") - - sizes = [] - for chunk in chunk_v2.iterator(include_vector=False): - text = chunk.properties.get('text', '') - tokens = estimate_tokens(text) - sizes.append(tokens) - - total = len(sizes) - avg = sum(sizes) / total - max_size = max(sizes) - min_size = min(sizes) - - print(f"Total chunks: {total:,}") - print(f"Taille moyenne: {avg:.0f} tokens") - print(f"Min: {min_size} tokens") - print(f"Max: {max_size} tokens") - print() - - # Distribution par tranches - ranges = [ - (0, 500, "Très petits"), - (500, 1000, "Petits"), - (1000, 1500, "Moyens"), - (1500, 2000, "Grands"), - (2000, 3000, "Très grands"), - (3000, 10000, "ÉNORMES"), - ] - - print("Distribution par tranches:") - for min_tok, max_tok, label in ranges: - count = sum(1 for s in sizes if min_tok <= s < max_tok) - percentage = count / total * 100 - bar = "█" * int(percentage / 2) - print(f" {min_tok:>5}-{max_tok:>5} tokens ({label:15}): {count:>5} ({percentage:>5.1f}%) {bar}") - - print() - - # ========== 2. VÉRIFICATION OVERLAP ========== - print("2. 
VÉRIFICATION DE L'OVERLAP ENTRE CHUNKS CONSÉCUTIFS") - print("-" * 80) - print() - - # Prendre une œuvre pour vérifier l'overlap - print("Analyse de l'overlap dans 'Between Past and Future'...") - - arendt_chunks = [] - for chunk in chunk_v2.iterator(include_vector=False): - props = chunk.properties - if props.get('workTitle') == 'Between Past and Future': - arendt_chunks.append({ - 'orderIndex': props.get('orderIndex', 0), - 'text': props.get('text', ''), - 'sectionPath': props.get('sectionPath', '') - }) - - # Trier par orderIndex - arendt_chunks.sort(key=lambda x: x['orderIndex']) - - print(f"Chunks trouvés: {len(arendt_chunks)}") - print() - - # Vérifier overlap entre chunks consécutifs de même section - overlaps_found = 0 - overlaps_checked = 0 - - for i in range(len(arendt_chunks) - 1): - current = arendt_chunks[i] - next_chunk = arendt_chunks[i + 1] - - # Vérifier si même section (potentiellement des chunks split) - if current['sectionPath'] == next_chunk['sectionPath']: - # Extraire les derniers 200 caractères du chunk actuel - current_end = current['text'][-200:].strip() - # Extraire les premiers 200 caractères du chunk suivant - next_start = next_chunk['text'][:200].strip() - - # Chercher overlap - overlap_found = False - for length in range(50, 201, 10): # Tester différentes longueurs - if len(current_end) < length or len(next_start) < length: - continue - - test_end = current_end[-length:] - if test_end in next_start: - overlap_found = True - overlaps_found += 1 - break - - overlaps_checked += 1 - - if overlaps_checked > 0: - print(f"Chunks consécutifs vérifiés: {overlaps_checked}") - print(f"Overlaps détectés: {overlaps_found} ({overlaps_found/overlaps_checked*100:.1f}%)") - else: - print("Aucun chunk consécutif dans la même section (pas de split détecté)") - - print() - - # ========== 3. TESTS DE RECHERCHE SÉMANTIQUE ========== - print("3. 
TESTS DE RECHERCHE SÉMANTIQUE") - print("-" * 80) - print() - - test_queries = [ - { - "query": "What is the nature of representation in cognitive science?", - "expected_work": "Mind Design III", - "description": "Requête philosophique complexe" - }, - { - "query": "Comment définit-on la vertu selon Platon?", - "expected_work": "Platon - Ménon", - "description": "Requête en français sur un concept spécifique" - }, - { - "query": "pragmatism and belief fixation", - "expected_work": "Collected papers", - "description": "Concepts multiples (test de granularité)" - }, - { - "query": "Entre la logique des termes et la grammaire spéculative", - "expected_work": "La pensée-signe", - "description": "Requête technique académique" - }, - ] - - for i, test in enumerate(test_queries, 1): - print(f"Test {i}: {test['description']}") - print(f"Query: \"{test['query']}\"") - print() - - # Vectorize query and search with near_vector - # (Chunk_v2 has no vectorizer, so we must manually vectorize queries) - query_vector = vectorize_query(test['query']) - result = chunk_v2.query.near_vector( - near_vector=query_vector, - limit=5, - return_properties=[ - 'text', 'workTitle', 'workAuthor', - 'sectionPath', 'chapterTitle' - ], - return_metadata=['distance'] - ) - - if not result.objects: - print(" ❌ Aucun résultat trouvé") - print() - continue - - # Analyser les résultats - print(f" Résultats: {len(result.objects)}") - print() - - for j, obj in enumerate(result.objects, 1): - props = obj.properties - work_title = props.get('workTitle', 'N/A') - text = props.get('text', '') - tokens = estimate_tokens(text) - - # Distance (si disponible) - distance = getattr(obj.metadata, 'distance', None) if hasattr(obj, 'metadata') else None - distance_str = f" (distance: {distance:.4f})" if distance else "" - - # Marquer si c'est l'œuvre attendue - match_icon = "✓" if test['expected_work'] in work_title else " " - - print(f" [{match_icon}] {j}. 
{work_title}{distance_str}") - print(f" Taille: {tokens} tokens") - print(f" Section: {props.get('sectionPath', 'N/A')[:60]}...") - print(f" Extrait: {text[:120]}...") - print() - - # Vérifier si l'œuvre attendue est dans les résultats - found_expected = any( - test['expected_work'] in obj.properties.get('workTitle', '') - for obj in result.objects - ) - - if found_expected: - rank = next( - i for i, obj in enumerate(result.objects, 1) - if test['expected_work'] in obj.properties.get('workTitle', '') - ) - print(f" ✅ Œuvre attendue trouvée (rang {rank}/5)") - else: - print(f" ⚠️ Œuvre attendue '{test['expected_work']}' non trouvée dans le top 5") - - print() - print("-" * 80) - print() - - # ========== 4. STATISTIQUES GLOBALES ========== - print("4. STATISTIQUES GLOBALES DE RECHERCHE") - print("-" * 80) - print() - - # Tester une requête large - broad_query = "philosophy and logic" - print(f"Requête large: \"{broad_query}\"") - print() - - query_vector = vectorize_query(broad_query) - result = chunk_v2.query.near_vector( - near_vector=query_vector, - limit=20, - return_properties=['workTitle', 'text'] - ) - - # Compter par œuvre - work_distribution = {} - chunk_sizes_in_results = [] - - for obj in result.objects: - props = obj.properties - work = props.get('workTitle', 'Unknown') - work_distribution[work] = work_distribution.get(work, 0) + 1 - - text = props.get('text', '') - tokens = estimate_tokens(text) - chunk_sizes_in_results.append(tokens) - - print(f"Résultats par œuvre (top 20):") - for work, count in sorted(work_distribution.items(), key=lambda x: x[1], reverse=True): - print(f" • {work}: {count} chunks") - - print() - - if chunk_sizes_in_results: - avg_result_size = sum(chunk_sizes_in_results) / len(chunk_sizes_in_results) - max_result_size = max(chunk_sizes_in_results) - print(f"Taille moyenne des chunks retournés: {avg_result_size:.0f} tokens") - print(f"Taille max des chunks retournés: {max_result_size} tokens") - - print() - - # ========== 5. 
SCORE DE QUALITÉ ========== - print("5. SCORE DE QUALITÉ DE LA RECHERCHE") - print("-" * 80) - print() - - quality_checks = [] - - # Check 1: Aucun chunk > 2000 tokens - oversized = sum(1 for s in sizes if s > 2000) - quality_checks.append({ - 'name': 'Taille des chunks', - 'passed': oversized == 0, - 'detail': f'{oversized} chunks > 2000 tokens' - }) - - # Check 2: Distribution équilibrée - optimal_range = sum(1 for s in sizes if 200 <= s <= 1500) - optimal_percentage = optimal_range / total * 100 - quality_checks.append({ - 'name': 'Distribution optimale', - 'passed': optimal_percentage >= 80, - 'detail': f'{optimal_percentage:.1f}% dans range optimal (200-1500 tokens)' - }) - - # Check 3: Résultats variés - unique_works = len(work_distribution) - quality_checks.append({ - 'name': 'Diversité des résultats', - 'passed': unique_works >= 3, - 'detail': f'{unique_works} œuvres différentes dans top 20' - }) - - # Check 4: Overlap présent - quality_checks.append({ - 'name': 'Overlap entre chunks', - 'passed': overlaps_found > 0 if overlaps_checked > 0 else None, - 'detail': f'{overlaps_found}/{overlaps_checked} overlaps détectés' if overlaps_checked > 0 else 'N/A' - }) - - # Afficher les résultats - passed = sum(1 for c in quality_checks if c['passed'] is True) - total_checks = sum(1 for c in quality_checks if c['passed'] is not None) - - for check in quality_checks: - if check['passed'] is None: - icon = "⚠️" - status = "N/A" - elif check['passed']: - icon = "✅" - status = "OK" - else: - icon = "❌" - status = "FAIL" - - print(f"{icon} {check['name']}: {status}") - print(f" {check['detail']}") - - print() - print(f"Score: {passed}/{total_checks} ({passed/total_checks*100:.0f}%)") - print() - - # ========== 6. 
RÉSUMÉ ========== - print("=" * 80) - print("RÉSUMÉ DU TEST") - print("=" * 80) - print() - - if passed >= total_checks * 0.8: - print("✅ QUALITÉ DE RECHERCHE: EXCELLENTE") - print() - print("Les chunks re-chunkés ont amélioré la recherche:") - print(f" • {total:,} chunks optimisés") - print(f" • Taille moyenne: {avg:.0f} tokens (optimal)") - print(f" • {optimal_percentage:.1f}% dans la plage optimale") - print(f" • Max: {max_size} tokens (< 2500)") - print(f" • Overlap détecté: {overlaps_found > 0 if overlaps_checked > 0 else 'N/A'}") - print() - print("Recommandations:") - print(" ✓ La recherche sémantique fonctionne correctement") - print(" ✓ Les chunks sont de taille optimale pour BGE-M3") - print(" ✓ Le système est prêt pour la production") - elif passed >= total_checks * 0.6: - print("⚠️ QUALITÉ DE RECHERCHE: BONNE") - print() - print("Quelques améliorations possibles:") - for check in quality_checks: - if not check['passed'] and check['passed'] is not None: - print(f" • {check['name']}: {check['detail']}") - else: - print("❌ QUALITÉ DE RECHERCHE: À AMÉLIORER") - print() - print("Problèmes détectés:") - for check in quality_checks: - if not check['passed'] and check['passed'] is not None: - print(f" • {check['name']}: {check['detail']}") - -finally: - client.close() diff --git a/11_vectorize_missing_chunks.py b/11_vectorize_missing_chunks.py deleted file mode 100644 index 7dd7aaf..0000000 --- a/11_vectorize_missing_chunks.py +++ /dev/null @@ -1,217 +0,0 @@ -"""Vectorize chunks that don't have vectors. - -After re-chunking, new chunks were created without vectors because Chunk_v2 -collection has no vectorizer configured. This script manually vectorizes -these chunks using the text2vec-transformers service. 
-""" - -import weaviate -import sys -import requests - -if sys.stdout.encoding != 'utf-8': - sys.stdout.reconfigure(encoding='utf-8') - -# Try to import tqdm -try: - from tqdm import tqdm - HAS_TQDM = True -except ImportError: - HAS_TQDM = False - -# Text2vec-transformers service URL (from docker-compose.yml) -VECTORIZER_URL = "http://localhost:8090/vectors" - -client = weaviate.connect_to_local() - -try: - print("=" * 80) - print("VECTORISATION DES CHUNKS SANS VECTEUR") - print("=" * 80) - print() - - chunk_v2 = client.collections.get("Chunk_v2") - - # ========== 1. IDENTIFIER LES CHUNKS SANS VECTEUR ========== - print("1. IDENTIFICATION DES CHUNKS SANS VECTEUR") - print("-" * 80) - print() - - print("Analyse en cours...") - - chunks_to_vectorize = [] - - for chunk in chunk_v2.iterator(include_vector=True): - if not chunk.vector or not chunk.vector.get('default'): - props = chunk.properties - chunks_to_vectorize.append({ - 'uuid': chunk.uuid, - 'text': props.get('text', ''), - 'summary': props.get('summary', ''), - 'keywords': props.get('keywords', []), - 'workTitle': props.get('workTitle', 'N/A') - }) - - print(f"✓ {len(chunks_to_vectorize)} chunks sans vecteur trouvés") - print() - - if not chunks_to_vectorize: - print("✅ Aucun chunk à vectoriser") - print() - print("=" * 80) - print("SCRIPT TERMINÉ - RIEN À FAIRE") - print("=" * 80) - sys.exit(0) - - # ========== 2. VECTORISATION ========== - print("2. 
VECTORISATION DES CHUNKS") - print("-" * 80) - print() - - print(f"Service vectorizer: {VECTORIZER_URL}") - print() - - vectorized_count = 0 - errors = [] - - # Create iterator with or without tqdm - if HAS_TQDM: - iterator = tqdm( - chunks_to_vectorize, - desc="Vectorisation", - unit="chunks" - ) - else: - iterator = chunks_to_vectorize - print("Vectorisation en cours...") - - for idx, chunk_data in enumerate(iterator, 1): - try: - # Prepare text for vectorization - # Combine text, summary, and keywords as per original Chunk schema - text_parts = [chunk_data['text']] - - if chunk_data['summary']: - text_parts.append(chunk_data['summary']) - - if chunk_data['keywords']: - text_parts.append(' '.join(chunk_data['keywords'])) - - combined_text = ' '.join(text_parts) - - # Call text2vec-transformers service - response = requests.post( - VECTORIZER_URL, - json={"text": combined_text}, - headers={"Content-Type": "application/json"}, - timeout=30 - ) - - if response.status_code != 200: - errors.append(f"Chunk {str(chunk_data['uuid'])[:8]}: HTTP {response.status_code}") - continue - - result = response.json() - vector = result.get('vector') - - if not vector: - errors.append(f"Chunk {str(chunk_data['uuid'])[:8]}: Pas de vecteur dans la réponse") - continue - - # Update chunk with vector - chunk_v2.data.update( - uuid=chunk_data['uuid'], - vector=vector - ) - - vectorized_count += 1 - - # Progress without tqdm - if not HAS_TQDM and idx % 10 == 0: - print(f" {idx}/{len(chunks_to_vectorize)} chunks vectorisés...") - - except requests.exceptions.RequestException as e: - errors.append(f"Chunk {str(chunk_data['uuid'])[:8]}: Erreur réseau - {e}") - except Exception as e: - errors.append(f"Chunk {str(chunk_data['uuid'])[:8]}: {e}") - - print() - print("-" * 80) - print(f"✓ Chunks vectorisés: {vectorized_count}/{len(chunks_to_vectorize)}") - - if errors: - print() - print(f"⚠️ Erreurs rencontrées: {len(errors)}") - for err in errors[:10]: - print(f" - {err}") - if len(errors) > 10: 
- print(f" ... et {len(errors) - 10} autres") - - print() - - # ========== 3. VÉRIFICATION ========== - print("3. VÉRIFICATION POST-VECTORISATION") - print("-" * 80) - print() - - print("Recomptage...") - remaining_without_vector = 0 - total_chunks = 0 - - for chunk in chunk_v2.iterator(include_vector=True): - total_chunks += 1 - if not chunk.vector or not chunk.vector.get('default'): - remaining_without_vector += 1 - - chunks_with_vector = total_chunks - remaining_without_vector - - print(f"✓ Total chunks: {total_chunks:,}") - print(f"✓ Avec vecteur: {chunks_with_vector:,} ({chunks_with_vector/total_chunks*100:.1f}%)") - print(f"✓ Sans vecteur: {remaining_without_vector:,}") - - print() - - if remaining_without_vector == 0: - print("✅ Tous les chunks ont été vectorisés!") - else: - print(f"⚠️ {remaining_without_vector} chunks encore sans vecteur") - print(" Relancer le script ou vérifier les erreurs") - - print() - print("=" * 80) - print("VECTORISATION TERMINÉE") - print("=" * 80) - print() - - if remaining_without_vector == 0: - print("✅ VECTORISATION RÉUSSIE") - print() - print("RÉSULTATS:") - print(f" • {vectorized_count} nouveaux vecteurs créés") - print(f" • {total_chunks:,} chunks totaux") - print(f" • 100% des chunks ont des vecteurs") - print() - print("PROCHAINES ÉTAPES:") - print(" 1. Relancer le test de recherche: python 10_test_search_quality.py") - print(" 2. 
Tester l'application Flask") - print() - print("NOTE: Chunk_v2 n'a toujours pas de vectorizer configuré.") - print("Les futurs nouveaux chunks devront être vectorisés manuellement") - print("OU la collection devra être recréée avec un vectorizer.") - elif vectorized_count > 0: - print("⚠️ VECTORISATION PARTIELLE") - print() - print(f" • {vectorized_count} chunks vectorisés") - print(f" • {remaining_without_vector} chunks restants") - print(" • Vérifier les erreurs et relancer") - else: - print("❌ VECTORISATION ÉCHOUÉE") - print() - print("Aucun chunk n'a pu être vectorisé.") - print("Vérifications:") - print(f" 1. Service text2vec-transformers actif: {VECTORIZER_URL}") - print(" 2. Docker containers en cours d'exécution") - print(" 3. Logs des erreurs ci-dessus") - -finally: - client.close() diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..0dbe17c --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,136 @@ +# Changelog - Library RAG Project + +## 2026-01-08 - Chunking Optimization & Vectorization + +### Chunking Improvements +- **Strict chunk size limits**: Max 1000 words (down from 1500-2000) +- **Overlap implementation**: 100-word overlap between consecutive chunks +- **Triple fallback system**: Ensures robust chunking even on LLM failures +- **New module**: `llm_chunker_improved.py` with overlap functionality + +### Re-chunking Results +- Identified 31 oversized chunks (>2000 tokens, max 7,158) +- Split into 92 optimally-sized chunks +- **Result**: 0 chunks > 2000 tokens (100% within BGE-M3 limits) +- Preserved all metadata during split (workTitle, workAuthor, sectionPath, orderIndex) + +### Vectorization +- Created manual vectorization system for Chunk_v2 (no vectorizer configured) +- Successfully vectorized 92 new chunks via text2vec-transformers API +- **Result**: 5,304/5,304 chunks with vectors (100% coverage) + +### Docker Configuration +- Exposed text2vec-transformers port (8090:8080) for external vectorization +- Added cluster 
configuration to fix "No private IP address found" error +- Increased WORKER_TIMEOUT to 600s for very large chunks + +### Search Quality +- Created comprehensive test suite (`10_test_search_quality.py`) +- Tests: distribution, overlap detection, semantic search (4 queries) +- Search now uses `near_vector()` with manual query vectorization +- **Issue identified**: Collected papers dominates results (95.8% of chunks) + +### Database Stats (Post-Optimization) +- Total chunks: 5,304 +- Average size: 289 tokens (optimal for BGE-M3) +- Distribution: 84.6% < 500 tokens, 11.5% 500-1000, 3.0% 1000-1500 +- Works: 8 (Collected papers: 5,080 chunks, Mind Design III: 61, Platon Ménon: 56, etc.) + +--- + +## 2025-01 - Weaviate v2 Migration & GPU Integration + +### Phase 1-3: Schema Migration (Complete) +- Migrated from Chunk/Summary/Document to Chunk_v2/Summary_v2/Work +- Removed nested `document` object, added direct properties (workTitle, workAuthor, year, language) +- Work collection with sourceId for documents +- Fixed 114 summaries missing properties +- Deleted vL-jepa chunks (17), fixed null workTitles + +### Phase 4: Memory System (Complete) +- Added Thought/Message/Conversation collections to Weaviate +- 9 MCP tools for memory management (add_thought, search_thoughts, etc.) 
+- GPU embeddings integration (BAAI/bge-m3, RTX 4070) +- Data: 102 Thoughts, 377 Messages, 12 Conversations + +### Phase 5: Backend Integration (Complete) +- Integrated GPU embedder into Flask app (singleton pattern) +- All search routes now use manual vectorization with `near_vector()` +- Updated all routes: simple_search, hierarchical_search, summary_only_search, rag_search +- Fixed Work → Chunk/Summary property mapping (v2 schema) + +### Phase 6-7: Testing & Optimization +- Comprehensive testing of search routes +- MCP tools validation +- Performance optimization with GPU embeddings +- Documentation updates (README.md, CLAUDE.md) + +### Phase 8: Documentation Cleanup +- Consolidated all phase documentation +- Updated README with Memory MCP tools section +- Cleaned up temporary files and scripts + +--- + +## Archive Structure + +``` +archive/ +├── migration_scripts/ # Migration & optimization scripts (01-11) +│ ├── 01_migrate_document_to_work.py +│ ├── 02_create_schema_v2.py +│ ├── 03_migrate_chunks_v2.py +│ ├── 04_migrate_summaries_v2.py +│ ├── 05_validate_migration.py +│ ├── 07_cleanup.py +│ ├── 08_fix_summaries_properties.py +│ ├── 09_rechunk_oversized.py +│ ├── 10_test_search_quality.py +│ ├── 11_vectorize_missing_chunks.py +│ └── old_scripts/ # ChromaDB migration scripts +├── migration_docs/ # Detailed migration documentation +│ ├── PLAN_MIGRATION_V2_SANS_DOCUMENT.md +│ ├── PHASE5_BACKEND_INTEGRATION.md +│ └── WEAVIATE_RETRIEVAL_ARCHITECTURE.md +├── documentation/ # Phase summaries +│ ├── PHASE_0_PYTORCH_CUDA.md +│ ├── PHASE_2_MIGRATION_SUMMARY.md +│ ├── PHASE_3_CONVERSATIONS_SUMMARY.md +│ ├── PHASE_4_MIGRATION_CHROMADB.md +│ ├── PHASE_5_MCP_TOOLS.md +│ ├── PHASE_6_TESTS_OPTIMISATION.md +│ ├── PHASE_7_INTEGRATION_BACKEND.md +│ ├── PHASE_8_DOCUMENTATION_CLEANUP.md +│ └── MIGRATION_README.md +└── backups/ # Pre-migration data backups + └── pre_migration_20260108_152033/ +``` + +--- + +## Technology Stack + +**Vector Database**: Weaviate 1.34.4 with BAAI/bge-m3 
embeddings (1024-dim) +**Embedder**: PyTorch 2.6.0+cu124, GPU RTX 4070 +**Backend**: Flask 3.0 with Server-Sent Events +**MCP Integration**: 9 memory tools + 6 RAG tools for Claude Desktop +**OCR**: Mistral OCR API +**LLM**: Ollama (local) or Mistral API + +--- + +## Known Issues + +1. **Chunk_v2 has no vectorizer**: All new chunks require manual vectorization via `11_vectorize_missing_chunks.py` +2. **Data imbalance**: Collected papers represents 95.8% of chunks, dominating search results +3. **Mind Design III underrepresented**: Only 61 chunks (1.2%) vs 5,080 for Collected papers + +## Recommendations + +1. Add more diverse works to balance corpus +2. Consider re-ranking with per-work boosting for diversity +3. Recreate Chunk_v2 with text2vec-transformers vectorizer for auto-vectorization (requires full data reload) + +--- + +For detailed implementation notes, see `.claude/CLAUDE.md` and `archive/` directories. diff --git a/check_linear_status.py b/check_linear_status.py deleted file mode 100644 index 9febbb3..0000000 --- a/check_linear_status.py +++ /dev/null @@ -1,174 +0,0 @@ -""" -Script pour vérifier l'état actuel des issues Linear du projet library_rag. 
- -Affiche : -- Nombre total d'issues -- Nombre d'issues par statut (Todo, In Progress, Done) -- Liste des issues In Progress (si présentes) -- Liste des issues Todo avec priorité 1 ou 2 -""" - -import os -import json -import requests -from pathlib import Path -from dotenv import load_dotenv - -# Load environment variables -load_dotenv() - -LINEAR_API_KEY = os.environ.get("LINEAR_API_KEY") -if not LINEAR_API_KEY: - print("❌ LINEAR_API_KEY not found in .env file") - exit(1) - -# Read project info -project_file = Path("generations/library_rag/.linear_project.json") -if not project_file.exists(): - print(f"❌ Project file not found: {project_file}") - exit(1) - -with open(project_file) as f: - project_info = json.load(f) - -project_id = project_info.get("project_id") -team_id = project_info.get("team_id") -total_issues_created = project_info.get("total_issues", 0) - -print("=" * 80) -print(f"LINEAR STATUS CHECK - Project: {project_info.get('project_name')}") -print(f"URL: {project_info.get('project_url')}") -print(f"Total issues created historically: {total_issues_created}") -print("=" * 80) -print() - -# GraphQL query to list all issues in the project -query = """ -query($projectId: String!) 
{ - project(id: $projectId) { - issues(first: 200) { - nodes { - id - identifier - title - priority - state { - name - } - createdAt - } - } - } -} -""" - -headers = { - "Authorization": LINEAR_API_KEY, - "Content-Type": "application/json" -} - -response = requests.post( - "https://api.linear.app/graphql", - headers=headers, - json={"query": query, "variables": {"projectId": project_id}} -) - -if response.status_code != 200: - print(f"❌ Linear API error: {response.status_code}") - print(response.text) - exit(1) - -data = response.json() - -if "errors" in data: - print(f"❌ GraphQL errors: {data['errors']}") - exit(1) - -issues = data["data"]["project"]["issues"]["nodes"] - -# Count by status -status_counts = { - "Todo": 0, - "In Progress": 0, - "Done": 0, - "Other": 0 -} - -issues_by_status = { - "Todo": [], - "In Progress": [], - "Done": [] -} - -for issue in issues: - state_name = issue["state"]["name"] - if state_name in status_counts: - status_counts[state_name] += 1 - issues_by_status[state_name].append(issue) - else: - status_counts["Other"] += 1 - -# Display summary -print(f"STATUS SUMMARY:") -print(f" Done: {status_counts['Done']}") -print(f" In Progress: {status_counts['In Progress']}") -print(f" Todo: {status_counts['Todo']}") -print(f" Other: {status_counts['Other']}") -print(f" TOTAL: {len(issues)}") -print() - -# Check for issues In Progress (potential blocker) -if status_counts["In Progress"] > 0: - print("WARNING: There are 'In Progress' issues:") - print() - for issue in issues_by_status["In Progress"]: - priority = issue.get("priority", "N/A") - print(f" [IN PROGRESS] {issue['identifier']} - Priority {priority}") - print(f" {issue['title']}") - print() - print("! 
The agent will resume these issues first!") - print() - -# List high-priority Todo issues -high_priority_todo = [ - issue for issue in issues_by_status["Todo"] - if issue.get("priority") in [1, 2] -] - -if high_priority_todo: - print(f"HIGH PRIORITY TODO (Priority 1-2): {len(high_priority_todo)}") - print() - for issue in sorted(high_priority_todo, key=lambda x: x.get("priority", 99)): - priority = issue.get("priority", "N/A") - print(f" [TODO] {issue['identifier']} - Priority {priority}") - print(f" {issue['title'][:80]}") - print() - -# List all Todo issues (for reference) -if status_counts["Todo"] > 0: - print(f"ALL TODO ISSUES: {status_counts['Todo']}") - print() - for issue in sorted(issues_by_status["Todo"], key=lambda x: x.get("priority", 99)): - priority = issue.get("priority", "N/A") - title = issue['title'][:60] + "..." if len(issue['title']) > 60 else issue['title'] - print(f" {issue['identifier']} [P{priority}] {title}") - print() - -# Recommendation -print("=" * 80) -if status_counts["In Progress"] > 0: - print("RECOMMENDATION:") - print(" - There are 'In Progress' issues that should be finished first") - print(" - Before adding new issues, check if these should be:") - print(" 1. Completed") - print(" 2. Cancelled (moved back to Todo)") - print(" 3. Deleted") -elif status_counts["Todo"] > 10: - print("RECOMMENDATION:") - print(f" - There are {status_counts['Todo']} Todo issues pending") - print(" - Consider finishing them before adding new ones") -else: - print("RECOMMENDATION:") - print(" - Project is in good state to add new issues") - print(" - You can proceed with --new-spec") -print("=" * 80) diff --git a/check_meta_issue.py b/check_meta_issue.py deleted file mode 100644 index 1a7cc95..0000000 --- a/check_meta_issue.py +++ /dev/null @@ -1,82 +0,0 @@ -""" -Vérifier si le META issue existe toujours dans Linear. 
-""" - -import os -import json -import requests -from pathlib import Path -from dotenv import load_dotenv - -load_dotenv() - -LINEAR_API_KEY = os.environ.get("LINEAR_API_KEY") -if not LINEAR_API_KEY: - print("ERROR: LINEAR_API_KEY not found") - exit(1) - -# Read project info -project_file = Path("generations/library_rag/.linear_project.json") -with open(project_file) as f: - project_info = json.load(f) - -meta_issue_id = project_info.get("meta_issue_id") -project_id = project_info.get("project_id") - -print("=" * 80) -print("Checking META issue existence...") -print(f"META issue ID from .linear_project.json: {meta_issue_id}") -print("=" * 80) -print() - -# Try to fetch the META issue -query = """ -query($issueId: String!) { - issue(id: $issueId) { - id - identifier - title - state { - name - } - } -} -""" - -headers = { - "Authorization": LINEAR_API_KEY, - "Content-Type": "application/json" -} - -response = requests.post( - "https://api.linear.app/graphql", - headers=headers, - json={"query": query, "variables": {"issueId": meta_issue_id}} -) - -if response.status_code != 200: - print(f"ERROR: Linear API error: {response.status_code}") - exit(1) - -data = response.json() - -if "errors" in data: - print("META ISSUE NOT FOUND (was deleted)") - print() - print("SOLUTION: Need to recreate META issue or reset .linear_project.json") - exit(1) - -issue = data["data"]["issue"] -if issue is None: - print("META ISSUE NOT FOUND (was deleted)") - print() - print("SOLUTION: Need to recreate META issue or reset .linear_project.json") - exit(1) - -print(f"META issue EXISTS:") -print(f" ID: {issue['id']}") -print(f" Identifier: {issue['identifier']}") -print(f" Title: {issue['title']}") -print(f" State: {issue['state']['name']}") -print() -print("OK - Can proceed with agent") diff --git a/dockerize_ikario_body.py b/dockerize_ikario_body.py deleted file mode 100644 index ddd5703..0000000 --- a/dockerize_ikario_body.py +++ /dev/null @@ -1,93 +0,0 @@ -""" -Dockerization helper for 
ikario_body -=================================== - -Ce script crée les fichiers Docker nécessaires pour exécuter l'application -`generations/ikario_body` (frontend + serveur + base SQLite) dans Docker, -SANS modifier aucun fichier existant. - -Il génère un fichier de composition : - - docker-compose.ikario_body.yml (à la racine du repo) - -Ce fichier utilise l'image officielle Node et monte le code existant -ainsi que la base SQLite dans les conteneurs (mode développement). - -Utilisation : - 1) Depuis la racine du repo : - python dockerize_ikario_body.py - 2) Puis pour lancer l'appli dans Docker : - docker compose -f docker-compose.ikario_body.yml up - ou, selon votre installation : - docker-compose -f docker-compose.ikario_body.yml up - - - Frontend accessible sur: http://localhost:3000 - - API backend (server) sur : http://localhost:3001 -""" - -from pathlib import Path - - -def generate_docker_compose(root: Path) -> None: - """Génère le fichier docker-compose.ikario_body.yml sans toucher au code existant.""" - project_dir = root / "generations" / "ikario_body" - - if not project_dir.exists(): - raise SystemExit(f"Project directory not found: {project_dir}") - - compose_path = root / "docker-compose.ikario_body.yml" - - # On utilise les scripts npm déjà définis : - # - frontend: npm run dev (Vite) en écoutant sur 0.0.0.0:3000 (dans le conteneur) - # - server: npm start dans ./server sur 3001 (dans le conteneur) - # - # Pour éviter les conflits de ports courants (3000/3001) sur la machine hôte, - # on mappe vers des ports plus élevés côté host : - # - frontend : host 4300 -> container 3000 - # - backend : host 4301 -> container 3001 - # - # Le volume ./generations/ikario_body est monté dans /app, - # ce qui inclut aussi la base SQLite dans server/data/claude-clone.db. 
- compose_content = f"""services: - ikario_body_frontend: - image: node:20 - working_dir: /app - volumes: - - ./generations/ikario_body:/app - # Eviter de réutiliser les node_modules Windows dans le conteneur Linux - - /app/node_modules - command: ["sh", "-c", "npm install && npm run dev -- --host 0.0.0.0 --port 3000"] - ports: - - "4300:3000" - environment: - - NODE_ENV=development - - ikario_body_server: - image: node:20 - working_dir: /app/server - volumes: - - ./generations/ikario_body:/app - # Eviter de réutiliser les node_modules Windows dans le conteneur Linux - - /app/server/node_modules - command: ["sh", "-c", "npm install && npm start"] - ports: - - "4301:3001" - environment: - - NODE_ENV=development - depends_on: - - ikario_body_frontend - -""" - - compose_path.write_text(compose_content, encoding="utf-8") - print(f"Created {compose_path.relative_to(root)}") - - -def main() -> None: - repo_root = Path(__file__).resolve().parent - generate_docker_compose(repo_root) - - -if __name__ == "__main__": - main() - - diff --git a/generations/library_rag/README.md b/generations/library_rag/README.md index 92b85b7..6a7c2b1 100644 --- a/generations/library_rag/README.md +++ b/generations/library_rag/README.md @@ -451,7 +451,101 @@ filter_by_author(author="Platon") delete_document(source_id="platon-menon", confirm=true) ``` -Pour plus de détails, voir la documentation complète dans `.claude/CLAUDE.md`. +### Outils MCP Memory (9 outils intégrés - Phase 4) + +**Système de Mémoire Unifié** : Le serveur MCP intègre désormais 9 outils pour gérer un système de mémoire (Thoughts, Messages, Conversations) utilisant Weaviate + GPU embeddings. Ces outils permettent à Claude Desktop de créer, rechercher et gérer des pensées, messages et conversations de manière persistante. 
+ +**Architecture Memory** : +- **Backend** : Weaviate 1.34.4 (collections Thought, Message, Conversation) +- **Embeddings** : BAAI/bge-m3 GPU (1024-dim, RTX 4070, PyTorch 2.6.0+cu124) +- **Handlers** : `memory/mcp/` (thought_tools, message_tools, conversation_tools) +- **Données** : 102 Thoughts, 377 Messages, 12 Conversations (au 2025-01-08) + +#### Thought Tools (3) + +**1. add_thought** - Ajouter une pensée au système +``` +add_thought( + content="Exploring vector databases for semantic search", + thought_type="observation", # reflection, question, intuition, observation + trigger="Research session", + concepts=["weaviate", "embeddings", "gpu"], + privacy_level="private" # private, shared, public +) +``` + +**2. search_thoughts** - Recherche sémantique dans les pensées +``` +search_thoughts( + query="vector databases GPU", + limit=10, + thought_type_filter="observation" # optionnel +) +``` + +**3. get_thought** - Récupérer une pensée par UUID +``` +get_thought(uuid="730c1a8e-b09f-4889-bbe9-4867d0ee7f1a") +``` + +#### Message Tools (3) + +**4. add_message** - Ajouter un message à une conversation +``` +add_message( + content="Explain transformers in AI", + role="user", # user, assistant, system + conversation_id="chat_2025_01_08", + order_index=0 +) +``` + +**5. get_messages** - Récupérer tous les messages d'une conversation +``` +get_messages( + conversation_id="chat_2025_01_08", + limit=50 +) +``` + +**6. search_messages** - Recherche sémantique dans les messages +``` +search_messages( + query="transformers AI", + limit=10, + conversation_id_filter="chat_2025_01_08" # optionnel +) +``` + +#### Conversation Tools (3) + +**7. get_conversation** - Récupérer une conversation par ID +``` +get_conversation(conversation_id="ikario_derniere_pensee") +``` + +**8. search_conversations** - Recherche sémantique dans les conversations +``` +search_conversations( + query="philosophical discussion", + limit=10, + category_filter="philosophy" # optionnel +) +``` + +**9. 
list_conversations** - Lister toutes les conversations +``` +list_conversations( + limit=20, + category_filter="testing" # optionnel +) +``` + +**Tests** : Tous les outils Memory ont été testés avec succès (voir `test_memory_mcp_tools.py`) + +**Documentation complète** : Voir `memory/README_MCP_TOOLS.md` pour l'architecture détaillée, les schémas de données et les exemples d'utilisation. + +Pour plus de détails sur les outils Library RAG, voir la documentation complète dans `.claude/CLAUDE.md`. --- diff --git a/generations/library_rag/flask_app.py b/generations/library_rag/flask_app.py index e698332..89a1b50 100644 --- a/generations/library_rag/flask_app.py +++ b/generations/library_rag/flask_app.py @@ -89,8 +89,23 @@ from utils.types import ( SSEEvent, ) +# GPU Embedder for manual vectorization (Phase 5: Backend Integration) +import sys +sys.path.insert(0, str(Path(__file__).parent.parent.parent)) +from memory.core import get_embedder + app = Flask(__name__) +# Initialize GPU embedder singleton +_embedder = None + +def get_gpu_embedder(): + """Get or create GPU embedder singleton.""" + global _embedder + if _embedder is None: + _embedder = get_embedder() + return _embedder + # Configuration Flask app.config["SECRET_KEY"] = os.environ.get("SECRET_KEY", "dev-secret-key-change-in-production") @@ -152,26 +167,25 @@ def get_collection_stats() -> Optional[CollectionStats]: stats: CollectionStats = {} # Chunk stats (renamed from Passage) - passages = client.collections.get("Chunk") + passages = client.collections.get("Chunk_v2") passage_count = passages.aggregate.over_all(total_count=True) stats["passages"] = passage_count.total_count or 0 - # Get unique authors and works (from nested objects) - all_passages = passages.query.fetch_objects(limit=1000) + # Get unique authors and works (from direct properties in v2) + all_passages = passages.query.fetch_objects(limit=10000) authors: set[str] = set() works: set[str] = set() languages: set[str] = set() for obj in 
all_passages.objects: - # Work is now a nested object with {title, author} - work_obj = obj.properties.get("work") - if work_obj and isinstance(work_obj, dict): - if work_obj.get("author"): - authors.add(str(work_obj["author"])) - if work_obj.get("title"): - works.add(str(work_obj["title"])) - if obj.properties.get("language"): - languages.add(str(obj.properties["language"])) + props = obj.properties + # In v2: workAuthor and workTitle are direct properties + if props.get("workAuthor"): + authors.add(str(props["workAuthor"])) + if props.get("workTitle"): + works.add(str(props["workTitle"])) + if props.get("language"): + languages.add(str(props["language"])) stats["authors"] = len(authors) stats["works"] = len(works) @@ -208,13 +222,13 @@ def get_all_passages( if client is None: return [] - chunks = client.collections.get("Chunk") + chunks = client.collections.get("Chunk_v2") result = chunks.query.fetch_objects( limit=limit, offset=offset, return_properties=[ - "text", "sectionPath", "sectionLevel", "chapterTitle", + "text", "sectionPath", "chapterTitle", "canonicalReference", "unitType", "keywords", "orderIndex", "language" ], ) @@ -253,7 +267,7 @@ def simple_search( if client is None: return [] - chunks = client.collections.get("Chunk") + chunks = client.collections.get("Chunk_v2") # Build filters using top-level properties (workAuthor, workTitle) filters: Optional[Any] = None @@ -263,13 +277,17 @@ def simple_search( work_filter_obj = wvq.Filter.by_property("workTitle").equal(work_filter) filters = filters & work_filter_obj if filters else work_filter_obj - result = chunks.query.near_text( - query=query, + # Generate query vector with GPU embedder (Phase 5: manual vectorization) + embedder = get_gpu_embedder() + query_vector = embedder.embed_single(query) + + result = chunks.query.near_vector( + near_vector=query_vector.tolist(), limit=limit, filters=filters, return_metadata=wvq.MetadataQuery(distance=True), return_properties=[ - "text", "sectionPath", 
"sectionLevel", "chapterTitle", + "text", "sectionPath", "chapterTitle", "canonicalReference", "unitType", "keywords", "orderIndex", "language" ], ) @@ -333,10 +351,14 @@ def hierarchical_search( # STAGE 1: Search Summary collection for relevant sections # ═══════════════════════════════════════════════════════════════ - summary_collection = client.collections.get("Summary") + summary_collection = client.collections.get("Summary_v2") - summaries_result = summary_collection.query.near_text( - query=query, + # Generate query vector with GPU embedder (Phase 5: manual vectorization) + embedder = get_gpu_embedder() + query_vector = embedder.embed_single(query) + + summaries_result = summary_collection.query.near_vector( + near_vector=query_vector.tolist(), limit=sections_limit, return_metadata=wvq.MetadataQuery(distance=True), # Note: Don't specify return_properties - let Weaviate return all properties @@ -358,63 +380,62 @@ def hierarchical_search( for summary_obj in summaries_result.objects: props = summary_obj.properties - # Try to get document.sourceId if available (nested object might still be returned) - doc_obj = props.get("document") - source_id = "" - if doc_obj and isinstance(doc_obj, dict): - source_id = doc_obj.get("sourceId", "") + # In v2: Summary has workTitle property, need to get sourceId from Work + work_title = props.get("workTitle", "") + # We'll get sourceId later by matching workTitle with Work.sourceId + # For now, use workTitle as identifier sections_data.append({ "section_path": props.get("sectionPath", ""), "title": props.get("title", ""), "summary_text": props.get("text", ""), "level": props.get("level", 1), "concepts": props.get("concepts", []), - "document_source_id": source_id, - "summary_uuid": str(summary_obj.uuid), # Keep UUID for later retrieval if needed + "document_source_id": "", # Will be populated during filtering + "work_title": work_title, # Add workTitle for filtering + "summary_uuid": str(summary_obj.uuid), "similarity": 
round((1 - summary_obj.metadata.distance) * 100, 1) if summary_obj.metadata and summary_obj.metadata.distance else 0, }) - # Post-filter sections by author/work (Summary doesn't have work nested object) + # Post-filter sections by author/work (Summary_v2 has workTitle property) if author_filter or work_filter: print(f"[HIERARCHICAL] Post-filtering {len(sections_data)} sections by work='{work_filter}'") - doc_collection = client.collections.get("Document") - filtered_sections = [] + # Build Work title -> author map for filtering + work_collection = client.collections.get("Work") + work_map = {} + for work in work_collection.iterator(include_vector=False): + props = work.properties + title = props.get("title") + if title: + work_map[title] = { + "author": props.get("author", "Unknown"), + "sourceId": props.get("sourceId", "") + } + + filtered_sections = [] for section in sections_data: - source_id = section["document_source_id"] - if not source_id: - print(f"[HIERARCHICAL] Section '{section['section_path'][:40]}...' SKIPPED (no sourceId)") + work_title = section.get("work_title", "") + + if not work_title or work_title not in work_map: + print(f"[HIERARCHICAL] Section '{section['section_path'][:40]}...' SKIPPED (no work mapping)") continue - # Query Document to get work metadata - # Note: 'work' is a nested object, so we don't specify it in return_properties - # Weaviate should return it automatically - doc_result = doc_collection.query.fetch_objects( - filters=wvq.Filter.by_property("sourceId").equal(source_id), - limit=1, - ) + work_author = work_map[work_title]["author"] + section["document_source_id"] = work_map[work_title]["sourceId"] # Populate sourceId - if doc_result.objects: - doc_work = doc_result.objects[0].properties.get("work", {}) - print(f"[HIERARCHICAL] Section '{section['section_path'][:40]}...' 
doc_work type={type(doc_work)}, value={doc_work}") - if isinstance(doc_work, dict): - work_title = doc_work.get("title", "N/A") - work_author = doc_work.get("author", "N/A") - # Check filters - if author_filter and work_author != author_filter: - print(f"[HIERARCHICAL] Section '{section['section_path'][:40]}...' FILTERED (author '{work_author}' != '{author_filter}')") - continue - if work_filter and work_title != work_filter: - print(f"[HIERARCHICAL] Section '{section['section_path'][:40]}...' FILTERED (work '{work_title}' != '{work_filter}')") - continue + print(f"[HIERARCHICAL] Section '{section['section_path'][:40]}...' work={work_title}, author={work_author}") - print(f"[HIERARCHICAL] Section '{section['section_path'][:40]}...' KEPT (work='{work_title}')") - filtered_sections.append(section) - else: - print(f"[HIERARCHICAL] Section '{section['section_path'][:40]}...' SKIPPED (doc_work not a dict)") - else: - print(f"[HIERARCHICAL] Section '{section['section_path'][:40]}...' SKIPPED (no doc found for sourceId='{source_id}')") + # Check filters + if author_filter and work_author != author_filter: + print(f"[HIERARCHICAL] Section '{section['section_path'][:40]}...' FILTERED (author '{work_author}' != '{author_filter}')") + continue + if work_filter and work_title != work_filter: + print(f"[HIERARCHICAL] Section '{section['section_path'][:40]}...' FILTERED (work '{work_title}' != '{work_filter}')") + continue + + print(f"[HIERARCHICAL] Section '{section['section_path'][:40]}...' 
KEPT (work='{work_title}')") + filtered_sections.append(section) sections_data = filtered_sections print(f"[HIERARCHICAL] After filtering: {len(sections_data)} sections remaining") @@ -438,7 +459,7 @@ def hierarchical_search( # For each section, search chunks using the section's summary text # This groups chunks under their relevant sections - chunk_collection = client.collections.get("Chunk") + chunk_collection = client.collections.get("Chunk_v2") # Build base filters (author/work only) base_filters: Optional[Any] = None @@ -464,8 +485,11 @@ def hierarchical_search( if base_filters: section_filters = base_filters & section_filters - chunks_result = chunk_collection.query.near_text( - query=section_query, + # Generate query vector with GPU embedder (Phase 5: manual vectorization) + section_query_vector = embedder.embed_single(section_query) + + chunks_result = chunk_collection.query.near_vector( + near_vector=section_query_vector.tolist(), limit=chunks_per_section, filters=section_filters, return_metadata=wvq.MetadataQuery(distance=True), @@ -600,14 +624,28 @@ def summary_only_search( if client is None: return [] - summaries = client.collections.get("Summary") + summaries = client.collections.get("Summary_v2") - # Note: Cannot filter by nested document properties directly in Weaviate v4 - # Must fetch all and filter in Python if author/work filters are present + # Build Work map for metadata lookup (Summary_v2 has workTitle, not document) + work_collection = client.collections.get("Work") + work_map = {} + for work in work_collection.iterator(include_vector=False): + work_props = work.properties + title = work_props.get("title") + if title: + work_map[title] = { + "author": work_props.get("author", "Unknown"), + "year": work_props.get("year", 0), + "sourceId": work_props.get("sourceId", ""), + } + + # Generate query vector with GPU embedder (Phase 5: manual vectorization) + embedder = get_gpu_embedder() + query_vector = embedder.embed_single(query) # Semantic 
search - results = summaries.query.near_text( - query=query, + results = summaries.query.near_vector( + near_vector=query_vector.tolist(), limit=limit * 3 if (author_filter or work_filter) else limit, # Fetch more if filtering return_metadata=wvq.MetadataQuery(distance=True) ) @@ -618,24 +656,34 @@ def summary_only_search( props = obj.properties similarity = 1 - obj.metadata.distance - # Apply filters (Python-side since nested properties) - if author_filter and props["document"].get("author", "") != author_filter: + # Get work metadata from workTitle + work_title = props.get("workTitle", "") + if not work_title or work_title not in work_map: continue - if work_filter and props["document"].get("title", "") != work_filter: + + work_info = work_map[work_title] + work_author = work_info["author"] + work_year = work_info["year"] + source_id = work_info["sourceId"] + + # Apply filters + if author_filter and work_author != author_filter: + continue + if work_filter and work_title != work_filter: continue # Determine document icon and name - doc_id = props["document"]["sourceId"].lower() - if "tiercelin" in doc_id: + doc_id_lower = source_id.lower() + if "tiercelin" in doc_id_lower: doc_icon = "🟡" doc_name = "Tiercelin" - elif "platon" in doc_id or "menon" in doc_id: + elif "platon" in doc_id_lower or "menon" in doc_id_lower: doc_icon = "🟢" doc_name = "Platon" - elif "haugeland" in doc_id: + elif "haugeland" in doc_id_lower: doc_icon = "🟣" doc_name = "Haugeland" - elif "logique" in doc_id: + elif "logique" in doc_id_lower: doc_icon = "🔵" doc_name = "Logique" else: @@ -647,19 +695,19 @@ def summary_only_search( "uuid": str(obj.uuid), "similarity": round(similarity * 100, 1), # Convert to percentage "text": props.get("text", ""), - "title": props["title"], + "title": props.get("title", ""), "concepts": props.get("concepts", []), "doc_icon": doc_icon, "doc_name": doc_name, - "author": props["document"].get("author", ""), - "year": props["document"].get("year", 0), + "author": 
work_author, + "year": work_year, "chunks_count": props.get("chunksCount", 0), "section_path": props.get("sectionPath", ""), "sectionPath": props.get("sectionPath", ""), # Alias for template compatibility # Add work info for template compatibility "work": { - "title": props["document"].get("title", ""), - "author": props["document"].get("author", ""), + "title": work_title, + "author": work_author, }, } @@ -969,7 +1017,7 @@ def rag_search( print("[RAG Search] Weaviate client unavailable") return [] - chunks = client.collections.get("Chunk") + chunks = client.collections.get("Chunk_v2") # Build work filter if selected_works is provided work_filter: Optional[Any] = None @@ -978,9 +1026,13 @@ def rag_search( work_filter = wvq.Filter.by_property("workTitle").contains_any(selected_works) print(f"[RAG Search] Applying work filter: {selected_works}") + # Generate query vector with GPU embedder (Phase 5: manual vectorization) + embedder = get_gpu_embedder() + query_vector = embedder.embed_single(query) + # Query with properties needed for RAG context - result = chunks.query.near_text( - query=query, + result = chunks.query.near_vector( + near_vector=query_vector.tolist(), limit=limit, filters=work_filter, return_metadata=wvq.MetadataQuery(distance=True), @@ -1444,33 +1496,30 @@ def api_get_works() -> Union[Response, tuple[Response, int]]: "message": "Cannot connect to Weaviate database" }), 500 - # Query Chunk collection to get all unique works with counts - chunks = client.collections.get("Chunk") + # Query Chunk_v2 collection to get all unique works with counts + chunks = client.collections.get("Chunk_v2") # Fetch all chunks to aggregate by work - # Using a larger limit to get all documents - # Note: Don't use return_properties with nested objects (causes gRPC error) - # Fetch all objects without specifying properties + # In v2: work is NOT a nested object, use workTitle and workAuthor properties all_chunks = chunks.query.fetch_objects(limit=10000) # Aggregate chunks by 
work (title + author) works_count: Dict[str, Dict[str, Any]] = {} for obj in all_chunks.objects: - work_obj = obj.properties.get("work") - if work_obj and isinstance(work_obj, dict): - title = work_obj.get("title", "") - author = work_obj.get("author", "") + props = obj.properties + title = props.get("workTitle", "") + author = props.get("workAuthor", "") - if title: # Only count if title exists - # Use title as key (assumes unique titles) - if title not in works_count: - works_count[title] = { - "title": title, - "author": author or "Unknown", - "chunks_count": 0 - } - works_count[title]["chunks_count"] += 1 + if title: # Only count if title exists + # Use title as key (assumes unique titles) + if title not in works_count: + works_count[title] = { + "title": title, + "author": author or "Unknown", + "chunks_count": 0 + } + works_count[title]["chunks_count"] += 1 # Convert to list and sort by author, then title works_list = list(works_count.values()) @@ -3082,45 +3131,60 @@ def documents() -> str: with get_weaviate_client() as client: if client is not None: - # Get chunk counts and authors - chunk_collection = client.collections.get("Chunk") + from typing import cast - for obj in chunk_collection.iterator(include_vector=False): - props = obj.properties - from typing import cast - doc_obj = cast(Dict[str, Any], props.get("document", {})) - work_obj = cast(Dict[str, Any], props.get("work", {})) - - if doc_obj: - source_id = doc_obj.get("sourceId", "") - if source_id: - if source_id not in documents_from_weaviate: - documents_from_weaviate[source_id] = { - "source_id": source_id, - "title": work_obj.get("title") if work_obj else "Unknown", - "author": work_obj.get("author") if work_obj else "Unknown", - "chunks_count": 0, - "summaries_count": 0, - "authors": set(), - } - documents_from_weaviate[source_id]["chunks_count"] += 1 - - # Track unique authors - author = work_obj.get("author") if work_obj else None - if author: - 
documents_from_weaviate[source_id]["authors"].add(author) - - # Get summary counts + # Get all Works (now with sourceId added in Phase 1 of migration) try: - summary_collection = client.collections.get("Summary") - for obj in summary_collection.iterator(include_vector=False): - props = obj.properties - doc_obj = cast(Dict[str, Any], props.get("document", {})) + work_collection = client.collections.get("Work") + chunk_collection = client.collections.get("Chunk_v2") - if doc_obj: - source_id = doc_obj.get("sourceId", "") - if source_id and source_id in documents_from_weaviate: - documents_from_weaviate[source_id]["summaries_count"] += 1 + # Build documents from Work collection + for work in work_collection.iterator(include_vector=False): + props = work.properties + source_id = props.get("sourceId") + + # Skip Works without sourceId (not documents) + if not source_id: + continue + + documents_from_weaviate[source_id] = { + "source_id": source_id, + "title": props.get("title", "Unknown"), + "author": props.get("author", "Unknown"), + "pages": props.get("pages", 0), + "edition": props.get("edition", ""), + "chunks_count": 0, + "summaries_count": 0, + "authors": set(), + } + + # Add author to set + if props.get("author") and props.get("author") != "Unknown": + documents_from_weaviate[source_id]["authors"].add(props.get("author")) + + # Count chunks per document (via workTitle) + for chunk in chunk_collection.iterator(include_vector=False): + work_title = chunk.properties.get("workTitle") + + # Find corresponding sourceId + for source_id, doc_data in documents_from_weaviate.items(): + if doc_data["title"] == work_title: + doc_data["chunks_count"] += 1 + break + except Exception as e: + print(f"Warning: Could not load Work collection: {e}") + + # Count summaries (if collection exists) + try: + summary_collection = client.collections.get("Summary_v2") + for summary in summary_collection.iterator(include_vector=False): + work_title = summary.properties.get("workTitle") + + # 
Find corresponding sourceId + for source_id, doc_data in documents_from_weaviate.items(): + if doc_data["title"] == work_title: + doc_data["summaries_count"] += 1 + break except Exception: # Summary collection may not exist pass @@ -3157,17 +3221,195 @@ def documents() -> str: "has_images": images_dir.exists() and any(images_dir.iterdir()) if images_dir.exists() else False, "image_count": len(list(images_dir.glob("*.png"))) if images_dir.exists() else 0, "metadata": metadata, + "pages": weaviate_data.get("pages", pages), # FROM WEAVIATE, fallback to file "summaries_count": weaviate_data["summaries_count"], # FROM WEAVIATE "authors_count": len(weaviate_data["authors"]), # FROM WEAVIATE "chunks_count": weaviate_data["chunks_count"], # FROM WEAVIATE "title": weaviate_data["title"], # FROM WEAVIATE "author": weaviate_data["author"], # FROM WEAVIATE + "edition": weaviate_data.get("edition", ""), # FROM WEAVIATE "toc": toc, }) return render_template("documents.html", documents=documents_list) +# ═══════════════════════════════════════════════════════════════════════════════ +# Memory Routes (Phase 5: Backend Integration) +# ═══════════════════════════════════════════════════════════════════════════════ + +def run_async(coro): + """Run async coroutine in sync Flask context.""" + import asyncio + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + try: + return loop.run_until_complete(coro) + finally: + loop.close() + + +@app.route("/memories") +def memories() -> str: + """Render the Memory search page (Thoughts + Messages).""" + # Get memory statistics + with get_weaviate_client() as client: + if client is None: + flash("Cannot connect to Weaviate database", "error") + stats = {"thoughts": 0, "messages": 0, "conversations": 0} + else: + try: + thoughts = client.collections.get("Thought") + messages = client.collections.get("Message") + conversations = client.collections.get("Conversation") + + thoughts_count = 
thoughts.aggregate.over_all(total_count=True).total_count + messages_count = messages.aggregate.over_all(total_count=True).total_count + conversations_count = conversations.aggregate.over_all(total_count=True).total_count + + stats = { + "thoughts": thoughts_count or 0, + "messages": messages_count or 0, + "conversations": conversations_count or 0, + } + except Exception as e: + print(f"Error fetching memory stats: {e}") + stats = {"thoughts": 0, "messages": 0, "conversations": 0} + + return render_template("memories.html", stats=stats) + + +@app.route("/api/memories/search-thoughts", methods=["POST"]) +def api_search_thoughts(): + """API endpoint for thought semantic search.""" + try: + # Import Memory MCP tools locally + from memory.mcp import SearchThoughtsInput, search_thoughts_handler + + data = request.json + query = data.get("query", "") + limit = data.get("limit", 10) + thought_type_filter = data.get("thought_type_filter") + + input_data = SearchThoughtsInput( + query=query, + limit=limit, + thought_type_filter=thought_type_filter + ) + + result = run_async(search_thoughts_handler(input_data)) + return jsonify(result) + except Exception as e: + return jsonify({"success": False, "error": str(e)}), 500 + + +@app.route("/api/memories/search-messages", methods=["POST"]) +def api_search_messages(): + """API endpoint for message semantic search.""" + try: + from memory.mcp import SearchMessagesInput, search_messages_handler + + data = request.json + query = data.get("query", "") + limit = data.get("limit", 10) + conversation_id_filter = data.get("conversation_id_filter") + + input_data = SearchMessagesInput( + query=query, + limit=limit, + conversation_id_filter=conversation_id_filter + ) + + result = run_async(search_messages_handler(input_data)) + return jsonify(result) + except Exception as e: + return jsonify({"success": False, "error": str(e)}), 500 + + +@app.route("/conversations") +def conversations() -> str: + """Render the Conversations page.""" + try: + 
from memory.mcp import ListConversationsInput, list_conversations_handler
+
+        limit = request.args.get("limit", 20, type=int)
+        category_filter = request.args.get("category")
+
+        input_data = ListConversationsInput(
+            limit=limit,
+            category_filter=category_filter
+        )
+
+        result = run_async(list_conversations_handler(input_data))
+
+        if result.get("success"):
+            conversations_list = result.get("conversations", [])
+        else:
+            flash(f"Error loading conversations: {result.get('error')}", "error")
+            conversations_list = []
+
+        return render_template("conversations.html", conversations=conversations_list)
+    except Exception as e:
+        flash(f"Error loading conversations: {str(e)}", "error")
+        return render_template("conversations.html", conversations=[])
+
+
+@app.route("/conversation/<conversation_id>")
+def conversation_view(conversation_id: str) -> str:
+    """View a specific conversation with all its messages."""
+    try:
+        from memory.mcp import (
+            GetConversationInput, get_conversation_handler,
+            GetMessagesInput, get_messages_handler
+        )
+
+        # Get conversation metadata
+        conv_input = GetConversationInput(conversation_id=conversation_id)
+        conversation = run_async(get_conversation_handler(conv_input))
+
+        if not conversation.get("success"):
+            flash(f"Conversation not found: {conversation.get('error')}", "error")
+            return redirect(url_for("conversations"))
+
+        # Get all messages
+        msg_input = GetMessagesInput(conversation_id=conversation_id, limit=500)
+        messages_result = run_async(get_messages_handler(msg_input))
+
+        messages = messages_result.get("messages", []) if messages_result.get("success") else []
+
+        return render_template(
+            "conversation_view.html",
+            conversation=conversation,
+            messages=messages
+        )
+    except Exception as e:
+        flash(f"Error loading conversation: {str(e)}", "error")
+        return redirect(url_for("conversations"))
+
+
+@app.route("/api/conversations/search", methods=["POST"])
+def api_search_conversations():
+    """API endpoint for conversation semantic search."""
+    
try: + from memory.mcp import SearchConversationsInput, search_conversations_handler + + data = request.json + query = data.get("query", "") + limit = data.get("limit", 10) + category_filter = data.get("category_filter") + + input_data = SearchConversationsInput( + query=query, + limit=limit, + category_filter=category_filter + ) + + result = run_async(search_conversations_handler(input_data)) + return jsonify(result) + except Exception as e: + return jsonify({"success": False, "error": str(e)}), 500 + + # ═══════════════════════════════════════════════════════════════════════════════ # Main # ═══════════════════════════════════════════════════════════════════════════════ diff --git a/generations/library_rag/mcp_server.py b/generations/library_rag/mcp_server.py index cb1f26b..221390a 100644 --- a/generations/library_rag/mcp_server.py +++ b/generations/library_rag/mcp_server.py @@ -62,6 +62,31 @@ from mcp_tools import ( PDFProcessingError, ) +# Memory MCP Tools (added for unified Memory + Library system) +sys.path.insert(0, str(Path(__file__).parent.parent.parent)) +from memory.mcp import ( + # Thought tools + AddThoughtInput, + SearchThoughtsInput, + add_thought_handler, + search_thoughts_handler, + get_thought_handler, + # Message tools + AddMessageInput, + GetMessagesInput, + SearchMessagesInput, + add_message_handler, + get_messages_handler, + search_messages_handler, + # Conversation tools + GetConversationInput, + SearchConversationsInput, + ListConversationsInput, + get_conversation_handler, + search_conversations_handler, + list_conversations_handler, +) + # ============================================================================= # Logging Configuration # ============================================================================= @@ -551,6 +576,264 @@ async def delete_document( return result.model_dump(mode='json') +# ============================================================================= +# Memory Tools (Thoughts, Messages, Conversations) +# 
============================================================================= + + +@mcp.tool() +async def add_thought( + content: str, + thought_type: str = "reflection", + trigger: str = "", + concepts: list[str] | None = None, + privacy_level: str = "private", +) -> Dict[str, Any]: + """ + Add a new thought to the Memory system. + + Args: + content: The thought content. + thought_type: Type (reflection, question, intuition, observation, etc.). + trigger: What triggered this thought (optional). + concepts: Related concepts/tags (optional). + privacy_level: Privacy level (private, shared, public). + + Returns: + Dictionary containing: + - success: Whether thought was added successfully + - uuid: UUID of the created thought + - content: Preview of the thought content + - thought_type: The thought type + """ + input_data = AddThoughtInput( + content=content, + thought_type=thought_type, + trigger=trigger, + concepts=concepts or [], + privacy_level=privacy_level, + ) + result = await add_thought_handler(input_data) + return result + + +@mcp.tool() +async def search_thoughts( + query: str, + limit: int = 10, + thought_type_filter: str | None = None, +) -> Dict[str, Any]: + """ + Search thoughts using semantic similarity. + + Args: + query: Search query text. + limit: Maximum number of results (1-100, default 10). + thought_type_filter: Filter by thought type (optional). + + Returns: + Dictionary containing: + - success: Whether search succeeded + - query: The original search query + - results: List of matching thoughts + - count: Number of results returned + """ + input_data = SearchThoughtsInput( + query=query, + limit=limit, + thought_type_filter=thought_type_filter, + ) + result = await search_thoughts_handler(input_data) + return result + + +@mcp.tool() +async def get_thought(uuid: str) -> Dict[str, Any]: + """ + Get a specific thought by UUID. + + Args: + uuid: Thought UUID. + + Returns: + Dictionary containing complete thought data or error message. 
+ """ + result = await get_thought_handler(uuid) + return result + + +@mcp.tool() +async def add_message( + content: str, + role: str, + conversation_id: str, + order_index: int = 0, +) -> Dict[str, Any]: + """ + Add a new message to a conversation. + + Args: + content: Message content. + role: Role (user, assistant, system). + conversation_id: Conversation identifier. + order_index: Position in conversation (default 0). + + Returns: + Dictionary containing: + - success: Whether message was added successfully + - uuid: UUID of the created message + - content: Preview of the message content + - role: The message role + - conversation_id: The conversation ID + """ + input_data = AddMessageInput( + content=content, + role=role, + conversation_id=conversation_id, + order_index=order_index, + ) + result = await add_message_handler(input_data) + return result + + +@mcp.tool() +async def get_messages( + conversation_id: str, + limit: int = 50, +) -> Dict[str, Any]: + """ + Get all messages from a conversation in order. + + Args: + conversation_id: Conversation identifier. + limit: Maximum messages to return (1-500, default 50). + + Returns: + Dictionary containing: + - success: Whether query succeeded + - conversation_id: The conversation ID + - messages: List of messages in order + - count: Number of messages returned + """ + input_data = GetMessagesInput( + conversation_id=conversation_id, + limit=limit, + ) + result = await get_messages_handler(input_data) + return result + + +@mcp.tool() +async def search_messages( + query: str, + limit: int = 10, + conversation_id_filter: str | None = None, +) -> Dict[str, Any]: + """ + Search messages using semantic similarity. + + Args: + query: Search query text. + limit: Maximum number of results (1-100, default 10). + conversation_id_filter: Filter by conversation ID (optional). 
+ + Returns: + Dictionary containing: + - success: Whether search succeeded + - query: The original search query + - results: List of matching messages + - count: Number of results returned + """ + input_data = SearchMessagesInput( + query=query, + limit=limit, + conversation_id_filter=conversation_id_filter, + ) + result = await search_messages_handler(input_data) + return result + + +@mcp.tool() +async def get_conversation(conversation_id: str) -> Dict[str, Any]: + """ + Get a specific conversation by ID. + + Args: + conversation_id: Conversation identifier. + + Returns: + Dictionary containing: + - success: Whether conversation was found + - conversation_id: The conversation ID + - category: Conversation category + - summary: Conversation summary + - timestamp_start: Start time + - timestamp_end: End time + - participants: List of participants + - tags: Semantic tags + - message_count: Number of messages + """ + input_data = GetConversationInput(conversation_id=conversation_id) + result = await get_conversation_handler(input_data) + return result + + +@mcp.tool() +async def search_conversations( + query: str, + limit: int = 10, + category_filter: str | None = None, +) -> Dict[str, Any]: + """ + Search conversations using semantic similarity. + + Args: + query: Search query text. + limit: Maximum number of results (1-50, default 10). + category_filter: Filter by category (optional). + + Returns: + Dictionary containing: + - success: Whether search succeeded + - query: The original search query + - results: List of matching conversations + - count: Number of results returned + """ + input_data = SearchConversationsInput( + query=query, + limit=limit, + category_filter=category_filter, + ) + result = await search_conversations_handler(input_data) + return result + + +@mcp.tool() +async def list_conversations( + limit: int = 20, + category_filter: str | None = None, +) -> Dict[str, Any]: + """ + List all conversations with optional filtering. 
+ + Args: + limit: Maximum conversations to return (1-100, default 20). + category_filter: Filter by category (optional). + + Returns: + Dictionary containing: + - success: Whether query succeeded + - conversations: List of conversations + - count: Number of conversations returned + """ + input_data = ListConversationsInput( + limit=limit, + category_filter=category_filter, + ) + result = await list_conversations_handler(input_data) + return result + + # ============================================================================= # Signal Handlers # ============================================================================= diff --git a/generations/library_rag/templates/base.html b/generations/library_rag/templates/base.html index 65f1618..78e5327 100644 --- a/generations/library_rag/templates/base.html +++ b/generations/library_rag/templates/base.html @@ -718,6 +718,15 @@ 📚 Documents +
+ + 🧠 + Memory (Ikario) + + + 💭 + Conversations + @@ -736,6 +745,7 @@ Conversation Parser PDF Documents + Memory