diff --git a/ANALYSE_MCP_TOOLS.md b/ANALYSE_MCP_TOOLS.md index e02c715..1a976fc 100644 --- a/ANALYSE_MCP_TOOLS.md +++ b/ANALYSE_MCP_TOOLS.md @@ -37,7 +37,7 @@ ping() #### 2. `parse_pdf(pdf_path)` **Type**: Ingestion PDF -**Collection cible**: Work, Chunk_v2, Summary_v2 +**Collection cible**: Work, Chunk, Summary **Paramètres**: - `pdf_path` (str): Chemin local ou URL vers le PDF @@ -87,7 +87,7 @@ ping() #### 3. `search_chunks(query, limit, min_similarity, author_filter, work_filter, language_filter)` **Type**: Recherche sémantique -**Collection**: Chunk_v2 (5,372 chunks) +**Collection**: Chunk (5,372 chunks) **Paramètres**: - `query` (str): Requête en langage naturel @@ -126,7 +126,7 @@ ping() #### 4. `search_summaries(query, limit, min_level, max_level)` **Type**: Recherche sémantique (haut niveau) -**Collection**: Summary_v2 (114 résumés) +**Collection**: Summary (114 résumés) **Paramètres**: - `query` (str): Requête en langage naturel @@ -161,7 +161,7 @@ ping() #### 5. `get_document(source_id, include_chunks, include_toc)` **Type**: Récupération document -**Collections**: Work, Chunk_v2 +**Collections**: Work, Chunk **Paramètres**: - `source_id` (str): Identifiant du document @@ -226,7 +226,7 @@ ping() #### 7. `get_chunks_by_document(source_id, limit, offset)` **Type**: Récupération chunks -**Collection**: Chunk_v2 +**Collection**: Chunk **Paramètres**: - `source_id` (str): Identifiant du document @@ -260,7 +260,7 @@ ping() #### 8. `filter_by_author(author, limit)` **Type**: Filtrage par auteur -**Collections**: Work, Chunk_v2 +**Collections**: Work, Chunk **Paramètres**: - `author` (str): Nom de l'auteur @@ -294,7 +294,7 @@ ping() #### 9. `delete_document(source_id, confirm)` **Type**: Suppression -**Collections**: Work, Chunk_v2, Summary_v2 +**Collections**: Work, Chunk, Summary **Paramètres**: - `source_id` (str): Identifiant du document @@ -605,7 +605,7 @@ ping() | Catégorie | Nombre | Collections utilisées | GPU Embedder | |-----------|--------|----------------------|--------------| | **Système** | 1 | - | - | -| **Library RAG** | 8 | Work, Chunk_v2, Summary_v2 | ✅ | +| **Library RAG** | 8 | Work, Chunk, Summary | ✅ | | **Memory** | 9 | Thought, Message, Conversation | ✅ | | **TOTAL** | **18** | **6 collections** | **5 vectorisées** | @@ -719,8 +719,8 @@ messages = search_messages("transformers architecture") RAG (3): - Work (no vectorizer) - 19 œuvres -- Chunk_v2 (GPU embedder) - 5,372 chunks -- Summary_v2 (GPU embedder) - 114 résumés +- Chunk (GPU embedder) - 5,372 chunks +- Summary (GPU embedder) - 114 résumés Memory (3): - Conversation (GPU embedder) - 12 conversations diff --git a/README.md b/README.md index 59f2c9a..645de43 100644 --- a/README.md +++ b/README.md @@ -18,8 +18,8 @@ Library RAG combine deux systèmes de recherche sémantique distincts: ``` 📦 Library Philosophique (3 collections) ├─ Work → Métadonnées des œuvres philosophiques -├─ Chunk_v2 → 5355 passages de texte (1024-dim vectors) -└─ Summary_v2 → Résumés hiérarchiques des documents +├─ Chunk → 5355 passages de texte (1024-dim vectors) +└─ Summary → Résumés hiérarchiques des documents 🧠 Memory Ikario (2 collections) ├─ Thought → 104 pensées (réflexions, insights) @@ -249,7 +249,7 @@ curl http://localhost:5000/search?q=Turing curl http://localhost:8080/v1/meta # Vérifier nombre de chunks -python -c "import weaviate; c=weaviate.connect_to_local(); print(c.collections.get('Chunk_v2').aggregate.over_all()); c.close()" +python -c "import weaviate; c=weaviate.connect_to_local(); print(c.collections.get('Chunk').aggregate.over_all()); c.close()" ``` ## 📊 Métriques de Performance @@ -407,7 +407,7 @@ docker compose restart **Solution**: ```bash # Vérifier nombre de chunks dans Weaviate -python -c "import weaviate; c=weaviate.connect_to_local(); print(f'Chunks: {c.collections.get(\"Chunk_v2\").aggregate.over_all().total_count}'); c.close()" +python -c "import weaviate; c=weaviate.connect_to_local(); print(f'Chunks: {c.collections.get(\"Chunk\").aggregate.over_all().total_count}'); c.close()" # Réinjecter les données si nécessaire python schema.py --recreate-chunk diff --git a/generations/library_rag/flask_app.py b/generations/library_rag/flask_app.py index f7bd433..b5fc475 100644 --- a/generations/library_rag/flask_app.py +++ b/generations/library_rag/flask_app.py @@ -193,7 +193,7 @@ def get_collection_stats() -> Optional[CollectionStats]: stats: CollectionStats = {} # Chunk stats (renamed from Passage) - passages = client.collections.get("Chunk_v2") + passages = client.collections.get("Chunk") passage_count = passages.aggregate.over_all(total_count=True) stats["passages"] = passage_count.total_count or 0 @@ -248,7 +248,7 @@ def get_all_passages( if client is None: return [] - chunks = client.collections.get("Chunk_v2") + chunks = client.collections.get("Chunk") result = chunks.query.fetch_objects( limit=limit, @@ -293,7 +293,7 @@ def simple_search( if client is None: return [] - chunks = client.collections.get("Chunk_v2") + chunks = client.collections.get("Chunk") # Build filters using top-level properties (workAuthor, workTitle) filters: Optional[Any] = None @@ -377,7 +377,7 @@ def hierarchical_search( # STAGE 1: Search Summary collection for relevant sections # ═══════════════════════════════════════════════════════════════ - summary_collection = client.collections.get("Summary_v2") + summary_collection = client.collections.get("Summary") # Generate query vector with GPU embedder (Phase 5: manual vectorization) embedder = get_gpu_embedder() @@ -423,7 +423,7 @@ def hierarchical_search( "similarity": round((1 - summary_obj.metadata.distance) * 100, 1) if summary_obj.metadata and summary_obj.metadata.distance else 0, }) - # Post-filter sections by author/work (Summary_v2 has workTitle property) + # Post-filter sections by author/work (Summary has workTitle property) if author_filter or work_filter: print(f"[HIERARCHICAL] Post-filtering {len(sections_data)} sections by work='{work_filter}'") @@ -485,7 +485,7 @@ def hierarchical_search( # For each section, search chunks using the section's summary text # This groups chunks under their relevant sections - chunk_collection = client.collections.get("Chunk_v2") + chunk_collection = client.collections.get("Chunk") # Build base filters (author/work only) base_filters: Optional[Any] = None @@ -650,9 +650,9 @@ def summary_only_search( if client is None: return [] - summaries = client.collections.get("Summary_v2") + summaries = client.collections.get("Summary") - # Build Work map for metadata lookup (Summary_v2 has workTitle, not document) + # Build Work map for metadata lookup (Summary has workTitle, not document) work_collection = client.collections.get("Work") work_map = {} for work in work_collection.iterator(include_vector=False): @@ -1043,7 +1043,7 @@ def rag_search( print("[RAG Search] Weaviate client unavailable") return [] - chunks = client.collections.get("Chunk_v2") + chunks = client.collections.get("Chunk") # Build work filter if selected_works is provided work_filter: Optional[Any] = None @@ -1536,8 +1536,8 @@ def api_get_works() -> Union[Response, tuple[Response, int]]: "message": "Cannot connect to Weaviate database" }), 500 - # Query Chunk_v2 collection to get all unique works with counts - chunks = client.collections.get("Chunk_v2") + # Query Chunk collection to get all unique works with counts + chunks = client.collections.get("Chunk") # Fetch all chunks to aggregate by work # In v2: work is NOT a nested object, use workTitle and workAuthor properties @@ -3421,7 +3421,7 @@ def documents() -> str: # Get all Works (now with sourceId added in Phase 1 of migration) try: work_collection = client.collections.get("Work") - chunk_collection = client.collections.get("Chunk_v2") + chunk_collection = client.collections.get("Chunk") # Build documents from Work collection for work in work_collection.iterator(include_vector=False): @@ -3461,7 +3461,7 @@ def documents() -> str: # Count summaries (if collection exists) try: - summary_collection = client.collections.get("Summary_v2") + summary_collection = client.collections.get("Summary") for summary in summary_collection.iterator(include_vector=False): work_title = summary.properties.get("workTitle") diff --git a/generations/library_rag/migrate_rename_collections.py b/generations/library_rag/migrate_rename_collections.py new file mode 100644 index 0000000..b07778e --- /dev/null +++ b/generations/library_rag/migrate_rename_collections.py @@ -0,0 +1,337 @@ +#!/usr/bin/env python3 +""" +Rename collections: Chunk_v2 -> Chunk, Summary_v2 -> Summary + +Weaviate doesn't support renaming collections directly, so this script: +1. Creates new collections (Chunk, Summary) with identical schema +2. Copies all objects with their vectors (batch insert) +3. Validates the migration (count check) +4. Optionally deletes old collections (--cleanup flag) + +Usage: + python migrate_rename_collections.py --dry-run # Preview without changes + python migrate_rename_collections.py # Execute migration + python migrate_rename_collections.py --cleanup # Delete old collections after validation +""" + +import weaviate +import weaviate.classes as wvc +from weaviate.classes.config import Configure, Property, DataType, VectorDistances +from weaviate.classes.query import Filter +import sys +import argparse +from typing import Any +import time + +MIGRATIONS = [ + ("Chunk_v2", "Chunk"), + ("Summary_v2", "Summary"), +] + +BATCH_SIZE = 100 + + +def get_collection_count(client: weaviate.WeaviateClient, name: str) -> int: + """Get the number of objects in a collection.""" + try: + coll = client.collections.get(name) + return coll.aggregate.over_all().total_count + except Exception: + return 0 + + +def collection_exists(client: weaviate.WeaviateClient, name: str) -> bool: + """Check if a collection exists.""" + try: + client.collections.get(name) + return True + except Exception: + return False + + +def create_chunk_collection(client: weaviate.WeaviateClient) -> None: + """Create the new Chunk collection with schema from Chunk_v2.""" + print(" Creating Chunk collection...") + + client.collections.create( + name="Chunk", + description="Document chunks with manual GPU vectorization (BAAI/bge-m3, 1024-dim)", + vectorizer_config=Configure.Vectorizer.none(), + vector_index_config=Configure.VectorIndex.hnsw( + distance_metric=VectorDistances.COSINE, + ef_construction=128, + max_connections=32, + quantizer=Configure.VectorIndex.Quantizer.rq(), + ), + properties=[ + Property(name="text", data_type=DataType.TEXT, description="Chunk text content"), + Property(name="workTitle", data_type=DataType.TEXT, description="Work title"), + Property(name="workAuthor", data_type=DataType.TEXT, description="Work author"), + Property(name="sectionPath", data_type=DataType.TEXT, description="Section path"), + Property(name="sectionLevel", data_type=DataType.INT, description="Section level"), + Property(name="chapterTitle", data_type=DataType.TEXT, description="Chapter title"), + Property(name="canonicalReference", data_type=DataType.TEXT, description="Canonical reference"), + Property(name="unitType", data_type=DataType.TEXT, description="Unit type"), + Property(name="keywords", data_type=DataType.TEXT_ARRAY, description="Keywords"), + Property(name="language", data_type=DataType.TEXT, description="Language code"), + Property(name="year", data_type=DataType.INT, description="Publication year"), + Property(name="orderIndex", data_type=DataType.INT, description="Order index"), + Property(name="summary", data_type=DataType.TEXT, description="Chunk summary"), + Property(name="document", data_type=DataType.TEXT, description="Document reference"), + ], + ) + print(" [OK] Chunk collection created") + + +def create_summary_collection(client: weaviate.WeaviateClient) -> None: + """Create the new Summary collection with schema from Summary_v2.""" + print(" Creating Summary collection...") + + client.collections.create( + name="Summary", + description="Section summaries (v2 - sans Document)", + vectorizer_config=Configure.Vectorizer.none(), + vector_index_config=Configure.VectorIndex.hnsw( + distance_metric=VectorDistances.COSINE, + ef_construction=128, + max_connections=32, + quantizer=Configure.VectorIndex.Quantizer.rq(), + ), + properties=[ + Property(name="text", data_type=DataType.TEXT, description="Summary text (vectorized)"), + Property(name="concepts", data_type=DataType.TEXT_ARRAY, description="Key concepts"), + Property(name="workTitle", data_type=DataType.TEXT, description="Work title"), + Property(name="sectionPath", data_type=DataType.TEXT, description="Section path"), + Property(name="title", data_type=DataType.TEXT, description="Section title"), + Property(name="level", data_type=DataType.INT, description="Hierarchy level"), + Property(name="chunksCount", data_type=DataType.INT, description="Chunks count"), + Property(name="language", data_type=DataType.TEXT, description="Language code"), + Property(name="workAuthor", data_type=DataType.TEXT, description="Work author"), + Property(name="year", data_type=DataType.NUMBER, description="Publication year"), + ], + ) + print(" [OK] Summary collection created") + + +def clean_properties(props: dict[str, Any], collection_name: str) -> dict[str, Any]: + """Clean properties to ensure correct types.""" + cleaned = dict(props) + + # Integer fields that may have been stored as float + int_fields = ["sectionLevel", "year", "orderIndex", "level", "chunksCount"] + + for field in int_fields: + if field in cleaned and cleaned[field] is not None: + try: + cleaned[field] = int(cleaned[field]) + except (ValueError, TypeError): + pass + + return cleaned + + +def migrate_objects( + client: weaviate.WeaviateClient, + source_name: str, + target_name: str, + dry_run: bool = False +) -> int: + """Copy all objects from source to target collection with vectors.""" + source = client.collections.get(source_name) + target = client.collections.get(target_name) + + total = source.aggregate.over_all().total_count + print(f" Migrating {total} objects from {source_name} -> {target_name}") + + if dry_run: + print(f" [DRY-RUN] Would migrate {total} objects") + return total + + migrated = 0 + errors = 0 + batch_objects: list[dict[str, Any]] = [] + + for obj in source.iterator(include_vector=True): + # Get vector (handle both dict and direct vector) + vector = obj.vector + if isinstance(vector, dict): + vector = vector.get("default", list(vector.values())[0] if vector else None) + + # Clean properties to ensure correct types + cleaned_props = clean_properties(obj.properties, target_name) + + batch_objects.append({ + "uuid": obj.uuid, + "properties": cleaned_props, + "vector": vector, + }) + + if len(batch_objects) >= BATCH_SIZE: + # Insert batch + with target.batch.dynamic() as batch: + for item in batch_objects: + batch.add_object( + uuid=item["uuid"], + properties=item["properties"], + vector=item["vector"], + ) + migrated += len(batch_objects) + print(f" Progress: {migrated}/{total} ({100*migrated//total}%)", end='\r') + batch_objects = [] + + # Insert remaining objects + if batch_objects: + with target.batch.dynamic() as batch: + for item in batch_objects: + batch.add_object( + uuid=item["uuid"], + properties=item["properties"], + vector=item["vector"], + ) + migrated += len(batch_objects) + + print(f" Progress: {migrated}/{total} (100%) ") + print(f" [OK] Migrated {migrated} objects") + + return migrated + + +def validate_migration( + client: weaviate.WeaviateClient, + source_name: str, + target_name: str +) -> bool: + """Validate that source and target have same object count.""" + source_count = get_collection_count(client, source_name) + target_count = get_collection_count(client, target_name) + + if source_count == target_count: + print(f" [OK] Validation passed: {source_name}={source_count}, {target_name}={target_count}") + return True + else: + print(f" [ERROR] Validation FAILED: {source_name}={source_count}, {target_name}={target_count}") + return False + + +def cleanup_old_collections(client: weaviate.WeaviateClient, dry_run: bool = False) -> None: + """Delete old collections after successful migration.""" + print("\n" + "="*70) + print("CLEANUP: Deleting old collections") + print("="*70) + + for source_name, target_name in MIGRATIONS: + if not collection_exists(client, source_name): + print(f" {source_name}: Already deleted") + continue + + if not collection_exists(client, target_name): + print(f" [ERROR] Cannot delete {source_name}: {target_name} doesn't exist!") + continue + + # Validate before deleting + if not validate_migration(client, source_name, target_name): + print(f" [ERROR] Skipping {source_name} deletion: validation failed") + continue + + if dry_run: + print(f" [DRY-RUN] Would delete {source_name}") + else: + client.collections.delete(source_name) + print(f" [OK] Deleted {source_name}") + + +def main() -> None: + parser = argparse.ArgumentParser(description="Rename Weaviate collections") + parser.add_argument("--dry-run", action="store_true", help="Preview without making changes") + parser.add_argument("--cleanup", action="store_true", help="Delete old collections after validation") + args = parser.parse_args() + + print("="*70) + print("WEAVIATE COLLECTION RENAME: Chunk_v2 -> Chunk, Summary_v2 -> Summary") + print("="*70) + print(f"Mode: {'DRY-RUN' if args.dry_run else 'LIVE'}") + print(f"Cleanup: {'YES' if args.cleanup else 'NO'}") + print() + + client = weaviate.connect_to_local() + + try: + # Show current state + print("Current collections:") + for source_name, target_name in MIGRATIONS: + source_count = get_collection_count(client, source_name) + target_exists = collection_exists(client, target_name) + target_count = get_collection_count(client, target_name) if target_exists else 0 + print(f" {source_name}: {source_count} objects") + print(f" {target_name}: {'exists (' + str(target_count) + ' objects)' if target_exists else 'does not exist'}") + print() + + if args.cleanup: + cleanup_old_collections(client, dry_run=args.dry_run) + return + + # Migration + for source_name, target_name in MIGRATIONS: + print("="*70) + print(f"MIGRATING: {source_name} -> {target_name}") + print("="*70) + + # Check source exists + if not collection_exists(client, source_name): + print(f" [ERROR] Source collection {source_name} does not exist!") + continue + + # Check if target already exists + if collection_exists(client, target_name): + target_count = get_collection_count(client, target_name) + if target_count > 0: + print(f" Target {target_name} already exists with {target_count} objects") + print(f" Skipping (already migrated)") + continue + else: + print(f" Target {target_name} exists but empty, will populate") + else: + # Create target collection + if not args.dry_run: + if target_name == "Chunk": + create_chunk_collection(client) + elif target_name == "Summary": + create_summary_collection(client) + else: + print(f" [DRY-RUN] Would create {target_name} collection") + + # Migrate objects + if not args.dry_run: + migrate_objects(client, source_name, target_name, dry_run=False) + else: + migrate_objects(client, source_name, target_name, dry_run=True) + + # Validate + if not args.dry_run: + validate_migration(client, source_name, target_name) + + print() + + # Final status + print("="*70) + print("MIGRATION COMPLETE") + print("="*70) + print("\nFinal state:") + for source_name, target_name in MIGRATIONS: + source_count = get_collection_count(client, source_name) + target_count = get_collection_count(client, target_name) + print(f" {source_name}: {source_count} objects") + print(f" {target_name}: {target_count} objects") + + if not args.dry_run: + print("\nNext steps:") + print(" 1. Update code: replace 'Chunk_v2' -> 'Chunk', 'Summary_v2' -> 'Summary'") + print(" 2. Test the application") + print(" 3. Run: python migrate_rename_collections.py --cleanup") + + finally: + client.close() + + +if __name__ == "__main__": + main() diff --git a/generations/library_rag/schema.py b/generations/library_rag/schema.py index 10502f9..d65defc 100644 --- a/generations/library_rag/schema.py +++ b/generations/library_rag/schema.py @@ -9,8 +9,8 @@ Schema Architecture: querying. The hierarchy is:: Work (metadata only) - ├── Chunk_v2 (vectorized text fragments) - └── Summary_v2 (vectorized chapter summaries) + ├── Chunk (vectorized text fragments) + └── Summary (vectorized chapter summaries) Collections: **Work** (no vectorization): @@ -18,21 +18,21 @@ Collections: Stores canonical metadata: title, author, year, language, genre. Not vectorized - used only for metadata and relationships. - **Chunk_v2** (manual GPU vectorization): + **Chunk** (manual GPU vectorization): Text fragments optimized for semantic search (200-800 chars). Vectorized with Python GPU embedder (BAAI/bge-m3, 1024-dim). Vectorized fields: text, keywords. Non-vectorized fields: workTitle, workAuthor, sectionPath, chapterTitle, unitType, orderIndex. Includes nested Work reference for denormalized access. - **Summary_v2** (manual GPU vectorization): + **Summary** (manual GPU vectorization): LLM-generated chapter/section summaries for high-level search. Vectorized with Python GPU embedder (BAAI/bge-m3, 1024-dim). Vectorized fields: text, concepts. Includes nested Work reference for denormalized access. Vectorization Strategy: - - Only Chunk_v2.text, Chunk_v2.keywords, Summary_v2.text, and Summary_v2.concepts are vectorized + - Only Chunk.text, Chunk.keywords, Summary.text, and Summary.concepts are vectorized - Manual vectorization with Python GPU embedder (BAAI/bge-m3, 1024-dim, RTX 4070) - Metadata fields use skip_vectorization=True for filtering only - Work collection has no vectorizer (metadata only) @@ -56,8 +56,8 @@ Nested Objects: denormalized data access. This allows single-query retrieval of chunk data with its Work metadata without joins:: - Chunk_v2.work = {title, author} - Summary_v2.work = {title, author} + Chunk.work = {title, author} + Summary.work = {title, author} Usage: From command line:: diff --git a/generations/library_rag/utils/weaviate_ingest.py b/generations/library_rag/utils/weaviate_ingest.py index 71f5e9e..d679ae1 100644 --- a/generations/library_rag/utils/weaviate_ingest.py +++ b/generations/library_rag/utils/weaviate_ingest.py @@ -190,8 +190,8 @@ class DeleteResult(TypedDict, total=False): Attributes: success: Whether deletion succeeded. error: Error message if deletion failed. - deleted_chunks: Number of chunks deleted from Chunk_v2 collection. - deleted_summaries: Number of summaries deleted from Summary_v2 collection. + deleted_chunks: Number of chunks deleted from Chunk collection. + deleted_summaries: Number of summaries deleted from Summary collection. Example: >>> result = delete_document_chunks("platon_republique") @@ -725,7 +725,7 @@ def ingest_summaries( Recursively processes nested TOC entries (children). """ try: - summary_collection: Collection[Any, Any] = client.collections.get("Summary_v2") + summary_collection: Collection[Any, Any] = client.collections.get("Summary") except Exception as e: logger.warning(f"Collection Summary non trouvée: {e}") return 0 @@ -833,9 +833,9 @@ def ingest_document( ) -> IngestResult: """Ingest document chunks into Weaviate with nested objects. - Main ingestion function that inserts chunks into the Chunk_v2 collection + Main ingestion function that inserts chunks into the Chunk collection with nested Work references. Optionally also creates entries in the - Summary_v2 collection. + Summary collection. This function uses batch insertion for optimal performance and constructs proper nested objects for filtering capabilities. @@ -856,7 +856,7 @@ def ingest_document( toc: Optional table of contents for Summary collection. hierarchy: Optional complete document hierarchy structure. pages: Number of pages in source document. Defaults to 0. - ingest_summary_collection: If True, also insert into Summary_v2 + ingest_summary_collection: If True, also insert into Summary collection (requires toc). Defaults to False. Returns: @@ -911,7 +911,7 @@ def ingest_document( # Récupérer la collection Chunk try: - chunk_collection: Collection[Any, Any] = client.collections.get("Chunk_v2") + chunk_collection: Collection[Any, Any] = client.collections.get("Chunk") except Exception as e: return IngestResult( success=False, @@ -983,14 +983,14 @@ def ingest_document( "keywords": chunk.get("concepts", chunk.get("keywords", [])), "language": language, "orderIndex": idx, - # Use flat fields instead of nested objects for Chunk_v2 schema + # Use flat fields instead of nested objects for Chunk schema "workTitle": title, "workAuthor": author, "year": metadata.get("year", 0) if metadata.get("year") else 0, - # Note: document reference fields not used in current Chunk_v2 schema + # Note: document reference fields not used in current Chunk schema } - # Note: Nested objects validation skipped for Chunk_v2 flat schema + # Note: Nested objects validation skipped for Chunk flat schema # validate_chunk_nested_objects(chunk_obj, idx, doc_name) objects_to_insert.append(chunk_obj) @@ -1130,7 +1130,7 @@ def delete_document_chunks(doc_name: str) -> DeleteResult: # Supprimer les chunks (filtrer sur document.sourceId nested) try: - chunk_collection: Collection[Any, Any] = client.collections.get("Chunk_v2") + chunk_collection: Collection[Any, Any] = client.collections.get("Chunk") result = chunk_collection.data.delete_many( where=wvq.Filter.by_property("document.sourceId").equal(doc_name) ) @@ -1140,7 +1140,7 @@ def delete_document_chunks(doc_name: str) -> DeleteResult: # Supprimer les summaries (filtrer sur document.sourceId nested) try: - summary_collection: Collection[Any, Any] = client.collections.get("Summary_v2") + summary_collection: Collection[Any, Any] = client.collections.get("Summary") result = summary_collection.data.delete_many( where=wvq.Filter.by_property("document.sourceId").equal(doc_name) )