refactor: Rename Chunk_v2/Summary_v2 collections to Chunk/Summary

- Add migrate_rename_collections.py script for data migration
- Update flask_app.py to use new collection names
- Update weaviate_ingest.py to use new collection names
- Update schema.py documentation
- Update README.md and ANALYSE_MCP_TOOLS.md

Migration completed: 5372 chunks + 114 summaries preserved with vectors.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-14 23:59:03 +01:00
parent 5a732e885f
commit 1bf570e201
6 changed files with 383 additions and 46 deletions
--- a/generations/library_rag/utils/weaviate_ingest.py
+++ b/generations/library_rag/utils/weaviate_ingest.py
@@ -190,8 +190,8 @@ class DeleteResult(TypedDict, total=False):
    Attributes:
        success: Whether deletion succeeded.
        error: Error message if deletion failed.
-        deleted_chunks: Number of chunks deleted from Chunk_v2 collection.
-        deleted_summaries: Number of summaries deleted from Summary_v2 collection.
+        deleted_chunks: Number of chunks deleted from Chunk collection.
+        deleted_summaries: Number of summaries deleted from Summary collection.

    Example:
        >>> result = delete_document_chunks("platon_republique")
@@ -725,7 +725,7 @@ def ingest_summaries(
        Recursively processes nested TOC entries (children).
    """
    try:
-        summary_collection: Collection[Any, Any] = client.collections.get("Summary_v2")
+        summary_collection: Collection[Any, Any] = client.collections.get("Summary")
    except Exception as e:
        logger.warning(f"Collection Summary non trouvée: {e}")
        return 0
@@ -833,9 +833,9 @@ def ingest_document(
 ) -> IngestResult:
    """Ingest document chunks into Weaviate with nested objects.

-    Main ingestion function that inserts chunks into the Chunk_v2 collection
+    Main ingestion function that inserts chunks into the Chunk collection
    with nested Work references. Optionally also creates entries in the
-    Summary_v2 collection.
+    Summary collection.

    This function uses batch insertion for optimal performance and
    constructs proper nested objects for filtering capabilities.
@@ -856,7 +856,7 @@ def ingest_document(
        toc: Optional table of contents for Summary collection.
        hierarchy: Optional complete document hierarchy structure.
        pages: Number of pages in source document. Defaults to 0.
-        ingest_summary_collection: If True, also insert into Summary_v2
+        ingest_summary_collection: If True, also insert into Summary
            collection (requires toc). Defaults to False.

    Returns:
@@ -911,7 +911,7 @@ def ingest_document(

            # Récupérer la collection Chunk
            try:
-                chunk_collection: Collection[Any, Any] = client.collections.get("Chunk_v2")
+                chunk_collection: Collection[Any, Any] = client.collections.get("Chunk")
            except Exception as e:
                return IngestResult(
                    success=False,
@@ -983,14 +983,14 @@ def ingest_document(
                    "keywords": chunk.get("concepts", chunk.get("keywords", [])),
                    "language": language,
                    "orderIndex": idx,
-                    # Use flat fields instead of nested objects for Chunk_v2 schema
+                    # Use flat fields instead of nested objects for Chunk schema
                    "workTitle": title,
                    "workAuthor": author,
                    "year": metadata.get("year", 0) if metadata.get("year") else 0,
-                    # Note: document reference fields not used in current Chunk_v2 schema
+                    # Note: document reference fields not used in current Chunk schema
                }

-                # Note: Nested objects validation skipped for Chunk_v2 flat schema
+                # Note: Nested objects validation skipped for Chunk flat schema
                # validate_chunk_nested_objects(chunk_obj, idx, doc_name)

                objects_to_insert.append(chunk_obj)
@@ -1130,7 +1130,7 @@ def delete_document_chunks(doc_name: str) -> DeleteResult:

            # Supprimer les chunks (filtrer sur document.sourceId nested)
            try:
-                chunk_collection: Collection[Any, Any] = client.collections.get("Chunk_v2")
+                chunk_collection: Collection[Any, Any] = client.collections.get("Chunk")
                result = chunk_collection.data.delete_many(
                    where=wvq.Filter.by_property("document.sourceId").equal(doc_name)
                )
@@ -1140,7 +1140,7 @@ def delete_document_chunks(doc_name: str) -> DeleteResult:

            # Supprimer les summaries (filtrer sur document.sourceId nested)
            try:
-                summary_collection: Collection[Any, Any] = client.collections.get("Summary_v2")
+                summary_collection: Collection[Any, Any] = client.collections.get("Summary")
                result = summary_collection.data.delete_many(
                    where=wvq.Filter.by_property("document.sourceId").equal(doc_name)
                )