feat: Add vectorized summary field and migration tools

- Add 'summary' field to Chunk collection (vectorized with text2vec)
- Migrate from Dynamic index to HNSW + RQ for both Chunk and Summary
- Add LLM summarizer module (utils/llm_summarizer.py)
- Add migration scripts (migrate_add_summary.py, restore_*.py)
- Add summary generation utilities and progress tracking
- Add testing and cleaning tools (outils_test_and_cleaning/)
- Add comprehensive documentation (ANALYSE_*.md, guides)
- Remove obsolete files (linear_config.py, old test files)
- Update .gitignore to exclude backups and temp files

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-01-07 22:56:03 +01:00
parent feb215dae0
commit 636ad6206c
40 changed files with 11937 additions and 712 deletions
--- a/generations/library_rag/api_get_works.py
+++ b/generations/library_rag/api_get_works.py
@@ -0,0 +1,80 @@
+
+
+# ═══════════════════════════════════════════════════════════════════════════════
+# Works Filter API
+# ═══════════════════════════════════════════════════════════════════════════════
+
+@app.route("/api/get-works")
+def api_get_works() -> WerkzeugResponse:
+    """Get list of all available works with metadata for filtering.
+
+    Returns a JSON array of all unique works in the database, sorted by author
+    then title. Each work includes the title, author, and number of chunks.
+
+    Returns:
+        JSON response with array of works:
+        [
+            {"title": "Ménon", "author": "Platon", "chunks_count": 127},
+            ...
+        ]
+
+    Raises:
+        500: If Weaviate connection fails or query errors occur.
+
+    Example:
+        GET /api/get-works
+        Returns: [{"title": "Ménon", "author": "Platon", "chunks_count": 127}, ...]
+    """
+    try:
+        with get_weaviate_client() as client:
+            if client is None:
+                return jsonify({
+                    "error": "Weaviate connection failed",
+                    "message": "Cannot connect to Weaviate database"
+                }), 500
+
+            # Query Chunk collection to get all unique works with counts
+            chunks = client.collections.get("Chunk")
+
+            # Fetch all chunks to aggregate by work
+            # Using a larger limit to get all documents
+            all_chunks = chunks.query.fetch_objects(
+                limit=10000,
+                return_properties=["work"]
+            )
+
+            # Aggregate chunks by work (title + author)
+            works_count: Dict[str, Dict[str, Any]] = {}
+
+            for obj in all_chunks.objects:
+                work_obj = obj.properties.get("work")
+                if work_obj and isinstance(work_obj, dict):
+                    title = work_obj.get("title", "")
+                    author = work_obj.get("author", "")
+
+                    if title:  # Only count if title exists
+                        # Use title as key (assumes unique titles)
+                        if title not in works_count:
+                            works_count[title] = {
+                                "title": title,
+                                "author": author or "Unknown",
+                                "chunks_count": 0
+                            }
+                        works_count[title]["chunks_count"] += 1
+
+            # Convert to list and sort by author, then title
+            works_list = list(works_count.values())
+            works_list.sort(key=lambda w: (w["author"].lower(), w["title"].lower()))
+
+            print(f"[API] /api/get-works: Found {len(works_list)} unique works")
+
+            return jsonify(works_list)
+
+    except Exception as e:
+        print(f"[API] /api/get-works error: {e}")
+        return jsonify({
+            "error": "Database query failed",
+            "message": str(e)
+        }), 500
+
+