feat: Add vectorized summary field and migration tools
- Add 'summary' field to Chunk collection (vectorized with text2vec)
- Migrate from Dynamic index to HNSW + RQ for both Chunk and Summary
- Add LLM summarizer module (utils/llm_summarizer.py)
- Add migration scripts (migrate_add_summary.py, restore_*.py)
- Add summary generation utilities and progress tracking
- Add testing and cleaning tools (outils_test_and_cleaning/)
- Add comprehensive documentation (ANALYSE_*.md, guides)
- Remove obsolete files (linear_config.py, old test files)
- Update .gitignore to exclude backups and temp files

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
32
generations/library_rag/sample_summaries.py
Normal file
32
generations/library_rag/sample_summaries.py
Normal file
@@ -0,0 +1,32 @@
|
||||
"""Print a few examples of generated summaries from the 'Chunk' collection.

Connects to a local Weaviate instance, fetches a single page of objects from
the 'Chunk' collection and prints the first few that carry a non-empty
'summary' property, next to a preview of their original text.
"""

import weaviate

# Number of objects requested from the collection in one query. Only this
# first page is scanned, so chunks beyond it are never considered.
FETCH_LIMIT = 100
# Stop after this many chunks with a summary have been printed.
MAX_EXAMPLES = 5
# Maximum number of characters of the original text shown in the preview.
PREVIEW_CHARS = 300

client = weaviate.connect_to_local()
summaries_found = 0
try:
    chunk_col = client.collections.get('Chunk')

    # Fetch the first FETCH_LIMIT chunks; those without a summary are skipped.
    response = chunk_col.query.fetch_objects(limit=FETCH_LIMIT)

    for obj in response.objects:
        properties = obj.properties

        # `or` also covers properties that are present but null, which
        # `.get(key, default)` alone would return as None.
        summary = properties.get('summary') or ''
        if not summary:
            continue
        text = properties.get('text') or ''
        work = properties.get('work') or {}

        print("=" * 80)
        print(f"WORK: {work.get('title', 'N/A')} - {work.get('author', 'N/A')}")
        print("=" * 80)
        print(f"\nTEXTE ORIGINAL ({len(text)} chars):")
        # Truncate long texts and mark the cut with an ellipsis.
        if len(text) > PREVIEW_CHARS:
            print(text[:PREVIEW_CHARS] + "...")
        else:
            print(text)
        print(f"\nRÉSUMÉ GÉNÉRÉ ({len(summary)} chars):")
        print(summary)
        print("\n")

        summaries_found += 1
        if summaries_found >= MAX_EXAMPLES:
            break
finally:
    # Always release the connection, even if the query or printing fails.
    client.close()

print(f"\n✓ {summaries_found} exemples affichés")
|
||||
Reference in New Issue
Block a user