diff --git a/generations/library_rag/flask_app.py b/generations/library_rag/flask_app.py index 894e36b..92f20a2 100644 --- a/generations/library_rag/flask_app.py +++ b/generations/library_rag/flask_app.py @@ -457,22 +457,18 @@ def hierarchical_search( for obj in chunks_result.objects ] - # Distribute chunks to sections using prefix matching - all_chunks = [] + # NOTE: Summary.sectionPath format doesn't match Chunk.sectionPath + # This is a data quality issue that needs to be fixed at ingestion + # For now, sections provide context, chunks are shown globally + print(f"[HIERARCHICAL] Got {len(all_chunks_list)} chunks total") + print(f"[HIERARCHICAL] Found {len(sections_data)} relevant sections") + + all_chunks = all_chunks_list + + # Clear chunks from sections (they're displayed separately) for section in sections_data: - section_ref = section["section_path"] # e.g., "Peirce: CP 2.504" - - # Find chunks whose sectionPath starts with this reference - section_chunks = [ - chunk for chunk in all_chunks_list - if chunk.get("sectionPath", "").startswith(section_ref) - ] - - # Sort by similarity and limit per section - section_chunks.sort(key=lambda x: x.get("similarity", 0) or 0, reverse=True) - section["chunks"] = section_chunks[:limit] - section["chunks_count"] = len(section["chunks"]) - all_chunks.extend(section["chunks"]) + section["chunks"] = [] + section["chunks_count"] = 0 # Sort all chunks by similarity (descending) all_chunks.sort(key=lambda x: x.get("similarity", 0) or 0, reverse=True) diff --git a/generations/library_rag/templates/search.html b/generations/library_rag/templates/search.html index 13a8397..23e35a7 100644 --- a/generations/library_rag/templates/search.html +++ b/generations/library_rag/templates/search.html @@ -174,62 +174,62 @@ {% if results_data.mode == "hierarchical" and results_data.sections %} - {% for section in results_data.sections %} -
-
-

- πŸ“‚ {{ section.title[:80] }}{% if section.title|length > 80 %}...{% endif %} - {{ section.chunks_count }} passage{% if section.chunks_count > 1 %}s{% endif %} - ⚑ {{ section.similarity }}% similaire -

- {% if section.section_path and section.section_path != section.title %} -

πŸ“ {{ section.section_path }}

- {% endif %} - {% if section.summary_text %} -

{{ section.summary_text }}

- {% endif %} - {% if section.concepts %} -
- {% for concept in section.concepts %} - {{ concept }} + +
+

📚 Sections pertinentes trouvées

+
+ {% for section in results_data.sections %} +
+
+ {{ section.title[:80] }}{% if section.title|length > 80 %}...{% endif %} + ⚑ {{ section.similarity }}% similaire +
+ {% if section.summary_text %} +

{{ section.summary_text[:150] }}{% if section.summary_text|length > 150 %}...{% endif %}

+ {% endif %} + {% if section.concepts %} +
+ {% for concept in section.concepts %} + {{ concept }} + {% endfor %} +
+ {% endif %} +
+ {% endfor %} +
+
+ + +
+

📄 Passages les plus pertinents

+ {% for chunk in results_data.results[:20] %} +
+
+ {% if chunk.work and chunk.work.author %} + {{ chunk.work.author }} + {% endif %} + {% if chunk.work and chunk.work.title %} + {{ chunk.work.title }} + {% endif %} + ⚑ {{ chunk.similarity }}% similaire +
+
"{{ chunk.text }}"
+
+ Section : {{ chunk.sectionPath or 'β€”' }}  β”‚  + Type : {{ chunk.unitType or 'β€”' }}  β”‚  + Langue : {{ (chunk.language or 'β€”') | upper }} +
+ {% if chunk.keywords %} +
+ {% for kw in chunk.keywords %} + {{ kw }} {% endfor %}
{% endif %}
- - - {% if section.chunks %} -
- {% for chunk in section.chunks %} -
-
- {% if chunk.work and chunk.work.author %} - {{ chunk.work.author }} - {% endif %} - {% if chunk.work and chunk.work.title %} - {{ chunk.work.title }} - {% endif %} - ⚑ {{ chunk.similarity }}% similaire -
-
"{{ chunk.text }}"
-
- Section : {{ chunk.sectionPath or section.section_path or 'β€”' }}  β”‚  - Type : {{ chunk.unitType or 'β€”' }}  β”‚  - Langue : {{ (chunk.language or 'β€”') | upper }} -
- {% if chunk.keywords %} -
- {% for kw in chunk.keywords %} - {{ kw }} - {% endfor %} -
- {% endif %} -
- {% endfor %} -
- {% endif %} -
- {% endfor %} + {% endfor %} +
+ {% endif %} {% else %}