From d8242696068e35ef20143e3b68352bb2aeb80a70 Mon Sep 17 00:00:00 2001 From: David Blanc Brioir Date: Thu, 1 Jan 2026 15:51:11 +0100 Subject: [PATCH] fix: Adapt hierarchical display for mismatched sectionPath formats MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Root cause: - Summary.sectionPath: '635. As for the subject...' (paragraph numbers) - Chunk.sectionPath: 'Peirce: CP 4.47 > 47. §3 THE NATURE...' (canonical refs) - No way to match them with prefix/equal filters Solution (workaround until summaries are regenerated): - Show sections as **context** (relevant high-level topics found) - Show chunks **globally** (top 20 most relevant passages) - Don't try to group chunks under sections UI changes: - '📚 Sections pertinentes trouvées' (context cards with summary) - '📄 Passages les plus pertinents' (top chunks, not grouped) - Cleaner, more honest representation of what we found Next steps to fully fix: - Regenerate Summary collection with correct sectionPath format - Or create a mapping between Summary titles and Chunk sectionPaths --- generations/library_rag/flask_app.py | 26 ++--- generations/library_rag/templates/search.html | 104 +++++++++--------- 2 files changed, 63 insertions(+), 67 deletions(-) diff --git a/generations/library_rag/flask_app.py b/generations/library_rag/flask_app.py index 894e36b..92f20a2 100644 --- a/generations/library_rag/flask_app.py +++ b/generations/library_rag/flask_app.py @@ -457,22 +457,18 @@ def hierarchical_search( for obj in chunks_result.objects ] - # Distribute chunks to sections using prefix matching - all_chunks = [] + # NOTE: Summary.sectionPath format doesn't match Chunk.sectionPath + # This is a data quality issue that needs to be fixed at ingestion + # For now, sections provide context, chunks are shown globally + print(f"[HIERARCHICAL] Got {len(all_chunks_list)} chunks total") + print(f"[HIERARCHICAL] Found {len(sections_data)} relevant sections") + + all_chunks = 
all_chunks_list + + # Clear chunks from sections (they're displayed separately) for section in sections_data: - section_ref = section["section_path"] # e.g., "Peirce: CP 2.504" - - # Find chunks whose sectionPath starts with this reference - section_chunks = [ - chunk for chunk in all_chunks_list - if chunk.get("sectionPath", "").startswith(section_ref) - ] - - # Sort by similarity and limit per section - section_chunks.sort(key=lambda x: x.get("similarity", 0) or 0, reverse=True) - section["chunks"] = section_chunks[:limit] - section["chunks_count"] = len(section["chunks"]) - all_chunks.extend(section["chunks"]) + section["chunks"] = [] + section["chunks_count"] = 0 # Sort all chunks by similarity (descending) all_chunks.sort(key=lambda x: x.get("similarity", 0) or 0, reverse=True) diff --git a/generations/library_rag/templates/search.html b/generations/library_rag/templates/search.html index 13a8397..23e35a7 100644 --- a/generations/library_rag/templates/search.html +++ b/generations/library_rag/templates/search.html @@ -174,62 +174,62 @@ {% if results_data.mode == "hierarchical" and results_data.sections %} - {% for section in results_data.sections %} -
-
-

- 📂 {{ section.title[:80] }}{% if section.title|length > 80 %}...{% endif %} - {{ section.chunks_count }} passage{% if section.chunks_count > 1 %}s{% endif %} - ⚡ {{ section.similarity }}% similaire -

- {% if section.section_path and section.section_path != section.title %} -

πŸ“ {{ section.section_path }}

- {% endif %} - {% if section.summary_text %} -

{{ section.summary_text }}

- {% endif %} - {% if section.concepts %} -
- {% for concept in section.concepts %} - {{ concept }} + +
+

📚 Sections pertinentes trouvées

+
+ {% for section in results_data.sections %} +
+
+ {{ section.title[:80] }}{% if section.title|length > 80 %}...{% endif %} + ⚡ {{ section.similarity }}% similaire +
+ {% if section.summary_text %} +

{{ section.summary_text[:150] }}{% if section.summary_text|length > 150 %}...{% endif %}

+ {% endif %} + {% if section.concepts %} +
+ {% for concept in section.concepts %} + {{ concept }} + {% endfor %} +
+ {% endif %} +
+ {% endfor %} +
+
+ + +
+

📄 Passages les plus pertinents

+ {% for chunk in results_data.results[:20] %} +
+
+ {% if chunk.work and chunk.work.author %} + {{ chunk.work.author }} + {% endif %} + {% if chunk.work and chunk.work.title %} + {{ chunk.work.title }} + {% endif %} + ⚡ {{ chunk.similarity }}% similaire +
+
"{{ chunk.text }}"
+
+ Section : {{ chunk.sectionPath or '—' }}  │  + Type : {{ chunk.unitType or '—' }}  │  + Langue : {{ (chunk.language or '—') | upper }} +
+ {% if chunk.keywords %} +
+ {% for kw in chunk.keywords %} + {{ kw }} {% endfor %}
{% endif %}
- - - {% if section.chunks %} -
- {% for chunk in section.chunks %} -
-
- {% if chunk.work and chunk.work.author %} - {{ chunk.work.author }} - {% endif %} - {% if chunk.work and chunk.work.title %} - {{ chunk.work.title }} - {% endif %} - ⚡ {{ chunk.similarity }}% similaire -
-
"{{ chunk.text }}"
-
- Section : {{ chunk.sectionPath or section.section_path or '—' }}  │  - Type : {{ chunk.unitType or '—' }}  │  - Langue : {{ (chunk.language or '—') | upper }} -
- {% if chunk.keywords %} -
- {% for kw in chunk.keywords %} - {{ kw }} - {% endfor %} -
- {% endif %} -
- {% endfor %} -
- {% endif %} -
- {% endfor %} + {% endfor %} +
+ {% endif %} {% else %}