fix: Adapt hierarchical display for mismatched sectionPath formats
Root cause:
- Summary.sectionPath: '635. As for the subject...' (paragraph numbers)
- Chunk.sectionPath: 'Peirce: CP 4.47 > 47. §3 THE NATURE...' (canonical refs)
- No way to match them with prefix/equal filters

Solution (workaround until summaries are regenerated):
- Show sections as **context** (relevant high-level topics found)
- Show chunks **globally** (top 20 most relevant passages)
- Don't try to group chunks under sections

UI changes:
- '📚 Sections pertinentes trouvées' (context cards with summary)
- '📄 Passages les plus pertinents' (top chunks, not grouped)
- Cleaner, more honest representation of what we found

Next steps to fully fix:
- Regenerate Summary collection with correct sectionPath format
- Or create a mapping between Summary titles and Chunk sectionPaths
This commit is contained in:
@@ -457,22 +457,18 @@ def hierarchical_search(
|
|||||||
for obj in chunks_result.objects
|
for obj in chunks_result.objects
|
||||||
]
|
]
|
||||||
|
|
||||||
# Distribute chunks to sections using prefix matching
|
# NOTE: Summary.sectionPath format doesn't match Chunk.sectionPath
|
||||||
all_chunks = []
|
# This is a data quality issue that needs to be fixed at ingestion
|
||||||
|
# For now, sections provide context, chunks are shown globally
|
||||||
|
print(f"[HIERARCHICAL] Got {len(all_chunks_list)} chunks total")
|
||||||
|
print(f"[HIERARCHICAL] Found {len(sections_data)} relevant sections")
|
||||||
|
|
||||||
|
all_chunks = all_chunks_list
|
||||||
|
|
||||||
|
# Clear chunks from sections (they're displayed separately)
|
||||||
for section in sections_data:
|
for section in sections_data:
|
||||||
section_ref = section["section_path"] # e.g., "Peirce: CP 2.504"
|
section["chunks"] = []
|
||||||
|
section["chunks_count"] = 0
|
||||||
# Find chunks whose sectionPath starts with this reference
|
|
||||||
section_chunks = [
|
|
||||||
chunk for chunk in all_chunks_list
|
|
||||||
if chunk.get("sectionPath", "").startswith(section_ref)
|
|
||||||
]
|
|
||||||
|
|
||||||
# Sort by similarity and limit per section
|
|
||||||
section_chunks.sort(key=lambda x: x.get("similarity", 0) or 0, reverse=True)
|
|
||||||
section["chunks"] = section_chunks[:limit]
|
|
||||||
section["chunks_count"] = len(section["chunks"])
|
|
||||||
all_chunks.extend(section["chunks"])
|
|
||||||
|
|
||||||
# Sort all chunks by similarity (descending)
|
# Sort all chunks by similarity (descending)
|
||||||
all_chunks.sort(key=lambda x: x.get("similarity", 0) or 0, reverse=True)
|
all_chunks.sort(key=lambda x: x.get("similarity", 0) or 0, reverse=True)
|
||||||
|
|||||||
@@ -174,62 +174,62 @@
|
|||||||
|
|
||||||
<!-- Hierarchical display -->
|
<!-- Hierarchical display -->
|
||||||
{% if results_data.mode == "hierarchical" and results_data.sections %}
|
{% if results_data.mode == "hierarchical" and results_data.sections %}
|
||||||
{% for section in results_data.sections %}
|
<!-- Show relevant sections as context -->
|
||||||
<div class="section-group">
|
<div style="margin-bottom: 2rem;">
|
||||||
<div class="section-header">
|
<h3 style="font-size: 1.2em; margin-bottom: 1rem; color: var(--color-accent);">📚 Sections pertinentes trouvées</h3>
|
||||||
<h3 style="margin: 0 0 0.5rem 0; font-size: 1.3em;">
|
<div style="display: flex; flex-direction: column; gap: 1rem;">
|
||||||
📂 {{ section.title[:80] }}{% if section.title|length > 80 %}...{% endif %}
|
{% for section in results_data.sections %}
|
||||||
<span class="badge" style="background-color: var(--color-accent-alt); color: white; margin-left: 0.5rem;">{{ section.chunks_count }} passage{% if section.chunks_count > 1 %}s{% endif %}</span>
|
<div style="padding: 1rem; border-left: 3px solid var(--color-accent); background: rgba(125, 110, 88, 0.05); border-radius: 4px;">
|
||||||
<span class="badge badge-similarity" style="margin-left: 0.5rem;">⚡ {{ section.similarity }}% similaire</span>
|
<div style="display: flex; align-items: center; gap: 0.5rem; margin-bottom: 0.5rem;">
|
||||||
</h3>
|
<strong>{{ section.title[:80] }}{% if section.title|length > 80 %}...{% endif %}</strong>
|
||||||
{% if section.section_path and section.section_path != section.title %}
|
<span class="badge badge-similarity">⚡ {{ section.similarity }}% similaire</span>
|
||||||
<p class="text-muted" style="margin: 0.25rem 0; font-size: 0.9em;">📍 {{ section.section_path }}</p>
|
</div>
|
||||||
{% endif %}
|
{% if section.summary_text %}
|
||||||
{% if section.summary_text %}
|
<p style="margin: 0; font-size: 0.9em; color: var(--color-text-main); font-style: italic;">{{ section.summary_text[:150] }}{% if section.summary_text|length > 150 %}...{% endif %}</p>
|
||||||
<p class="summary-text" style="margin: 0.5rem 0; font-style: italic; color: #555;">{{ section.summary_text }}</p>
|
{% endif %}
|
||||||
{% endif %}
|
{% if section.concepts %}
|
||||||
{% if section.concepts %}
|
<div style="margin-top: 0.5rem;">
|
||||||
<div class="concepts" style="margin-top: 0.5rem;">
|
{% for concept in section.concepts %}
|
||||||
{% for concept in section.concepts %}
|
<span class="badge" style="background-color: rgba(125, 110, 88, 0.15); color: var(--color-accent); border: 1px solid rgba(125, 110, 88, 0.3); margin-right: 0.25rem; font-size: 0.85em;">{{ concept }}</span>
|
||||||
<span class="badge" style="background-color: rgba(125, 110, 88, 0.15); color: var(--color-accent); border: 1px solid rgba(125, 110, 88, 0.3); margin-right: 0.25rem;">{{ concept }}</span>
|
{% endfor %}
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
|
</div>
|
||||||
|
{% endfor %}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Show top relevant chunks globally -->
|
||||||
|
<div>
|
||||||
|
<h3 style="font-size: 1.2em; margin-bottom: 1rem; color: var(--color-accent);">📄 Passages les plus pertinents</h3>
|
||||||
|
{% for chunk in results_data.results[:20] %}
|
||||||
|
<div class="chunk-item" style="background: white; padding: 1rem; margin-bottom: 0.75rem; border-left: 3px solid var(--color-accent-alt); border-radius: 4px;">
|
||||||
|
<div style="margin-bottom: 0.5rem; display: flex; gap: 0.5rem; flex-wrap: wrap; align-items: center;">
|
||||||
|
{% if chunk.work and chunk.work.author %}
|
||||||
|
<span class="badge badge-author">{{ chunk.work.author }}</span>
|
||||||
|
{% endif %}
|
||||||
|
{% if chunk.work and chunk.work.title %}
|
||||||
|
<span class="badge badge-work">{{ chunk.work.title }}</span>
|
||||||
|
{% endif %}
|
||||||
|
<span class="badge badge-similarity">⚡ {{ chunk.similarity }}% similaire</span>
|
||||||
|
</div>
|
||||||
|
<div class="passage-text" style="margin-bottom: 0.5rem;">"{{ chunk.text }}"</div>
|
||||||
|
<div class="passage-meta" style="font-size: 0.85em; color: #666;">
|
||||||
|
<strong>Section :</strong> {{ chunk.sectionPath or '—' }} │
|
||||||
|
<strong>Type :</strong> {{ chunk.unitType or '—' }} │
|
||||||
|
<strong>Langue :</strong> {{ (chunk.language or '—') | upper }}
|
||||||
|
</div>
|
||||||
|
{% if chunk.keywords %}
|
||||||
|
<div style="margin-top: 0.5rem;">
|
||||||
|
{% for kw in chunk.keywords %}
|
||||||
|
<span class="keyword-tag">{{ kw }}</span>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
</div>
|
</div>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
</div>
|
</div>
|
||||||
|
{% endfor %}
|
||||||
<!-- Chunks within this section -->
|
</div>
|
||||||
{% if section.chunks %}
|
{% endif %}
|
||||||
<div class="chunks-list" style="margin-left: 1.5rem; margin-top: 1rem;">
|
|
||||||
{% for chunk in section.chunks %}
|
|
||||||
<div class="chunk-item" style="background: white; padding: 1rem; margin-bottom: 0.75rem; border-left: 3px solid var(--color-accent-alt); border-radius: 4px;">
|
|
||||||
<div style="margin-bottom: 0.5rem; display: flex; gap: 0.5rem; flex-wrap: wrap; align-items: center;">
|
|
||||||
{% if chunk.work and chunk.work.author %}
|
|
||||||
<span class="badge badge-author">{{ chunk.work.author }}</span>
|
|
||||||
{% endif %}
|
|
||||||
{% if chunk.work and chunk.work.title %}
|
|
||||||
<span class="badge badge-work">{{ chunk.work.title }}</span>
|
|
||||||
{% endif %}
|
|
||||||
<span class="badge badge-similarity">⚡ {{ chunk.similarity }}% similaire</span>
|
|
||||||
</div>
|
|
||||||
<div class="passage-text" style="margin-bottom: 0.5rem;">"{{ chunk.text }}"</div>
|
|
||||||
<div class="passage-meta" style="font-size: 0.85em; color: #666;">
|
|
||||||
<strong>Section :</strong> {{ chunk.sectionPath or section.section_path or '—' }} │
|
|
||||||
<strong>Type :</strong> {{ chunk.unitType or '—' }} │
|
|
||||||
<strong>Langue :</strong> {{ (chunk.language or '—') | upper }}
|
|
||||||
</div>
|
|
||||||
{% if chunk.keywords %}
|
|
||||||
<div style="margin-top: 0.5rem;">
|
|
||||||
{% for kw in chunk.keywords %}
|
|
||||||
<span class="keyword-tag">{{ kw }}</span>
|
|
||||||
{% endfor %}
|
|
||||||
</div>
|
|
||||||
{% endif %}
|
|
||||||
</div>
|
|
||||||
{% endfor %}
|
|
||||||
</div>
|
|
||||||
{% endif %}
|
|
||||||
</div>
|
|
||||||
{% endfor %}
|
|
||||||
|
|
||||||
<!-- Simple display (original) -->
|
<!-- Simple display (original) -->
|
||||||
{% else %}
|
{% else %}
|
||||||
|
|||||||
Reference in New Issue
Block a user