refactor: Integrate summary search into dropdown and fix hierarchical mode

Previously created a separate page for summary search, which was redundant since hierarchical mode already demonstrates the summary→chunk pattern. Refactored to integrate summary-only mode as a dropdown option in the main search interface, reducing code duplication by ~370 lines.

Also fixed critical bug in hierarchical search where return_properties excluded the nested "document" object, causing source_id to be empty and all sections to be filtered out. Solution: removed return_properties to let Weaviate return all properties including nested objects.

All 4 search modes now functional:
- Auto-detection (default)
- Simple chunks (10% visibility)
- Hierarchical summary→chunks (variable)
- Summary-only (90% visibility)

Tests: 14/14 passed for dropdown integration, hierarchical mode confirmed working with 13 passages across 4 section groups.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-03 17:59:58 +01:00
parent b76e56e62e
commit 8c0e1cef0d
7 changed files with 1132 additions and 16 deletions

View File

@@ -339,9 +339,8 @@ def hierarchical_search(
query=query,
limit=sections_limit,
return_metadata=wvq.MetadataQuery(distance=True),
return_properties=[
"sectionPath", "title", "text", "level", "concepts"
],
# Note: Don't specify return_properties - let Weaviate return all properties
# including nested objects like "document" which we need for source_id
)
if not summaries_result.objects:
@@ -550,6 +549,110 @@ def should_use_hierarchical_search(query: str) -> bool:
return False
def summary_only_search(
query: str,
limit: int = 10,
author_filter: Optional[str] = None,
work_filter: Optional[str] = None,
) -> List[Dict[str, Any]]:
"""Summary-only semantic search on Summary collection (90% visibility).
Searches high-level section summaries instead of detailed chunks. Offers
90% visibility of rich documents vs 10% for direct chunk search due to
Peirce chunk dominance (5,068/5,230 = 97% of chunks).
Args:
query: Search query text.
limit: Maximum number of summary results to return.
author_filter: Filter by author name (uses document.author property).
work_filter: Filter by work title (uses document.title property).
Returns:
List of summary dictionaries formatted as "results" with:
- uuid, similarity, text, title, concepts, doc_icon, doc_name
- author, year, chunks_count, section_path
"""
try:
with get_weaviate_client() as client:
if client is None:
return []
summaries = client.collections.get("Summary")
# Note: Cannot filter by nested document properties directly in Weaviate v4
# Must fetch all and filter in Python if author/work filters are present
# Semantic search
results = summaries.query.near_text(
query=query,
limit=limit * 3 if (author_filter or work_filter) else limit, # Fetch more if filtering
return_metadata=wvq.MetadataQuery(distance=True)
)
# Format and filter results
formatted_results: List[Dict[str, Any]] = []
for obj in results.objects:
props = obj.properties
similarity = 1 - obj.metadata.distance
# Apply filters (Python-side since nested properties)
if author_filter and props["document"].get("author", "") != author_filter:
continue
if work_filter and props["document"].get("title", "") != work_filter:
continue
# Determine document icon and name
doc_id = props["document"]["sourceId"].lower()
if "tiercelin" in doc_id:
doc_icon = "🟡"
doc_name = "Tiercelin"
elif "platon" in doc_id or "menon" in doc_id:
doc_icon = "🟢"
doc_name = "Platon"
elif "haugeland" in doc_id:
doc_icon = "🟣"
doc_name = "Haugeland"
elif "logique" in doc_id:
doc_icon = "🔵"
doc_name = "Logique"
else:
doc_icon = ""
doc_name = "Peirce"
# Format result (compatible with existing template expectations)
result = {
"uuid": str(obj.uuid),
"similarity": round(similarity * 100, 1), # Convert to percentage
"text": props.get("text", ""),
"title": props["title"],
"concepts": props.get("concepts", []),
"doc_icon": doc_icon,
"doc_name": doc_name,
"author": props["document"].get("author", ""),
"year": props["document"].get("year", 0),
"chunks_count": props.get("chunksCount", 0),
"section_path": props.get("sectionPath", ""),
"sectionPath": props.get("sectionPath", ""), # Alias for template compatibility
# Add work info for template compatibility
"work": {
"title": props["document"].get("title", ""),
"author": props["document"].get("author", ""),
},
}
formatted_results.append(result)
# Stop if we have enough results after filtering
if len(formatted_results) >= limit:
break
return formatted_results
except Exception as e:
print(f"Error in summary_only_search: {e}")
return []
def search_passages(
query: str,
limit: int = 10,
@@ -560,9 +663,8 @@ def search_passages(
) -> Dict[str, Any]:
"""Intelligent semantic search dispatcher with auto-detection.
Automatically chooses between simple (1-stage) and hierarchical (2-stage)
search based on query complexity. Complex queries use hierarchical search
for better precision and context.
Automatically chooses between simple (1-stage), hierarchical (2-stage),
or summary-only search based on query complexity or user selection.
Args:
query: Search query text.
@@ -570,14 +672,14 @@ def search_passages(
author_filter: Filter by author name (uses workAuthor property).
work_filter: Filter by work title (uses workTitle property).
sections_limit: Number of top sections for hierarchical search (default: 5).
force_mode: Force search mode ("simple", "hierarchical", or None for auto).
force_mode: Force search mode ("simple", "hierarchical", "summary", or None for auto).
Returns:
Dictionary with search results:
- mode: "simple" or "hierarchical"
- results: List of passage dictionaries (flat)
- mode: "simple", "hierarchical", or "summary"
- results: List of passage/summary dictionaries (flat)
- sections: List of section dicts with nested chunks (hierarchical only)
- total_chunks: Total number of chunks found
- total_chunks: Total number of chunks/summaries found
Examples:
>>> # Short query → auto-detects simple search
@@ -588,11 +690,20 @@ def search_passages(
>>> search_passages("Qu'est-ce que la vertu selon Aristote ?", limit=5)
{"mode": "hierarchical", "sections": [...], "results": [...], "total_chunks": 15}
>>> # Force hierarchical mode
>>> search_passages("justice", force_mode="hierarchical", sections_limit=3)
{"mode": "hierarchical", ...}
>>> # Force summary-only mode (90% visibility, high-level overviews)
>>> search_passages("What is the Turing test?", force_mode="summary", limit=10)
{"mode": "summary", "results": [...], "total_chunks": 7}
"""
# Determine search mode
# Handle summary-only mode
if force_mode == "summary":
results = summary_only_search(query, limit, author_filter, work_filter)
return {
"mode": "summary",
"results": results,
"total_chunks": len(results),
}
# Determine search mode for simple vs hierarchical
if force_mode == "simple":
use_hierarchical = False
elif force_mode == "hierarchical":