Files
linear-coding-agent/generations/library_rag/tests/mcp/test_schemas.py
David Blanc Brioir d2f7165120 Add Library RAG project and cleanup root directory
- Add complete Library RAG application (Flask + MCP server)
  - PDF processing pipeline with OCR and LLM extraction
  - Weaviate vector database integration (BGE-M3 embeddings)
  - Flask web interface with search and document management
  - MCP server for Claude Desktop integration
  - Comprehensive test suite (134 tests)

- Clean up root directory
  - Remove obsolete documentation files
  - Remove backup and temporary files
  - Update autonomous agent configuration

- Update prompts
  - Enhance initializer bis prompt with better instructions

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2025-12-30 11:57:12 +01:00

257 lines
8.1 KiB
Python

"""
Unit tests for MCP Pydantic schemas.
Tests schema validation, field constraints, and JSON schema generation.
"""
import pytest
from pydantic import ValidationError
from mcp_tools.schemas import (
ParsePdfInput,
ParsePdfOutput,
SearchChunksInput,
SearchChunksOutput,
SearchSummariesInput,
GetDocumentInput,
ListDocumentsInput,
GetChunksByDocumentInput,
FilterByAuthorInput,
DeleteDocumentInput,
ChunkResult,
DocumentInfo,
)
class TestParsePdfInput:
    """Validation behaviour of the ParsePdfInput schema."""

    def test_valid_path(self) -> None:
        """A filesystem path is accepted and stored unchanged."""
        parsed = ParsePdfInput(pdf_path="/path/to/document.pdf")
        assert parsed.pdf_path == "/path/to/document.pdf"

    def test_valid_url(self) -> None:
        """An HTTP(S) URL is accepted and stored unchanged."""
        parsed = ParsePdfInput(pdf_path="https://example.com/doc.pdf")
        assert parsed.pdf_path == "https://example.com/doc.pdf"

    def test_empty_path_rejected(self) -> None:
        """An empty path violates the min-length constraint."""
        with pytest.raises(ValidationError) as exc_info:
            ParsePdfInput(pdf_path="")
        # Pydantic v2 reports the violated constraint as "string_too_short".
        message = str(exc_info.value).lower()
        assert "string_too_short" in message
class TestParsePdfOutput:
    """Construction of the ParsePdfOutput schema."""

    def test_full_output(self) -> None:
        """A fully populated success payload round-trips all fields."""
        payload = {
            "success": True,
            "document_name": "test-doc",
            "source_id": "test-doc-v1",
            "pages": 10,
            "chunks_count": 25,
            "cost_ocr": 0.03,
            "cost_llm": 0.01,
            "cost_total": 0.04,
            "output_dir": "/output/test-doc",
            "metadata": {"title": "Test", "author": "Unknown"},
        }
        result = ParsePdfOutput(**payload)
        assert result.success is True
        assert result.cost_total == 0.04
        assert result.metadata["title"] == "Test"

    def test_output_with_error(self) -> None:
        """A failed run carries its error message through the model."""
        result = ParsePdfOutput(
            success=False,
            document_name="failed-doc",
            source_id="",
            pages=0,
            chunks_count=0,
            cost_ocr=0.0,
            cost_llm=0.0,
            cost_total=0.0,
            output_dir="",
            error="PDF processing failed: corrupted file",
        )
        assert result.success is False
        assert "corrupted" in result.error  # type: ignore
class TestSearchChunksInput:
    """Validation behaviour of the SearchChunksInput schema."""

    def test_minimal_input(self) -> None:
        """Only the query is required; other fields use defaults."""
        data = SearchChunksInput(query="test query")
        assert data.query == "test query"
        assert data.limit == 10  # default
        assert data.min_similarity == 0.0  # default

    def test_full_input(self) -> None:
        """All optional filters can be supplied together."""
        data = SearchChunksInput(
            query="What is justice?",
            limit=20,
            min_similarity=0.5,
            author_filter="Platon",
            work_filter="Republic",
            language_filter="fr",
        )
        assert data.limit == 20
        assert data.author_filter == "Platon"

    def test_empty_query_rejected(self) -> None:
        """An empty query string fails validation."""
        with pytest.raises(ValidationError):
            SearchChunksInput(query="")

    def test_query_too_long_rejected(self) -> None:
        """A query longer than 1000 characters fails validation."""
        with pytest.raises(ValidationError):
            SearchChunksInput(query="a" * 1001)

    def test_limit_bounds(self) -> None:
        """Limits just outside the accepted range are rejected."""
        for bad_limit in (0, 101):
            with pytest.raises(ValidationError):
                SearchChunksInput(query="test", limit=bad_limit)

    def test_similarity_bounds(self) -> None:
        """Similarities just outside [0.0, 1.0] are rejected."""
        for bad_similarity in (-0.1, 1.1):
            with pytest.raises(ValidationError):
                SearchChunksInput(query="test", min_similarity=bad_similarity)
class TestSearchSummariesInput:
    """Validation behaviour of the SearchSummariesInput schema."""

    def test_level_filters(self) -> None:
        """min_level and max_level are stored as given."""
        data = SearchSummariesInput(query="test", min_level=1, max_level=3)
        assert data.min_level == 1
        assert data.max_level == 3

    def test_level_bounds(self) -> None:
        """Level values outside the accepted range are rejected."""
        with pytest.raises(ValidationError):
            SearchSummariesInput(query="test", min_level=0)
        with pytest.raises(ValidationError):
            SearchSummariesInput(query="test", max_level=6)
class TestGetDocumentInput:
    """Validation behaviour of the GetDocumentInput schema."""

    def test_defaults(self) -> None:
        """Chunks are excluded by default and chunk_limit is 50."""
        data = GetDocumentInput(source_id="doc-123")
        assert data.include_chunks is False
        assert data.chunk_limit == 50

    def test_with_chunks(self) -> None:
        """Chunk inclusion and a custom chunk limit can be requested."""
        data = GetDocumentInput(
            source_id="doc-123",
            include_chunks=True,
            chunk_limit=100,
        )
        assert data.include_chunks is True
        assert data.chunk_limit == 100
class TestDeleteDocumentInput:
    """Validation behaviour of the DeleteDocumentInput schema."""

    def test_requires_confirmation(self) -> None:
        """Deletion is unconfirmed unless explicitly requested."""
        data = DeleteDocumentInput(source_id="doc-to-delete")
        assert data.confirm is False

    def test_with_confirmation(self) -> None:
        """An explicit confirm flag is preserved."""
        data = DeleteDocumentInput(source_id="doc-to-delete", confirm=True)
        assert data.confirm is True
class TestChunkResult:
    """Construction of the ChunkResult model."""

    def test_full_chunk(self) -> None:
        """A fully populated chunk keeps its score and ordering."""
        fields = {
            "text": "This is the chunk content.",
            "similarity": 0.85,
            "section_path": "Chapter 1 > Section 1",
            "chapter_title": "Introduction",
            "work_title": "The Republic",
            "work_author": "Platon",
            "order_index": 5,
        }
        chunk = ChunkResult(**fields)
        assert chunk.similarity == 0.85
        assert chunk.order_index == 5
class TestDocumentInfo:
    """Construction of the DocumentInfo model."""

    def test_with_optional_fields(self) -> None:
        """Optional toc/hierarchy payloads are retained when supplied."""
        doc = DocumentInfo(
            source_id="platon-republic",
            work_title="The Republic",
            work_author="Platon",
            edition="GF Flammarion",
            pages=500,
            language="fr",
            toc={"chapters": ["I", "II", "III"]},
            hierarchy={"level": 1},
        )
        # Both optional structures must survive model construction.
        assert doc.toc is not None
        assert doc.hierarchy is not None
class TestJsonSchemaGeneration:
    """JSON-schema output produced from the Pydantic models."""

    def test_schemas_have_descriptions(self) -> None:
        """Each exposed field carries a description in the JSON schema."""
        properties = SearchChunksInput.model_json_schema()["properties"]
        for field_name in ("query", "limit", "min_similarity"):
            assert "description" in properties[field_name]

    def test_schema_includes_constraints(self) -> None:
        """Length and numeric bounds survive into the JSON schema."""
        props = SearchChunksInput.model_json_schema()["properties"]
        # String-length constraints on the query field.
        assert props["query"].get("minLength") == 1
        assert props["query"].get("maxLength") == 1000
        # Numeric bounds on the limit field.
        assert props["limit"].get("minimum") == 1
        assert props["limit"].get("maximum") == 100
# Allow running this module directly (python test_schemas.py) in addition
# to pytest's normal collection; "-v" gives per-test verbose output.
if __name__ == "__main__":
    pytest.main([__file__, "-v"])