Add Library RAG project and cleanup root directory
- Add complete Library RAG application (Flask + MCP server)
  - PDF processing pipeline with OCR and LLM extraction
  - Weaviate vector database integration (BGE-M3 embeddings)
  - Flask web interface with search and document management
  - MCP server for Claude Desktop integration
  - Comprehensive test suite (134 tests)
- Clean up root directory
  - Remove obsolete documentation files
  - Remove backup and temporary files
- Update autonomous agent configuration
- Update prompts
  - Enhance initializer bis prompt with better instructions

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
1
generations/library_rag/tests/mcp/__init__.py
Normal file
1
generations/library_rag/tests/mcp/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""MCP server unit tests."""
|
||||
196
generations/library_rag/tests/mcp/conftest.py
Normal file
196
generations/library_rag/tests/mcp/conftest.py
Normal file
@@ -0,0 +1,196 @@
|
||||
"""
|
||||
Pytest fixtures for MCP server tests.
|
||||
|
||||
Provides common fixtures for mocking dependencies and test data.
|
||||
"""
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Generator
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from mcp_config import MCPConfig
|
||||
|
||||
|
||||
@pytest.fixture
def mock_env_with_api_key() -> Generator[Dict[str, str], None, None]:
    """Yield an environment dict where MISTRAL_API_KEY is the only variable set.

    Yields:
        The dictionary of environment variables that was installed.
    """
    test_env = {"MISTRAL_API_KEY": "test-api-key-12345"}
    # clear=True guarantees no variable from the developer machine leaks in.
    with patch.dict(os.environ, test_env, clear=True):
        yield test_env
|
||||
|
||||
|
||||
@pytest.fixture
def valid_config() -> MCPConfig:
    """Build an MCPConfig populated with known-good test values.

    Returns:
        A fully populated MCPConfig instance.
    """
    settings = dict(
        mistral_api_key="test-api-key",
        ollama_base_url="http://localhost:11434",
        structure_llm_model="test-model",
        structure_llm_temperature=0.2,
        default_llm_provider="ollama",
        weaviate_host="localhost",
        weaviate_port=8080,
        log_level="INFO",
        output_dir=Path("test_output"),
    )
    return MCPConfig(**settings)
|
||||
|
||||
|
||||
@pytest.fixture
def mock_weaviate_client() -> Generator[MagicMock, None, None]:
    """Yield a MagicMock standing in for a local Weaviate client connection.

    Yields:
        The MagicMock that weaviate.connect_to_local will return while patched.
    """
    fake_client = MagicMock()
    with patch("weaviate.connect_to_local", return_value=fake_client):
        yield fake_client
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Parsing Tools Fixtures
|
||||
# =============================================================================
|
||||
|
||||
|
||||
@pytest.fixture
def sample_pdf_bytes() -> bytes:
    """Provide the bytes of a minimal but structurally valid PDF document.

    Returns:
        Raw bytes: header, three objects (catalog/pages/page), xref, trailer.
    """
    minimal_pdf = b"""%PDF-1.4
1 0 obj << /Type /Catalog /Pages 2 0 R >> endobj
2 0 obj << /Type /Pages /Kids [3 0 R] /Count 1 >> endobj
3 0 obj << /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] >> endobj
xref
0 4
0000000000 65535 f
0000000009 00000 n
0000000058 00000 n
0000000115 00000 n
trailer << /Size 4 /Root 1 0 R >>
startxref
193
%%EOF"""
    return minimal_pdf
|
||||
|
||||
|
||||
@pytest.fixture
def successful_pipeline_result() -> Dict[str, Any]:
    """Return a dict shaped like a successful process_pdf pipeline result.

    Returns:
        Dictionary with the same keys (and key order) process_pdf produces.
    """
    document_metadata: Dict[str, Any] = {
        "title": "Test Document Title",
        "author": "Test Author",
        "language": "en",
        "year": 2023,
    }
    return {
        "success": True,
        "document_name": "test-document",
        "source_id": "test-document",
        "pages": 10,
        "chunks_count": 25,
        "cost_ocr": 0.03,
        "cost_llm": 0.05,
        "cost_total": 0.08,
        "output_dir": Path("output/test-document"),
        "metadata": document_metadata,
        "error": None,
    }
|
||||
|
||||
|
||||
@pytest.fixture
def failed_pipeline_result() -> Dict[str, Any]:
    """Return a dict shaped like a failed process_pdf pipeline result.

    Returns:
        Dictionary with zeroed counters/costs and a populated error message.
    """
    # NOTE: output_dir is an empty string here (not a Path) — mirrors what the
    # pipeline reports when it fails before creating any output directory.
    return {
        "success": False,
        "document_name": "failed-document",
        "source_id": "failed-document",
        "pages": 0,
        "chunks_count": 0,
        "cost_ocr": 0.0,
        "cost_llm": 0.0,
        "cost_total": 0.0,
        "output_dir": "",
        "metadata": {},
        "error": "OCR processing failed: Invalid PDF structure",
    }
|
||||
|
||||
|
||||
@pytest.fixture
def mock_process_pdf() -> Generator[MagicMock, None, None]:
    """Yield a MagicMock replacing mcp_tools.parsing_tools.process_pdf.

    Yields:
        The mock installed for the duration of the test.
    """
    with patch("mcp_tools.parsing_tools.process_pdf") as replaced:
        yield replaced
|
||||
|
||||
|
||||
@pytest.fixture
def mock_process_pdf_bytes() -> Generator[MagicMock, None, None]:
    """Yield a MagicMock replacing mcp_tools.parsing_tools.process_pdf_bytes.

    Yields:
        The mock installed for the duration of the test.
    """
    with patch("mcp_tools.parsing_tools.process_pdf_bytes") as replaced:
        yield replaced
|
||||
|
||||
|
||||
@pytest.fixture
def mock_download_pdf() -> Generator[AsyncMock, None, None]:
    """Yield an AsyncMock replacing mcp_tools.parsing_tools.download_pdf.

    Yields:
        The async mock installed for the duration of the test.
    """
    # new_callable=AsyncMock keeps the replacement awaitable like the original.
    with patch(
        "mcp_tools.parsing_tools.download_pdf", new_callable=AsyncMock
    ) as replaced:
        yield replaced
|
||||
|
||||
|
||||
@pytest.fixture
def temp_pdf_file(tmp_path: Path, sample_pdf_bytes: bytes) -> Path:
    """Write the sample PDF bytes to a temp file and return its path.

    Args:
        tmp_path: Pytest-provided per-test temporary directory.
        sample_pdf_bytes: Minimal valid PDF content from the sibling fixture.

    Returns:
        Path of the freshly written PDF file.
    """
    target = tmp_path / "test_document.pdf"
    target.write_bytes(sample_pdf_bytes)
    return target
|
||||
133
generations/library_rag/tests/mcp/test_config.py
Normal file
133
generations/library_rag/tests/mcp/test_config.py
Normal file
@@ -0,0 +1,133 @@
|
||||
"""
|
||||
Unit tests for MCP configuration management.
|
||||
|
||||
Tests the MCPConfig class for proper loading, validation, and defaults.
|
||||
"""
|
||||
|
||||
import os
|
||||
import pytest
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
from mcp_config import MCPConfig
|
||||
|
||||
|
||||
class TestMCPConfigFromEnv:
    """Unit tests covering MCPConfig.from_env()."""

    def test_loads_with_required_key(self) -> None:
        """from_env() succeeds when MISTRAL_API_KEY is present."""
        with patch.dict(os.environ, {"MISTRAL_API_KEY": "test-key-123"}, clear=True):
            loaded = MCPConfig.from_env()
        assert loaded.mistral_api_key == "test-key-123"

    def test_raises_without_api_key(self) -> None:
        """from_env() raises ValueError when MISTRAL_API_KEY is absent."""
        # load_dotenv is patched so a developer's .env file cannot supply the key.
        with patch("mcp_config.load_dotenv"), patch.dict(os.environ, {}, clear=True):
            with pytest.raises(ValueError) as exc_info:
                MCPConfig.from_env()
        assert "MISTRAL_API_KEY" in str(exc_info.value)

    def test_default_values_applied(self) -> None:
        """Every documented default is applied when only the key is set."""
        with patch.dict(os.environ, {"MISTRAL_API_KEY": "test-key"}, clear=True):
            cfg = MCPConfig.from_env()

        expected_defaults = [
            ("ollama_base_url", "http://localhost:11434"),
            ("structure_llm_model", "deepseek-r1:14b"),
            ("structure_llm_temperature", 0.2),
            ("default_llm_provider", "ollama"),
            ("weaviate_host", "localhost"),
            ("weaviate_port", 8080),
            ("log_level", "INFO"),
            ("output_dir", Path("output")),
        ]
        for attribute, expected in expected_defaults:
            assert getattr(cfg, attribute) == expected

    def test_custom_values_loaded(self) -> None:
        """Every supported environment variable overrides its default."""
        custom_env = {
            "MISTRAL_API_KEY": "custom-key",
            "OLLAMA_BASE_URL": "http://custom:1234",
            "STRUCTURE_LLM_MODEL": "custom-model",
            "STRUCTURE_LLM_TEMPERATURE": "0.7",
            "DEFAULT_LLM_PROVIDER": "mistral",
            "WEAVIATE_HOST": "weaviate.example.com",
            "WEAVIATE_PORT": "9999",
            "LOG_LEVEL": "DEBUG",
            "OUTPUT_DIR": "/custom/output",
        }
        with patch.dict(os.environ, custom_env, clear=True):
            cfg = MCPConfig.from_env()

        expected_values = [
            ("mistral_api_key", "custom-key"),
            ("ollama_base_url", "http://custom:1234"),
            ("structure_llm_model", "custom-model"),
            ("structure_llm_temperature", 0.7),
            ("default_llm_provider", "mistral"),
            ("weaviate_host", "weaviate.example.com"),
            ("weaviate_port", 9999),
            ("log_level", "DEBUG"),
            ("output_dir", Path("/custom/output")),
        ]
        for attribute, expected in expected_values:
            assert getattr(cfg, attribute) == expected
|
||||
|
||||
|
||||
class TestMCPConfigValidation:
    """Unit tests covering MCPConfig.validate()."""

    def test_valid_config_passes(self) -> None:
        """A config with in-range values validates without raising."""
        cfg = MCPConfig(
            mistral_api_key="test-key",
            default_llm_provider="ollama",
            log_level="INFO",
            structure_llm_temperature=0.5,
        )
        cfg.validate()  # must not raise

    def test_invalid_llm_provider_fails(self) -> None:
        """An unknown LLM provider is rejected with ValueError."""
        cfg = MCPConfig(
            mistral_api_key="test-key",
            default_llm_provider="invalid",  # type: ignore
        )
        with pytest.raises(ValueError) as exc_info:
            cfg.validate()
        assert "Invalid LLM provider" in str(exc_info.value)

    def test_invalid_log_level_fails(self) -> None:
        """An unknown log level is rejected with ValueError."""
        cfg = MCPConfig(
            mistral_api_key="test-key",
            log_level="INVALID",
        )
        with pytest.raises(ValueError) as exc_info:
            cfg.validate()
        assert "Invalid log level" in str(exc_info.value)

    def test_invalid_temperature_fails(self) -> None:
        """A temperature above the 0-2 range is rejected with ValueError."""
        cfg = MCPConfig(
            mistral_api_key="test-key",
            structure_llm_temperature=2.5,
        )
        with pytest.raises(ValueError) as exc_info:
            cfg.validate()
        assert "Invalid temperature" in str(exc_info.value)
|
||||
|
||||
|
||||
class TestMCPConfigProperties:
    """Unit tests for MCPConfig computed properties."""

    def test_weaviate_url_property(self) -> None:
        """weaviate_url is assembled from the configured host and port."""
        cfg = MCPConfig(
            mistral_api_key="test-key",
            weaviate_host="my-host",
            weaviate_port=9090,
        )
        assert cfg.weaviate_url == "http://my-host:9090"
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Allow running this module directly for a quick verbose test run.
    pytest.main([__file__, "-v"])
|
||||
673
generations/library_rag/tests/mcp/test_parsing_tools.py
Normal file
673
generations/library_rag/tests/mcp/test_parsing_tools.py
Normal file
@@ -0,0 +1,673 @@
|
||||
"""
|
||||
Unit tests for MCP parsing tools.
|
||||
|
||||
Tests the parse_pdf tool handler with mocked dependencies to ensure:
|
||||
- Local file processing works correctly
|
||||
- URL-based PDF downloads work correctly
|
||||
- Error handling is comprehensive
|
||||
- Fixed parameters are used correctly
|
||||
- Cost tracking is accurate
|
||||
|
||||
Uses asyncio for async test support.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
|
||||
from mcp_tools.parsing_tools import (
|
||||
FIXED_LLM_MODEL,
|
||||
FIXED_LLM_PROVIDER,
|
||||
FIXED_USE_LLM,
|
||||
FIXED_USE_OCR_ANNOTATIONS,
|
||||
FIXED_USE_SEMANTIC_CHUNKING,
|
||||
download_pdf,
|
||||
extract_filename_from_url,
|
||||
is_url,
|
||||
parse_pdf_handler,
|
||||
)
|
||||
from mcp_tools.schemas import ParsePdfInput, ParsePdfOutput
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Test is_url Helper Function
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class TestIsUrl:
    """Unit tests for the is_url helper."""

    def test_https_url(self) -> None:
        """An https:// address is classified as a URL."""
        candidate = "https://example.com/document.pdf"
        assert is_url(candidate) is True

    def test_http_url(self) -> None:
        """An http:// address is classified as a URL."""
        candidate = "http://example.com/document.pdf"
        assert is_url(candidate) is True

    def test_local_path_unix(self) -> None:
        """An absolute Unix path is not classified as a URL."""
        candidate = "/path/to/document.pdf"
        assert is_url(candidate) is False

    def test_local_path_windows(self) -> None:
        """A Windows drive path is not classified as a URL."""
        candidate = "C:\\Documents\\document.pdf"
        assert is_url(candidate) is False

    def test_relative_path(self) -> None:
        """A relative path is not classified as a URL."""
        candidate = "./documents/document.pdf"
        assert is_url(candidate) is False

    def test_ftp_url_not_supported(self) -> None:
        """An ftp:// address is rejected — only HTTP/HTTPS are supported."""
        candidate = "ftp://example.com/document.pdf"
        assert is_url(candidate) is False

    def test_empty_string(self) -> None:
        """The empty string is not classified as a URL."""
        assert is_url("") is False
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Test extract_filename_from_url Helper Function
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class TestExtractFilenameFromUrl:
    """Unit tests for the extract_filename_from_url helper."""

    def test_url_with_pdf_filename(self) -> None:
        """A URL ending in .pdf keeps its filename unchanged."""
        extracted = extract_filename_from_url("https://example.com/docs/aristotle.pdf")
        assert extracted == "aristotle.pdf"

    def test_url_with_filename_no_extension(self) -> None:
        """A bare filename in the URL gains a .pdf extension."""
        extracted = extract_filename_from_url("https://example.com/docs/aristotle")
        assert extracted == "aristotle.pdf"

    def test_url_without_path(self) -> None:
        """A URL with no path falls back to the default name."""
        extracted = extract_filename_from_url("https://example.com/")
        assert extracted == "downloaded.pdf"

    def test_url_with_api_endpoint(self) -> None:
        """An API-style endpoint uses its last segment plus .pdf."""
        extracted = extract_filename_from_url("https://api.example.com/download")
        assert extracted == "download.pdf"

    def test_url_with_query_params(self) -> None:
        """Query parameters are stripped before extracting the filename."""
        extracted = extract_filename_from_url(
            "https://example.com/docs/kant.pdf?token=abc"
        )
        assert extracted == "kant.pdf"
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Test download_pdf Function
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class TestDownloadPdf:
    """Tests for the download_pdf async function.

    httpx.AsyncClient is mocked in every test so no network traffic occurs;
    each test drives the coroutine to completion with asyncio.run().
    """

    @staticmethod
    def _wire_async_client(client_cls: MagicMock, client: AsyncMock) -> None:
        """Make ``async with AsyncClient() as c`` yield *client* on the patched class.

        Extracted because the __aenter__/__aexit__ wiring was duplicated
        verbatim in all three tests.
        """
        client_cls.return_value.__aenter__ = AsyncMock(return_value=client)
        client_cls.return_value.__aexit__ = AsyncMock(return_value=None)

    def test_successful_download(self) -> None:
        """A PDF response is returned as raw bytes and fetched exactly once."""

        async def run_test() -> None:
            mock_response = MagicMock()
            mock_response.content = b"%PDF-1.4 test content"
            mock_response.headers = {"content-type": "application/pdf"}
            mock_response.raise_for_status = MagicMock()

            with patch(
                "mcp_tools.parsing_tools.httpx.AsyncClient"
            ) as mock_client_class:
                mock_client = AsyncMock()
                mock_client.get = AsyncMock(return_value=mock_response)
                self._wire_async_client(mock_client_class, mock_client)

                result = await download_pdf("https://example.com/document.pdf")

                assert result == b"%PDF-1.4 test content"
                mock_client.get.assert_called_once_with(
                    "https://example.com/document.pdf"
                )

        asyncio.run(run_test())

    def test_download_with_non_pdf_content_type(self) -> None:
        """A non-PDF content-type still succeeds (only a warning is logged)."""

        async def run_test() -> None:
            mock_response = MagicMock()
            mock_response.content = b"%PDF-1.4 test content"
            mock_response.headers = {"content-type": "application/octet-stream"}
            mock_response.raise_for_status = MagicMock()

            with patch(
                "mcp_tools.parsing_tools.httpx.AsyncClient"
            ) as mock_client_class:
                mock_client = AsyncMock()
                mock_client.get = AsyncMock(return_value=mock_response)
                self._wire_async_client(mock_client_class, mock_client)

                result = await download_pdf("https://example.com/document.pdf")
                assert result == b"%PDF-1.4 test content"

        asyncio.run(run_test())

    def test_download_http_error(self) -> None:
        """HTTP status errors raised by the client propagate to the caller."""

        async def run_test() -> None:
            with patch(
                "mcp_tools.parsing_tools.httpx.AsyncClient"
            ) as mock_client_class:
                mock_client = AsyncMock()
                mock_client.get = AsyncMock(
                    side_effect=httpx.HTTPStatusError(
                        "Not Found",
                        request=MagicMock(),
                        response=MagicMock(status_code=404),
                    )
                )
                self._wire_async_client(mock_client_class, mock_client)

                with pytest.raises(httpx.HTTPStatusError):
                    await download_pdf("https://example.com/nonexistent.pdf")

        asyncio.run(run_test())
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Test parse_pdf_handler - Local Files
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class TestParsePdfHandlerLocalFile:
    """Tests for parse_pdf_handler when given a local file path."""

    def test_successful_local_file_processing(
        self,
        temp_pdf_file: Path,
        successful_pipeline_result: Dict[str, Any],
    ) -> None:
        """A successful pipeline run is mapped field-by-field onto the output."""

        async def scenario() -> None:
            with patch("mcp_tools.parsing_tools.process_pdf") as pipeline:
                pipeline.return_value = successful_pipeline_result

                outcome = await parse_pdf_handler(
                    ParsePdfInput(pdf_path=str(temp_pdf_file))
                )

                assert outcome.success is True
                assert outcome.document_name == "test-document"
                assert outcome.pages == 10
                assert outcome.chunks_count == 25
                assert outcome.cost_ocr == 0.03
                assert outcome.cost_llm == 0.05
                assert outcome.cost_total == 0.08
                assert outcome.metadata["title"] == "Test Document Title"
                assert outcome.error is None

        asyncio.run(scenario())

    def test_local_file_uses_fixed_parameters(
        self,
        temp_pdf_file: Path,
        successful_pipeline_result: Dict[str, Any],
    ) -> None:
        """The handler always forwards the fixed optimal pipeline parameters."""

        async def scenario() -> None:
            with patch("mcp_tools.parsing_tools.process_pdf") as pipeline:
                pipeline.return_value = successful_pipeline_result

                await parse_pdf_handler(ParsePdfInput(pdf_path=str(temp_pdf_file)))

                pipeline.assert_called_once()
                forwarded = pipeline.call_args.kwargs
                assert forwarded["use_llm"] == FIXED_USE_LLM
                assert forwarded["llm_provider"] == FIXED_LLM_PROVIDER
                assert forwarded["llm_model"] == FIXED_LLM_MODEL
                assert forwarded["use_semantic_chunking"] == FIXED_USE_SEMANTIC_CHUNKING
                assert forwarded["use_ocr_annotations"] == FIXED_USE_OCR_ANNOTATIONS

        asyncio.run(scenario())

    def test_file_not_found_error(self) -> None:
        """A missing local file yields a failure result mentioning 'not found'."""

        async def scenario() -> None:
            outcome = await parse_pdf_handler(
                ParsePdfInput(pdf_path="/nonexistent/path/document.pdf")
            )

            assert outcome.success is False
            assert "not found" in outcome.error.lower()
            assert outcome.pages == 0
            assert outcome.chunks_count == 0

        asyncio.run(scenario())

    def test_pipeline_failure(
        self,
        temp_pdf_file: Path,
        failed_pipeline_result: Dict[str, Any],
    ) -> None:
        """A failure result from the pipeline is surfaced in the output."""

        async def scenario() -> None:
            with patch("mcp_tools.parsing_tools.process_pdf") as pipeline:
                pipeline.return_value = failed_pipeline_result

                outcome = await parse_pdf_handler(
                    ParsePdfInput(pdf_path=str(temp_pdf_file))
                )

                assert outcome.success is False
                assert "OCR processing failed" in outcome.error
                assert outcome.pages == 0

        asyncio.run(scenario())

    def test_pipeline_exception(
        self,
        temp_pdf_file: Path,
    ) -> None:
        """An exception from the pipeline is converted into an error result."""

        async def scenario() -> None:
            with patch("mcp_tools.parsing_tools.process_pdf") as pipeline:
                pipeline.side_effect = RuntimeError("Unexpected error")

                outcome = await parse_pdf_handler(
                    ParsePdfInput(pdf_path=str(temp_pdf_file))
                )

                assert outcome.success is False
                assert "Processing error" in outcome.error
                assert "Unexpected error" in outcome.error

        asyncio.run(scenario())
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Test parse_pdf_handler - URL Downloads
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class TestParsePdfHandlerUrl:
    """Tests for parse_pdf_handler when given an HTTP(S) URL."""

    def test_successful_url_processing(
        self,
        sample_pdf_bytes: bytes,
        successful_pipeline_result: Dict[str, Any],
    ) -> None:
        """A downloadable PDF is fetched exactly once and processed to success."""

        async def scenario() -> None:
            with patch(
                "mcp_tools.parsing_tools.download_pdf", new_callable=AsyncMock
            ) as fetch, patch(
                "mcp_tools.parsing_tools.process_pdf_bytes"
            ) as pipeline:
                fetch.return_value = sample_pdf_bytes
                pipeline.return_value = successful_pipeline_result

                outcome = await parse_pdf_handler(
                    ParsePdfInput(pdf_path="https://example.com/philosophy/kant.pdf")
                )

                assert outcome.success is True
                assert outcome.document_name == "test-document"
                fetch.assert_called_once_with(
                    "https://example.com/philosophy/kant.pdf"
                )

        asyncio.run(scenario())

    def test_url_uses_extracted_filename(
        self,
        sample_pdf_bytes: bytes,
        successful_pipeline_result: Dict[str, Any],
    ) -> None:
        """The filename is derived from the URL and forwarded to the pipeline."""

        async def scenario() -> None:
            with patch(
                "mcp_tools.parsing_tools.download_pdf", new_callable=AsyncMock
            ) as fetch, patch(
                "mcp_tools.parsing_tools.process_pdf_bytes"
            ) as pipeline:
                fetch.return_value = sample_pdf_bytes
                pipeline.return_value = successful_pipeline_result

                await parse_pdf_handler(
                    ParsePdfInput(
                        pdf_path="https://example.com/docs/aristotle-metaphysics.pdf"
                    )
                )

                pipeline.assert_called_once()
                forwarded = pipeline.call_args.kwargs
                assert forwarded["filename"] == "aristotle-metaphysics.pdf"

        asyncio.run(scenario())

    def test_url_uses_fixed_parameters(
        self,
        sample_pdf_bytes: bytes,
        successful_pipeline_result: Dict[str, Any],
    ) -> None:
        """URL processing forwards the fixed optimal pipeline parameters."""

        async def scenario() -> None:
            with patch(
                "mcp_tools.parsing_tools.download_pdf", new_callable=AsyncMock
            ) as fetch, patch(
                "mcp_tools.parsing_tools.process_pdf_bytes"
            ) as pipeline:
                fetch.return_value = sample_pdf_bytes
                pipeline.return_value = successful_pipeline_result

                await parse_pdf_handler(
                    ParsePdfInput(pdf_path="https://example.com/document.pdf")
                )

                forwarded = pipeline.call_args.kwargs
                assert forwarded["llm_provider"] == FIXED_LLM_PROVIDER
                assert forwarded["llm_model"] == FIXED_LLM_MODEL
                assert forwarded["use_semantic_chunking"] == FIXED_USE_SEMANTIC_CHUNKING
                assert forwarded["use_ocr_annotations"] == FIXED_USE_OCR_ANNOTATIONS

        asyncio.run(scenario())

    def test_url_download_http_error(self) -> None:
        """An HTTP status error during download yields a failure result."""

        async def scenario() -> None:
            with patch(
                "mcp_tools.parsing_tools.download_pdf", new_callable=AsyncMock
            ) as fetch:
                fetch.side_effect = httpx.HTTPStatusError(
                    "Not Found",
                    request=MagicMock(),
                    response=MagicMock(status_code=404),
                )

                outcome = await parse_pdf_handler(
                    ParsePdfInput(pdf_path="https://example.com/nonexistent.pdf")
                )

                assert outcome.success is False
                assert "Failed to download PDF" in outcome.error

        asyncio.run(scenario())

    def test_url_download_network_error(self) -> None:
        """A connection failure during download yields a failure result."""

        async def scenario() -> None:
            with patch(
                "mcp_tools.parsing_tools.download_pdf", new_callable=AsyncMock
            ) as fetch:
                fetch.side_effect = httpx.ConnectError("Connection refused")

                outcome = await parse_pdf_handler(
                    ParsePdfInput(pdf_path="https://example.com/document.pdf")
                )

                assert outcome.success is False
                assert "Failed to download PDF" in outcome.error

        asyncio.run(scenario())
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Test Cost Tracking
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class TestCostTracking:
    """Tests for cost tracking in parse_pdf output."""

    def test_costs_are_tracked_correctly(
        self,
        temp_pdf_file: Path,
    ) -> None:
        """OCR and LLM costs from the pipeline appear unchanged in the output."""

        async def scenario() -> None:
            pipeline_payload = {
                "success": True,
                "document_name": "test-doc",
                "source_id": "test-doc",
                "pages": 50,
                "chunks_count": 100,
                "cost_ocr": 0.15,  # 50 pages * 0.003€
                "cost_llm": 0.25,
                "cost_total": 0.40,
                "output_dir": Path("output/test-doc"),
                "metadata": {},
                "error": None,
            }
            with patch("mcp_tools.parsing_tools.process_pdf") as pipeline:
                pipeline.return_value = pipeline_payload

                outcome = await parse_pdf_handler(
                    ParsePdfInput(pdf_path=str(temp_pdf_file))
                )

                assert outcome.cost_ocr == 0.15
                assert outcome.cost_llm == 0.25
                assert outcome.cost_total == 0.40

        asyncio.run(scenario())

    def test_cost_total_calculated_when_missing(
        self,
        temp_pdf_file: Path,
    ) -> None:
        """cost_total is derived from cost_ocr + cost_llm when absent."""

        async def scenario() -> None:
            pipeline_payload = {
                "success": True,
                "document_name": "test-doc",
                "source_id": "test-doc",
                "pages": 10,
                "chunks_count": 20,
                "cost_ocr": 0.03,
                "cost_llm": 0.05,
                # cost_total intentionally missing
                "output_dir": Path("output/test-doc"),
                "metadata": {},
                "error": None,
            }
            with patch("mcp_tools.parsing_tools.process_pdf") as pipeline:
                pipeline.return_value = pipeline_payload

                outcome = await parse_pdf_handler(
                    ParsePdfInput(pdf_path=str(temp_pdf_file))
                )

                assert outcome.cost_total == 0.08  # 0.03 + 0.05

        asyncio.run(scenario())

    def test_zero_costs_on_failure(
        self,
        temp_pdf_file: Path,
    ) -> None:
        """An early pipeline crash reports all costs as zero."""

        async def scenario() -> None:
            with patch("mcp_tools.parsing_tools.process_pdf") as pipeline:
                pipeline.side_effect = RuntimeError("Early failure")

                outcome = await parse_pdf_handler(
                    ParsePdfInput(pdf_path=str(temp_pdf_file))
                )

                assert outcome.success is False
                assert outcome.cost_ocr == 0.0
                assert outcome.cost_llm == 0.0
                assert outcome.cost_total == 0.0

        asyncio.run(scenario())
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Test Metadata Handling
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class TestMetadataHandling:
    """Tests for metadata extraction and handling."""

    def test_metadata_extracted_correctly(
        self,
        temp_pdf_file: Path,
    ) -> None:
        """Pipeline metadata is passed through to the output untouched."""

        async def scenario() -> None:
            pipeline_payload = {
                "success": True,
                "document_name": "platon-menon",
                "source_id": "platon-menon",
                "pages": 80,
                "chunks_count": 150,
                "cost_ocr": 0.24,
                "cost_llm": 0.30,
                "cost_total": 0.54,
                "output_dir": Path("output/platon-menon"),
                "metadata": {
                    "title": "Ménon",
                    "author": "Platon",
                    "language": "fr",
                    "year": -380,
                    "genre": "dialogue",
                },
                "error": None,
            }
            with patch("mcp_tools.parsing_tools.process_pdf") as pipeline:
                pipeline.return_value = pipeline_payload

                outcome = await parse_pdf_handler(
                    ParsePdfInput(pdf_path=str(temp_pdf_file))
                )

                expected_metadata = {
                    "title": "Ménon",
                    "author": "Platon",
                    "language": "fr",
                    "year": -380,
                    "genre": "dialogue",
                }
                for key, value in expected_metadata.items():
                    assert outcome.metadata[key] == value

        asyncio.run(scenario())

    def test_empty_metadata_handled(
        self,
        temp_pdf_file: Path,
    ) -> None:
        """A None metadata field from the pipeline becomes an empty dict."""

        async def scenario() -> None:
            pipeline_payload = {
                "success": True,
                "document_name": "test-doc",
                "source_id": "test-doc",
                "pages": 10,
                "chunks_count": 20,
                "cost_ocr": 0.03,
                "cost_llm": 0.05,
                "cost_total": 0.08,
                "output_dir": Path("output/test-doc"),
                "metadata": None,  # Explicitly None
                "error": None,
            }
            with patch("mcp_tools.parsing_tools.process_pdf") as pipeline:
                pipeline.return_value = pipeline_payload

                outcome = await parse_pdf_handler(
                    ParsePdfInput(pdf_path=str(temp_pdf_file))
                )

                assert outcome.metadata == {}

        asyncio.run(scenario())
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Test Output Schema Validation
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class TestOutputSchemaValidation:
    """Tests for ParsePdfOutput schema compliance."""

    # Fields every ParsePdfOutput instance is expected to expose.
    _EXPECTED_FIELDS = (
        "success",
        "document_name",
        "source_id",
        "pages",
        "chunks_count",
        "cost_ocr",
        "cost_llm",
        "cost_total",
        "output_dir",
        "metadata",
        "error",
    )

    def test_output_is_valid_schema(
        self,
        temp_pdf_file: Path,
        successful_pipeline_result: Dict[str, Any],
    ) -> None:
        """A successful run yields a fully-populated ParsePdfOutput."""

        async def _exercise() -> None:
            with patch("mcp_tools.parsing_tools.process_pdf") as pipeline_mock:
                pipeline_mock.return_value = successful_pipeline_result

                payload = ParsePdfInput(pdf_path=str(temp_pdf_file))
                outcome = await parse_pdf_handler(payload)

                # Correct model type.
                assert isinstance(outcome, ParsePdfOutput)

                # All required fields are present on the instance.
                for field_name in self._EXPECTED_FIELDS:
                    assert hasattr(outcome, field_name)

        asyncio.run(_exercise())

    def test_error_output_is_valid_schema(self) -> None:
        """A failed run still yields a schema-conformant ParsePdfOutput."""

        async def _exercise() -> None:
            payload = ParsePdfInput(pdf_path="/nonexistent/file.pdf")
            outcome = await parse_pdf_handler(payload)

            assert isinstance(outcome, ParsePdfOutput)
            assert outcome.success is False
            assert outcome.error is not None
            assert isinstance(outcome.error, str)

        asyncio.run(_exercise())
|
||||
1336
generations/library_rag/tests/mcp/test_retrieval_tools.py
Normal file
1336
generations/library_rag/tests/mcp/test_retrieval_tools.py
Normal file
File diff suppressed because it is too large
Load Diff
256
generations/library_rag/tests/mcp/test_schemas.py
Normal file
256
generations/library_rag/tests/mcp/test_schemas.py
Normal file
@@ -0,0 +1,256 @@
|
||||
"""
|
||||
Unit tests for MCP Pydantic schemas.
|
||||
|
||||
Tests schema validation, field constraints, and JSON schema generation.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
from pydantic import ValidationError
|
||||
|
||||
from mcp_tools.schemas import (
|
||||
ParsePdfInput,
|
||||
ParsePdfOutput,
|
||||
SearchChunksInput,
|
||||
SearchChunksOutput,
|
||||
SearchSummariesInput,
|
||||
GetDocumentInput,
|
||||
ListDocumentsInput,
|
||||
GetChunksByDocumentInput,
|
||||
FilterByAuthorInput,
|
||||
DeleteDocumentInput,
|
||||
ChunkResult,
|
||||
DocumentInfo,
|
||||
)
|
||||
|
||||
|
||||
class TestParsePdfInput:
    """Test ParsePdfInput schema validation."""

    def test_valid_path(self) -> None:
        """A filesystem path passes validation unchanged."""
        path = "/path/to/document.pdf"
        assert ParsePdfInput(pdf_path=path).pdf_path == path

    def test_valid_url(self) -> None:
        """An HTTP(S) URL is accepted as a PDF source."""
        url = "https://example.com/doc.pdf"
        assert ParsePdfInput(pdf_path=url).pdf_path == url

    def test_empty_path_rejected(self) -> None:
        """An empty string violates the minimum-length constraint."""
        with pytest.raises(ValidationError) as excinfo:
            ParsePdfInput(pdf_path="")
        # Pydantic reports the specific min-length error code.
        assert "string_too_short" in str(excinfo.value).lower()
||||
|
||||
|
||||
class TestParsePdfOutput:
    """Test ParsePdfOutput schema."""

    def test_full_output(self) -> None:
        """A complete success payload round-trips through the model."""
        payload = dict(
            success=True,
            document_name="test-doc",
            source_id="test-doc-v1",
            pages=10,
            chunks_count=25,
            cost_ocr=0.03,
            cost_llm=0.01,
            cost_total=0.04,
            output_dir="/output/test-doc",
            metadata={"title": "Test", "author": "Unknown"},
        )
        output = ParsePdfOutput(**payload)
        assert output.success is True
        assert output.cost_total == 0.04
        assert output.metadata["title"] == "Test"

    def test_output_with_error(self) -> None:
        """A failure payload carries its error message through."""
        failed = ParsePdfOutput(
            success=False,
            document_name="failed-doc",
            source_id="",
            pages=0,
            chunks_count=0,
            cost_ocr=0.0,
            cost_llm=0.0,
            cost_total=0.0,
            output_dir="",
            error="PDF processing failed: corrupted file",
        )
        assert failed.success is False
        assert "corrupted" in failed.error  # type: ignore
|
||||
|
||||
|
||||
class TestSearchChunksInput:
    """Test SearchChunksInput schema validation."""

    def test_minimal_input(self) -> None:
        """Only the query is required; other fields take defaults."""
        minimal = SearchChunksInput(query="test query")
        assert minimal.query == "test query"
        # Defaults kick in for everything else.
        assert minimal.limit == 10
        assert minimal.min_similarity == 0.0

    def test_full_input(self) -> None:
        """All optional fields are stored when supplied."""
        full = SearchChunksInput(
            query="What is justice?",
            limit=20,
            min_similarity=0.5,
            author_filter="Platon",
            work_filter="Republic",
            language_filter="fr",
        )
        assert full.limit == 20
        assert full.author_filter == "Platon"

    def test_empty_query_rejected(self) -> None:
        """An empty query fails validation."""
        with pytest.raises(ValidationError):
            SearchChunksInput(query="")

    def test_query_too_long_rejected(self) -> None:
        """A query beyond 1000 characters fails validation."""
        with pytest.raises(ValidationError):
            SearchChunksInput(query="a" * 1001)

    def test_limit_bounds(self) -> None:
        """Limit must stay within its inclusive 1..100 range."""
        for bad_limit in (0, 101):
            with pytest.raises(ValidationError):
                SearchChunksInput(query="test", limit=bad_limit)

    def test_similarity_bounds(self) -> None:
        """Similarity must stay within its inclusive 0.0..1.0 range."""
        for bad_similarity in (-0.1, 1.1):
            with pytest.raises(ValidationError):
                SearchChunksInput(query="test", min_similarity=bad_similarity)
||||
|
||||
|
||||
class TestSearchSummariesInput:
    """Test SearchSummariesInput schema validation."""

    def test_level_filters(self) -> None:
        """min_level and max_level are stored as given."""
        filtered = SearchSummariesInput(query="test", min_level=1, max_level=3)
        assert filtered.min_level == 1
        assert filtered.max_level == 3

    def test_level_bounds(self) -> None:
        """Levels outside the valid range are rejected."""
        # Below the lower bound.
        with pytest.raises(ValidationError):
            SearchSummariesInput(query="test", min_level=0)
        # Above the upper bound.
        with pytest.raises(ValidationError):
            SearchSummariesInput(query="test", max_level=6)
||||
|
||||
|
||||
class TestGetDocumentInput:
    """Test GetDocumentInput schema validation."""

    def test_defaults(self) -> None:
        """Chunks are opt-in and capped at 50 by default."""
        bare = GetDocumentInput(source_id="doc-123")
        assert bare.include_chunks is False
        assert bare.chunk_limit == 50

    def test_with_chunks(self) -> None:
        """Explicit chunk settings override the defaults."""
        detailed = GetDocumentInput(
            source_id="doc-123",
            include_chunks=True,
            chunk_limit=100,
        )
        assert detailed.include_chunks is True
        assert detailed.chunk_limit == 100
||||
|
||||
|
||||
class TestDeleteDocumentInput:
    """Test DeleteDocumentInput schema validation."""

    def test_requires_confirmation(self) -> None:
        """Deletion is guarded: confirm defaults to False."""
        assert DeleteDocumentInput(source_id="doc-to-delete").confirm is False

    def test_with_confirmation(self) -> None:
        """Callers can opt in to deletion explicitly."""
        confirmed = DeleteDocumentInput(source_id="doc-to-delete", confirm=True)
        assert confirmed.confirm is True
||||
|
||||
|
||||
class TestChunkResult:
    """Test ChunkResult model."""

    def test_full_chunk(self) -> None:
        """A fully-populated chunk stores its fields as given."""
        attrs = dict(
            text="This is the chunk content.",
            similarity=0.85,
            section_path="Chapter 1 > Section 1",
            chapter_title="Introduction",
            work_title="The Republic",
            work_author="Platon",
            order_index=5,
        )
        chunk = ChunkResult(**attrs)
        assert chunk.similarity == 0.85
        assert chunk.order_index == 5
||||
|
||||
|
||||
class TestDocumentInfo:
    """Test DocumentInfo model."""

    def test_with_optional_fields(self) -> None:
        """Optional structural fields (toc, hierarchy) are retained when supplied."""
        info = DocumentInfo(
            source_id="platon-republic",
            work_title="The Republic",
            work_author="Platon",
            edition="GF Flammarion",
            pages=500,
            language="fr",
            toc={"chapters": ["I", "II", "III"]},
            hierarchy={"level": 1},
        )
        assert info.toc is not None
        assert info.hierarchy is not None
||||
|
||||
|
||||
class TestJsonSchemaGeneration:
    """Test JSON schema generation from Pydantic models."""

    def test_schemas_have_descriptions(self) -> None:
        """Every documented field should carry a description for MCP clients."""
        properties = SearchChunksInput.model_json_schema()["properties"]

        for field_name in ("query", "limit", "min_similarity"):
            assert "description" in properties[field_name]

    def test_schema_includes_constraints(self) -> None:
        """Validation constraints must surface in the generated JSON schema."""
        props = SearchChunksInput.model_json_schema()["properties"]

        # String-length constraints on the query field.
        assert props["query"].get("minLength") == 1
        assert props["query"].get("maxLength") == 1000

        # Numeric bounds on the result limit.
        assert props["limit"].get("minimum") == 1
        assert props["limit"].get("maximum") == 100
||||
|
||||
|
||||
if __name__ == "__main__":
    # Allow running this test module directly without invoking pytest from the CLI.
    pytest.main([__file__, "-v"])
|
||||
Reference in New Issue
Block a user