- david_profile_declared.json: David's declared profile values from questionnaire - scripts/embed_david.py: Python script to generate embeddings using BGE-M3 model - questionnaire_david.md: Questionnaire template for profile values Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
209 lines
6.9 KiB
Python
209 lines
6.9 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Tests pour la Phase 0: Backup et restauration Weaviate.
|
|
|
|
Usage:
|
|
pytest tests/test_phase0_backup.py -v
|
|
pytest tests/test_phase0_backup.py -v -k test_backup
|
|
"""
|
|
|
|
import json
|
|
import os
|
|
import tempfile
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
import requests
|
|
|
|
# Configuration: base URL of the Weaviate instance under test. The
# WEAVIATE_URL environment variable overrides the local default.
WEAVIATE_URL = os.environ.get("WEAVIATE_URL", "http://localhost:8080")
|
|
|
|
|
|
def weaviate_is_available() -> bool:
    """Return True when the Weaviate readiness endpoint answers HTTP 200.

    Any ``requests.RequestException`` raised while contacting the server
    is reported as ``False`` instead of propagating.
    """
    ready_url = f"{WEAVIATE_URL}/v1/.well-known/ready"
    try:
        status = requests.get(ready_url, timeout=5).status_code
    except requests.RequestException:
        return False
    return status == 200
|
|
|
|
|
|
# Module-wide mark: skip every test in this file when Weaviate is not
# reachable. The probe runs once, when the module is imported.
_weaviate_unreachable = not weaviate_is_available()
pytestmark = pytest.mark.skipif(
    _weaviate_unreachable,
    reason=f"Weaviate non disponible sur {WEAVIATE_URL}",
)
|
|
|
|
|
|
class TestWeaviateConnection:
    """Connectivity checks against the Weaviate REST API."""

    # Seconds to wait for each HTTP call, so an unresponsive server fails
    # the test instead of hanging the whole run.
    REQUEST_TIMEOUT = 5

    def test_weaviate_ready(self):
        """The readiness endpoint must answer HTTP 200."""
        response = requests.get(
            f"{WEAVIATE_URL}/v1/.well-known/ready",
            timeout=self.REQUEST_TIMEOUT,
        )
        assert response.status_code == 200

    def test_weaviate_schema_accessible(self):
        """The schema endpoint must answer HTTP 200 with a "classes" key."""
        response = requests.get(
            f"{WEAVIATE_URL}/v1/schema",
            timeout=self.REQUEST_TIMEOUT,
        )
        assert response.status_code == 200
        data = response.json()
        assert "classes" in data

    def test_weaviate_has_collections(self):
        """At least one expected collection (Thought, Conversation, ...) must exist."""
        response = requests.get(
            f"{WEAVIATE_URL}/v1/schema",
            timeout=self.REQUEST_TIMEOUT,
        )
        # Check the status first so an HTTP error is reported as such rather
        # than as a confusing "no collection found" failure.
        assert response.status_code == 200
        data = response.json()
        classes = [c["class"] for c in data.get("classes") or []]

        # At least one of the expected collections must be present.
        expected = ["Thought", "Conversation", "Message", "Chunk", "Work", "Summary"]
        found = [c for c in classes if c in expected]

        assert found, f"Aucune collection trouvée parmi {expected}. Classes existantes: {classes}"
|
|
|
|
|
|
class TestBackupScript:
    """Tests for the backup script (the ``weaviate_backup`` module)."""

    @staticmethod
    def _backup_function():
        """Import and return ``backup_weaviate`` from the scripts directory.

        The import is done lazily, inside the tests, as in the original
        code. The scripts directory is added to ``sys.path`` only when it is
        not already listed, so repeated tests do not pile up duplicates.
        """
        import sys

        scripts_dir = str(Path(__file__).parent.parent / "scripts")
        if scripts_dir not in sys.path:
            sys.path.insert(0, scripts_dir)

        from weaviate_backup import backup_weaviate

        return backup_weaviate

    def test_backup_creates_file(self):
        """The backup must create a non-empty file."""
        backup_weaviate = self._backup_function()

        with tempfile.TemporaryDirectory() as tmpdir:
            output_path = Path(tmpdir) / "test_backup.json"

            backup_weaviate(
                output_path=output_path,
                collections=None,  # all collections
                include_vectors=False,  # faster for the test
            )

            assert output_path.exists(), "Le fichier de backup n'a pas été créé"
            assert output_path.stat().st_size > 0, "Le fichier de backup est vide"

    def test_backup_structure(self):
        """The backup JSON must expose the expected top-level structure."""
        backup_weaviate = self._backup_function()

        with tempfile.TemporaryDirectory() as tmpdir:
            output_path = Path(tmpdir) / "test_backup.json"

            backup_weaviate(
                output_path=output_path,
                collections=["Thought"],  # a single collection for the test
                include_vectors=False,
            )

            with open(output_path, "r", encoding="utf-8") as f:
                data = json.load(f)

        # Top-level structure.
        assert "metadata" in data
        assert "schema" in data
        assert "collections" in data

        # Metadata fields.
        assert "timestamp" in data["metadata"]
        assert "weaviate_url" in data["metadata"]
        assert "version" in data["metadata"]

    def test_backup_with_vectors(self):
        """A backup taken with ``include_vectors=True`` must contain vectors."""
        backup_weaviate = self._backup_function()

        with tempfile.TemporaryDirectory() as tmpdir:
            output_path = Path(tmpdir) / "test_backup_vectors.json"

            backup_weaviate(
                output_path=output_path,
                collections=["Thought"],
                include_vectors=True,
            )

            with open(output_path, "r", encoding="utf-8") as f:
                data = json.load(f)

        thoughts = data.get("collections", {}).get("Thought", [])
        if not thoughts:
            # Nothing to inspect: report a skip instead of a pass that
            # verified nothing about vectors.
            pytest.skip("No Thought objects in the backup; cannot check vectors")

        # At least one object should carry a vector.
        has_vector = any("vector" in obj for obj in thoughts)
        assert has_vector, "Aucun objet n'a de vecteur alors que include_vectors=True"
|
|
|
|
|
|
class TestRestoreScript:
    """Tests for the restore script (the ``weaviate_restore`` module)."""

    @staticmethod
    def _count_objects(class_name):
        """Return the total number of objects stored in ``class_name``.

        Uses the GraphQL ``Aggregate`` query so the result is the real total.
        Listing ``/v1/objects`` with ``limit=1`` can only ever yield 0 or 1,
        which hides any change to a collection that is already non-empty.

        Returns 0 when the response carries no aggregate for the class (for
        example when the server reports a GraphQL error), which keeps the
        tolerance of the previous REST-based count.
        """
        query = "{ Aggregate { %s { meta { count } } } }" % class_name
        response = requests.post(
            f"{WEAVIATE_URL}/v1/graphql",
            json={"query": query},
            timeout=30,
        )
        payload = response.json()
        aggregate = (payload.get("data") or {}).get("Aggregate") or {}
        buckets = aggregate.get(class_name) or []
        if not buckets:
            return 0
        return buckets[0]["meta"]["count"]

    def test_restore_dry_run(self):
        """A dry-run restore must leave the stored data unchanged."""
        import sys

        scripts_dir = str(Path(__file__).parent.parent / "scripts")
        if scripts_dir not in sys.path:
            sys.path.insert(0, scripts_dir)

        from weaviate_backup import backup_weaviate
        from weaviate_restore import restore_weaviate

        with tempfile.TemporaryDirectory() as tmpdir:
            # First, take a backup to restore from.
            backup_path = Path(tmpdir) / "test_backup.json"
            backup_weaviate(
                output_path=backup_path,
                collections=["Thought"],
                include_vectors=False,
            )

            # Total number of objects before the dry-run.
            count_before = self._count_objects("Thought")

            # Restore in dry-run mode.
            restore_weaviate(
                backup_path=backup_path,
                collections=["Thought"],
                clear_existing=False,
                dry_run=True,
            )

            # Total number of objects after the dry-run.
            count_after = self._count_objects("Thought")

        # The dry-run must not have added or removed anything.
        assert count_before == count_after, "Le dry-run a modifié les données!"
|
|
|
|
|
|
class TestBackupRestoreCycle:
    """Tests for the full backup -> restore cycle."""

    def test_backup_restore_roundtrip(self):
        """Placeholder for the backup -> restore -> verification test.

        The intended test would run against a temporary collection so that
        existing data is not affected. That is not implemented yet, so the
        test is reported as skipped rather than as a pass that checked
        nothing.
        """
        pytest.skip("Backup/restore round-trip test is not implemented yet")
|
|
|
|
|
|
def test_exports_directory_exists():
    """The ``exports`` directory must exist or be creatable."""
    here = Path(__file__).parent
    target = here.parent.parent / "exports"
    # Create the directory (and any missing parents) when it is absent.
    target.mkdir(parents=True, exist_ok=True)
    assert target.exists()
|
|
|
|
|
|
if __name__ == "__main__":
    # Propagate pytest's exit code so that running this file directly
    # returns a non-zero process status when tests fail.
    raise SystemExit(pytest.main([__file__, "-v"]))
|