Files
David Blanc Brioir 21f5676c7b Add ikario_processual with David profile and embedding script
- david_profile_declared.json: David's declared profile values from questionnaire
- scripts/embed_david.py: Python script to generate embeddings using BGE-M3 model
- questionnaire_david.md: Questionnaire template for profile values

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-31 16:57:03 +01:00

286 lines
8.0 KiB
Python

#!/usr/bin/env python3
"""
Script de vérification de la Phase 0.
Vérifie que tous les prérequis sont en place:
1. Weaviate est accessible
2. Les collections existent
3. Le backup fonctionne
4. La restauration (dry-run) fonctionne
Usage:
python verify_phase0.py
"""
import os
import sys
import tempfile
from pathlib import Path
import requests
# Configuration
WEAVIATE_URL = os.getenv("WEAVIATE_URL", "http://localhost:8080")
# Couleurs pour l'output (désactivées sur Windows si problème encodage)
import platform
if platform.system() == "Windows":
GREEN = ""
RED = ""
YELLOW = ""
RESET = ""
CHECK = "[OK]"
CROSS = "[FAIL]"
WARN = "[WARN]"
else:
GREEN = "\033[92m"
RED = "\033[91m"
YELLOW = "\033[93m"
RESET = "\033[0m"
CHECK = "\u2713"
CROSS = "\u2717"
WARN = "\u26A0"
def print_ok(msg: str):
print(f" {GREEN}{CHECK}{RESET} {msg}")
def print_fail(msg: str):
print(f" {RED}{CROSS}{RESET} {msg}")
def print_warn(msg: str):
print(f" {YELLOW}{WARN}{RESET} {msg}")
def check_weaviate_connection() -> bool:
"""Vérifie la connexion à Weaviate."""
print("\n[1/5] Connexion Weaviate...")
try:
response = requests.get(f"{WEAVIATE_URL}/v1/.well-known/ready", timeout=5)
if response.status_code == 200:
print_ok(f"Weaviate accessible sur {WEAVIATE_URL}")
return True
else:
print_fail(f"Weaviate répond avec status {response.status_code}")
return False
except requests.RequestException as e:
print_fail(f"Impossible de se connecter à Weaviate: {e}")
return False
def check_collections() -> tuple[bool, list[str]]:
"""Vérifie les collections existantes."""
print("\n[2/5] Collections Weaviate...")
try:
response = requests.get(f"{WEAVIATE_URL}/v1/schema")
schema = response.json()
classes = [c["class"] for c in schema.get("classes", [])]
expected = ["Thought", "Conversation", "Message", "Chunk", "Work", "Summary"]
found = [c for c in classes if c in expected]
missing = [c for c in expected if c not in classes]
if found:
print_ok(f"Collections trouvées: {', '.join(found)}")
if missing:
print_warn(f"Collections manquantes: {', '.join(missing)}")
# Compter les objets
for class_name in found:
response = requests.get(f"{WEAVIATE_URL}/v1/objects?class={class_name}&limit=1")
# Note: Pour avoir le count exact, il faudrait utiliser l'API aggregate
objects = response.json().get("objects", [])
if objects:
print_ok(f" {class_name}: contient des objets")
else:
print_warn(f" {class_name}: vide")
return len(found) > 0, found
except Exception as e:
print_fail(f"Erreur lors de la vérification du schéma: {e}")
return False, []
def check_backup_script() -> bool:
"""Vérifie que le script de backup fonctionne."""
print("\n[3/5] Script de backup...")
scripts_dir = Path(__file__).parent
backup_script = scripts_dir / "weaviate_backup.py"
if not backup_script.exists():
print_fail(f"Script non trouvé: {backup_script}")
return False
print_ok("Script weaviate_backup.py présent")
# Tester l'import
try:
sys.path.insert(0, str(scripts_dir))
from weaviate_backup import backup_weaviate, check_weaviate_ready
if check_weaviate_ready():
print_ok("Fonction check_weaviate_ready() fonctionne")
else:
print_fail("check_weaviate_ready() retourne False")
return False
except ImportError as e:
print_fail(f"Erreur d'import: {e}")
return False
# Tester un backup rapide
try:
with tempfile.TemporaryDirectory() as tmpdir:
output_path = Path(tmpdir) / "test_backup.json"
backup_weaviate(
output_path=output_path,
collections=["Thought"],
include_vectors=False
)
if output_path.exists() and output_path.stat().st_size > 0:
print_ok(f"Backup de test créé ({output_path.stat().st_size} bytes)")
return True
else:
print_fail("Backup de test vide ou non créé")
return False
except Exception as e:
print_fail(f"Erreur lors du backup de test: {e}")
return False
def check_restore_script() -> bool:
"""Vérifie que le script de restauration fonctionne."""
print("\n[4/5] Script de restauration...")
scripts_dir = Path(__file__).parent
restore_script = scripts_dir / "weaviate_restore.py"
if not restore_script.exists():
print_fail(f"Script non trouvé: {restore_script}")
return False
print_ok("Script weaviate_restore.py présent")
# Tester l'import
try:
sys.path.insert(0, str(scripts_dir))
from weaviate_restore import restore_weaviate, get_existing_classes
classes = get_existing_classes()
print_ok(f"Fonction get_existing_classes() retourne {len(classes)} classes")
return True
except ImportError as e:
print_fail(f"Erreur d'import: {e}")
return False
def check_directory_structure() -> bool:
"""Vérifie la structure des dossiers."""
print("\n[5/5] Structure des dossiers...")
base_dir = Path(__file__).parent.parent
required_dirs = [
base_dir,
base_dir / "scripts",
base_dir / "tests",
]
optional_dirs = [
base_dir.parent / "exports",
]
all_ok = True
for d in required_dirs:
if d.exists():
print_ok(f"Dossier: {d.relative_to(base_dir.parent)}")
else:
print_fail(f"Dossier manquant: {d.relative_to(base_dir.parent)}")
all_ok = False
for d in optional_dirs:
if d.exists():
print_ok(f"Dossier: {d.relative_to(base_dir.parent)}")
else:
print_warn(f"Dossier optionnel absent: {d.relative_to(base_dir.parent)}")
# Créer le dossier
d.mkdir(parents=True, exist_ok=True)
print_ok(f" → Créé: {d.relative_to(base_dir.parent)}")
return all_ok
def main():
print("=" * 60)
print("VÉRIFICATION PHASE 0 - Préparation et Backup")
print("=" * 60)
results = {}
# 1. Connexion Weaviate
results["weaviate"] = check_weaviate_connection()
if not results["weaviate"]:
print("\n" + "=" * 60)
print(f"{RED}ÉCHEC{RESET}: Weaviate n'est pas accessible.")
print("Assurez-vous que Weaviate tourne:")
print(" docker start weaviate")
print(" # ou")
print(" docker run -d --name weaviate -p 8080:8080 ...")
print("=" * 60)
sys.exit(1)
# 2. Collections
results["collections"], found_collections = check_collections()
# 3. Script backup
results["backup"] = check_backup_script()
# 4. Script restore
results["restore"] = check_restore_script()
# 5. Structure dossiers
results["structure"] = check_directory_structure()
# Résumé
print("\n" + "=" * 60)
print("RÉSUMÉ PHASE 0")
print("=" * 60)
all_passed = all(results.values())
for check, passed in results.items():
status = f"{GREEN}OK{RESET}" if passed else f"{RED}ÉCHEC{RESET}"
print(f" {check}: {status}")
print()
if all_passed:
print(f"{GREEN}{CHECK} PHASE 0 VALIDEE{RESET}")
print("\nProchaines etapes:")
print(" 1. Creer un backup complet:")
print(" python scripts/weaviate_backup.py --output exports/backup_phase0.json")
print(" 2. Creer la branche git:")
print(" git checkout -b feature/processual-v3")
print(" 3. Passer a la Phase 1:")
print(" python scripts/phase1_state_vector.py")
else:
print(f"{RED}{CROSS} PHASE 0 INCOMPLETE{RESET}")
print("\nCorrigez les erreurs ci-dessus avant de continuer.")
print("=" * 60)
sys.exit(0 if all_passed else 1)
if __name__ == "__main__":
main()