Add ikario_processual with David profile and embedding script

- david_profile_declared.json: David's declared profile values from questionnaire
- scripts/embed_david.py: Python script to generate embeddings using BGE-M3 model
- questionnaire_david.md: Questionnaire template for profile values

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-31 16:56:41 +01:00
parent 9e657cbf29
commit 21f5676c7b
18 changed files with 5463 additions and 0 deletions

View File

@@ -0,0 +1 @@
# Tests pour ikario_processual

View File

@@ -0,0 +1,208 @@
#!/usr/bin/env python3
"""
Tests pour la Phase 0: Backup et restauration Weaviate.
Usage:
pytest tests/test_phase0_backup.py -v
pytest tests/test_phase0_backup.py -v -k test_backup
"""
import json
import os
import tempfile
from pathlib import Path
import pytest
import requests
# Configuration: base URL of the Weaviate instance the tests talk to.
# Read from the WEAVIATE_URL environment variable, falling back to a
# local instance on port 8080 when the variable is not set.
WEAVIATE_URL = os.getenv("WEAVIATE_URL", "http://localhost:8080")
def weaviate_is_available() -> bool:
    """Report whether the Weaviate readiness endpoint answers with HTTP 200.

    Any ``requests.RequestException`` raised while probing is interpreted as
    "not available" and yields ``False``.
    """
    ready_url = f"{WEAVIATE_URL}/v1/.well-known/ready"
    try:
        status = requests.get(ready_url, timeout=5).status_code
    except requests.RequestException:
        return False
    return status == 200
# Module-wide marker: skip every test here when Weaviate cannot be reached.
_weaviate_unreachable = not weaviate_is_available()
pytestmark = pytest.mark.skipif(
    _weaviate_unreachable,
    reason=f"Weaviate non disponible sur {WEAVIATE_URL}",
)
class TestWeaviateConnection:
    """Connectivity checks against the Weaviate HTTP API."""

    # Seconds to wait on each HTTP call; without a timeout ``requests``
    # waits indefinitely and a stalled server would hang the test run.
    _REQUEST_TIMEOUT = 10

    def test_weaviate_ready(self):
        """The readiness endpoint must answer with HTTP 200."""
        response = requests.get(
            f"{WEAVIATE_URL}/v1/.well-known/ready",
            timeout=self._REQUEST_TIMEOUT,
        )
        assert response.status_code == 200

    def test_weaviate_schema_accessible(self):
        """The schema endpoint must answer 200 with a ``classes`` key."""
        response = requests.get(
            f"{WEAVIATE_URL}/v1/schema",
            timeout=self._REQUEST_TIMEOUT,
        )
        assert response.status_code == 200
        data = response.json()
        assert "classes" in data

    def test_weaviate_has_collections(self):
        """At least one of the expected collections must be in the schema."""
        response = requests.get(
            f"{WEAVIATE_URL}/v1/schema",
            timeout=self._REQUEST_TIMEOUT,
        )
        # Fail on the HTTP status first so an error payload is not
        # misreported as "no collection found".
        assert response.status_code == 200
        data = response.json()
        classes = [c["class"] for c in data.get("classes", [])]

        # Any one of these collections is enough.
        expected = ["Thought", "Conversation", "Message", "Chunk", "Work", "Summary"]
        found = [c for c in classes if c in expected]
        assert found, f"Aucune collection trouvée parmi {expected}. Classes existantes: {classes}"
class TestBackupScript:
    """Exercise ``backup_weaviate`` from ``scripts/weaviate_backup.py``."""

    @staticmethod
    def _load_backup_weaviate():
        """Return ``backup_weaviate``, imported from the sibling ``scripts`` directory.

        The import happens at call time, and the scripts directory is added
        to ``sys.path`` only once so repeated tests do not pile up duplicates.
        """
        import sys

        scripts_dir = str(Path(__file__).parent.parent / "scripts")
        if scripts_dir not in sys.path:
            sys.path.insert(0, scripts_dir)
        from weaviate_backup import backup_weaviate

        return backup_weaviate

    def test_backup_creates_file(self):
        """A backup run must produce a non-empty file at ``output_path``."""
        backup_weaviate = self._load_backup_weaviate()
        with tempfile.TemporaryDirectory() as tmpdir:
            output_path = Path(tmpdir) / "test_backup.json"
            backup_weaviate(
                output_path=output_path,
                collections=None,  # all collections
                include_vectors=False,  # faster for the test
            )
            assert output_path.exists(), "Le fichier de backup n'a pas été créé"
            assert output_path.stat().st_size > 0, "Le fichier de backup est vide"

    def test_backup_structure(self):
        """The backup JSON must expose the expected top-level and metadata keys."""
        backup_weaviate = self._load_backup_weaviate()
        with tempfile.TemporaryDirectory() as tmpdir:
            output_path = Path(tmpdir) / "test_backup.json"
            backup_weaviate(
                output_path=output_path,
                collections=["Thought"],  # a single collection is enough here
                include_vectors=False,
            )
            with open(output_path, "r", encoding="utf-8") as f:
                data = json.load(f)

        # Top-level structure.
        for key in ("metadata", "schema", "collections"):
            assert key in data
        # Metadata fields.
        for key in ("timestamp", "weaviate_url", "version"):
            assert key in data["metadata"]

    def test_backup_with_vectors(self):
        """With ``include_vectors=True`` at least one object must carry a vector."""
        backup_weaviate = self._load_backup_weaviate()
        with tempfile.TemporaryDirectory() as tmpdir:
            output_path = Path(tmpdir) / "test_backup_vectors.json"
            backup_weaviate(
                output_path=output_path,
                collections=["Thought"],
                include_vectors=True,
            )
            with open(output_path, "r", encoding="utf-8") as f:
                data = json.load(f)

        thoughts = data.get("collections", {}).get("Thought", [])
        if not thoughts:
            # Nothing to inspect: report a skip instead of a vacuous pass.
            pytest.skip("Collection Thought vide: impossible de vérifier les vecteurs")
        has_vector = any("vector" in obj for obj in thoughts)
        assert has_vector, "Aucun objet n'a de vecteur alors que include_vectors=True"
class TestRestoreScript:
    """Exercise ``restore_weaviate`` from ``scripts/weaviate_restore.py``."""

    @staticmethod
    def _count_thoughts() -> int:
        """Return the number of ``Thought`` objects via a GraphQL Aggregate query.

        Listing objects with ``limit=1`` can only ever yield 0 or 1 items, so
        it cannot reveal whether a restore added or removed data; the
        aggregate ``meta.count`` gives the real total.
        """
        response = requests.post(
            f"{WEAVIATE_URL}/v1/graphql",
            json={"query": "{ Aggregate { Thought { meta { count } } } }"},
            timeout=30,
        )
        response.raise_for_status()
        payload = response.json()
        assert "errors" not in payload, f"Erreur GraphQL: {payload.get('errors')}"
        return payload["data"]["Aggregate"]["Thought"][0]["meta"]["count"]

    def test_restore_dry_run(self):
        """A dry-run restore must leave the ``Thought`` object count unchanged."""
        import sys

        scripts_dir = str(Path(__file__).parent.parent / "scripts")
        if scripts_dir not in sys.path:
            sys.path.insert(0, scripts_dir)
        from weaviate_backup import backup_weaviate
        from weaviate_restore import restore_weaviate

        with tempfile.TemporaryDirectory() as tmpdir:
            # First take a backup to restore from.
            backup_path = Path(tmpdir) / "test_backup.json"
            backup_weaviate(
                output_path=backup_path,
                collections=["Thought"],
                include_vectors=False,
            )

            count_before = self._count_thoughts()

            # Restore in dry-run mode.
            restore_weaviate(
                backup_path=backup_path,
                collections=["Thought"],
                clear_existing=False,
                dry_run=True,
            )

            count_after = self._count_thoughts()

        assert count_before == count_after, "Le dry-run a modifié les données!"
class TestBackupRestoreCycle:
    """Full backup -> restore cycle tests."""

    def test_backup_restore_roundtrip(self):
        """Placeholder for the backup -> restore -> verification round trip.

        A real implementation needs a temporary collection so existing data
        is not affected. Until then the test is skipped explicitly rather
        than reported as a pass with no assertions behind it.
        """
        pytest.skip("Roundtrip backup/restore non implémenté (collection temporaire requise)")
def test_exports_directory_exists():
    """Check that the ``exports`` directory, three levels above this file, exists.

    The directory (and any missing parents) is created first, so the test
    only fails when it cannot be created.
    """
    tests_dir = Path(__file__).parent
    exports_dir = tests_dir.parent.parent / "exports"
    exports_dir.mkdir(exist_ok=True, parents=True)
    assert exports_dir.exists()
if __name__ == "__main__":
    # pytest.main() returns the run's exit code; propagate it so running this
    # file directly exits non-zero when tests fail.
    raise SystemExit(pytest.main([__file__, "-v"]))

View File

@@ -0,0 +1,202 @@
#!/usr/bin/env python3
"""
Tests pour la Phase 1: StateVector et S(0).
Usage:
pytest tests/test_phase1_state_vector.py -v
"""
import os
import sys
from pathlib import Path
import pytest
import requests
import numpy as np
# Ajouter le parent au path
sys.path.insert(0, str(Path(__file__).parent.parent))
from state_vector import (
check_weaviate_ready,
get_existing_classes,
filter_thoughts,
filter_assistant_messages,
get_state_vector,
get_current_state_id,
WEAVIATE_URL,
)
def weaviate_is_available() -> bool:
    """Return the result of ``check_weaviate_ready()`` from ``state_vector``."""
    ready = check_weaviate_ready()
    return ready
# Module-wide marker: skip every test here when Weaviate cannot be reached.
_weaviate_unreachable = not weaviate_is_available()
pytestmark = pytest.mark.skipif(
    _weaviate_unreachable,
    reason=f"Weaviate non disponible sur {WEAVIATE_URL}",
)
class TestStateVectorCollection:
    """Checks on the ``StateVector`` collection and its schema."""

    def test_state_vector_collection_exists(self):
        """``StateVector`` must be among the existing classes."""
        classes = get_existing_classes()
        assert "StateVector" in classes, \
            f"StateVector non trouve. Classes: {classes}"

    def test_state_vector_schema_correct(self):
        """The ``StateVector`` class must declare the required properties."""
        # A timeout keeps a stalled server from hanging the test run.
        response = requests.get(f"{WEAVIATE_URL}/v1/schema", timeout=10)
        assert response.status_code == 200
        schema = response.json()

        state_vector_class = next(
            (c for c in schema.get("classes", []) if c["class"] == "StateVector"),
            None,
        )
        assert state_vector_class is not None

        # Required properties.
        prop_names = [p["name"] for p in state_vector_class.get("properties", [])]
        required = ["state_id", "timestamp", "trigger_type", "occasion_summary"]
        for req in required:
            assert req in prop_names, f"Propriete manquante: {req}"
class TestInitialState:
    """Checks on the initial state S(0) returned by ``get_state_vector(0)``."""

    def test_s0_exists(self):
        """S(0) must be retrievable and carry ``state_id`` 0."""
        initial = get_state_vector(0)
        assert initial is not None, "S(0) non trouve"
        assert initial.get("state_id") == 0

    def test_s0_has_vector(self):
        """S(0) must expose a vector under ``_additional``."""
        initial = get_state_vector(0)
        assert initial is not None
        extras = initial.get("_additional", {})
        assert extras.get("vector") is not None, "S(0) n'a pas de vecteur"

    def test_s0_vector_is_1024_dim(self):
        """The S(0) vector must have 1024 components."""
        initial = get_state_vector(0)
        assert initial is not None
        extras = initial.get("_additional", {})
        vector = extras.get("vector")
        assert vector is not None
        assert len(vector) == 1024, f"Dimension: {len(vector)} (attendu: 1024)"

    def test_s0_vector_is_normalized(self):
        """The S(0) vector must have a Euclidean norm within 0.01 of 1."""
        initial = get_state_vector(0)
        assert initial is not None
        extras = initial.get("_additional", {})
        components = np.array(extras.get("vector", []))
        norm = np.linalg.norm(components)
        assert abs(norm - 1.0) < 0.01, f"Norme: {norm} (attendu: ~1.0)"

    def test_s0_has_source_counts(self):
        """S(0) must record both source counters, at least one of them positive."""
        initial = get_state_vector(0)
        assert initial is not None
        thoughts_count = initial.get("source_thoughts_count")
        messages_count = initial.get("source_messages_count")
        assert thoughts_count is not None, "source_thoughts_count manquant"
        assert messages_count is not None, "source_messages_count manquant"
        built_from_data = thoughts_count > 0 or messages_count > 0
        assert built_from_data, \
            "S(0) doit etre construit a partir de donnees"

    def test_s0_trigger_type_is_initialization(self):
        """The ``trigger_type`` of S(0) must be ``'initialization'``."""
        initial = get_state_vector(0)
        assert initial is not None
        assert initial.get("trigger_type") == "initialization"
class TestFiltering:
    """Checks on ``filter_thoughts`` and ``filter_assistant_messages``."""

    def test_filter_thoughts_excludes_test(self):
        """Of four sample thoughts, only the two reflections must remain."""
        samples = [
            ("Ceci est une vraie pensee philosophique", "reflection"),
            ("test test test", "test"),
            ("debug: checking values", "debug"),
            ("Une autre pensee valide sur Whitehead", "reflection"),
        ]
        thoughts = [
            {"properties": {"content": text, "thought_type": kind}}
            for text, kind in samples
        ]
        kept = filter_thoughts(thoughts)
        assert len(kept) == 2
        for item in kept:
            assert "test" not in item["properties"]["content"].lower()

    def test_filter_thoughts_excludes_short(self):
        """A two-character thought must be dropped; the long one is kept."""
        samples = [
            "OK",
            "Une pensee suffisamment longue pour etre valide",
        ]
        thoughts = [
            {"properties": {"content": text, "thought_type": "reflection"}}
            for text in samples
        ]
        kept = filter_thoughts(thoughts)
        assert len(kept) == 1
        survivor = kept[0]["properties"]["content"]
        assert len(survivor) >= 20

    def test_filter_messages_keeps_only_assistant(self):
        """Of user/assistant/system messages, only the assistant one must remain."""
        samples = [
            ("user", "Question de l'utilisateur"),
            ("assistant", "Reponse d'Ikario avec suffisamment de contenu pour etre valide"),
            ("system", "Message systeme"),
        ]
        messages = [
            {"properties": {"role": role, "content": text}}
            for role, text in samples
        ]
        kept = filter_assistant_messages(messages)
        assert len(kept) == 1
        assert kept[0]["properties"]["role"] == "assistant"

    def test_filter_messages_excludes_short(self):
        """A two-character assistant message must be dropped; the long one is kept."""
        samples = [
            "OK",
            "Une reponse complete avec suffisamment de contenu pour representer une vraie interaction",
        ]
        messages = [
            {"properties": {"role": "assistant", "content": text}}
            for text in samples
        ]
        kept = filter_assistant_messages(messages)
        assert len(kept) == 1
        survivor = kept[0]["properties"]["content"]
        assert len(survivor) >= 50
class TestStateVectorOperations:
    """Checks on the StateVector lookup helpers."""

    def test_get_current_state_id(self):
        """``get_current_state_id`` must return a value of at least 0."""
        latest_id = get_current_state_id()
        assert latest_id >= 0, "Aucun etat trouve"

    def test_get_state_vector_returns_none_for_invalid_id(self):
        """``get_state_vector`` must return ``None`` for id 99999."""
        missing = get_state_vector(99999)
        assert missing is None
if __name__ == "__main__":
    # pytest.main() returns the run's exit code; propagate it so running this
    # file directly exits non-zero when tests fail.
    raise SystemExit(pytest.main([__file__, "-v"]))

View File

@@ -0,0 +1,210 @@
#!/usr/bin/env python3
"""
Tests pour la Phase 2: Directions de Projection.
Usage:
pytest tests/test_phase2_directions.py -v
"""
import os
import sys
from pathlib import Path
import pytest
import requests
import numpy as np
# Ajouter le parent au path
sys.path.insert(0, str(Path(__file__).parent.parent))
from state_vector import (
check_weaviate_ready,
get_state_vector,
WEAVIATE_URL,
)
from projection_directions import (
get_existing_classes,
get_direction,
get_all_directions,
get_state_profile,
project_state_on_direction,
DIRECTIONS_CONFIG,
)
def weaviate_is_available() -> bool:
    """Return the result of ``check_weaviate_ready()`` from ``state_vector``."""
    ready = check_weaviate_ready()
    return ready
# Module-wide marker: skip every test here when Weaviate cannot be reached.
_weaviate_unreachable = not weaviate_is_available()
pytestmark = pytest.mark.skipif(
    _weaviate_unreachable,
    reason=f"Weaviate non disponible sur {WEAVIATE_URL}",
)
class TestProjectionDirectionCollection:
    """Checks on the ``ProjectionDirection`` collection contents."""

    def test_collection_exists(self):
        """``ProjectionDirection`` must be among the existing classes."""
        assert "ProjectionDirection" in get_existing_classes()

    def test_all_directions_created(self):
        """Every name in ``DIRECTIONS_CONFIG`` must have a stored direction."""
        stored_names = [entry["name"] for entry in get_all_directions()]
        for name in DIRECTIONS_CONFIG:
            assert name in stored_names, f"Direction manquante: {name}"

    def test_directions_count(self):
        """The number of stored directions must equal the number configured."""
        stored = get_all_directions()
        configured = len(DIRECTIONS_CONFIG)
        assert len(stored) == configured
class TestDirectionVectors:
    """Checks on the vectors stored for projection directions."""

    def test_curiosity_direction_exists(self):
        """The ``curiosity`` direction must exist in the ``epistemic`` category."""
        direction = get_direction("curiosity")
        assert direction is not None
        assert direction["name"] == "curiosity"
        assert direction["category"] == "epistemic"

    def test_direction_has_vector(self):
        """The ``curiosity`` direction must carry a non-empty vector."""
        direction = get_direction("curiosity")
        assert direction is not None
        vector = direction.get("_additional", {}).get("vector")
        assert vector is not None
        assert len(vector) > 0

    def test_direction_vector_is_1024_dim(self):
        """The ``curiosity`` vector must have 1024 components."""
        direction = get_direction("curiosity")
        assert direction is not None
        vector = direction.get("_additional", {}).get("vector")
        # Assert presence first: len(None) would raise TypeError rather than
        # report a readable test failure.
        assert vector is not None, "La direction n'a pas de vecteur"
        assert len(vector) == 1024

    def test_direction_vector_is_normalized(self):
        """The ``curiosity`` vector must have a Euclidean norm within 0.01 of 1."""
        direction = get_direction("curiosity")
        assert direction is not None
        raw_vector = direction.get("_additional", {}).get("vector")
        # Same guard as above: np.linalg.norm cannot handle a missing vector.
        assert raw_vector is not None, "La direction n'a pas de vecteur"
        vector = np.array(raw_vector)
        norm = np.linalg.norm(vector)
        assert abs(norm - 1.0) < 0.01, f"Norme: {norm}"

    def test_all_categories_present(self):
        """The stored directions must cover exactly the five expected categories."""
        directions = get_all_directions()
        categories = {d["category"] for d in directions}
        expected_categories = {"epistemic", "affective", "relational", "vital", "philosophical"}
        assert categories == expected_categories
class TestProjection:
    """Checks on ``get_state_profile`` and ``project_state_on_direction``."""

    def test_projection_in_range(self):
        """Every component of the S(0) profile must lie in [-1, 1]."""
        initial = get_state_vector(0)
        assert initial is not None
        extras = initial.get("_additional", {})
        state_vec = np.array(extras.get("vector"))
        profile = get_state_profile(state_vec)
        for components in profile.values():
            for name, value in components.items():
                assert -1 <= value <= 1, f"{name} = {value} hors limites [-1, 1]"

    def test_get_state_profile_structure(self):
        """The profile must be a dict of dicts whose leaf values are floats."""
        initial = get_state_vector(0)
        assert initial is not None
        extras = initial.get("_additional", {})
        state_vec = np.array(extras.get("vector"))
        profile = get_state_profile(state_vec)

        # Expected shape: {category: {component_name: float}}.
        assert isinstance(profile, dict)
        for components in profile.values():
            assert isinstance(components, dict)
            for value in components.values():
                assert isinstance(value, float)

    def test_projection_orthogonal_vectors(self):
        """Two orthogonal unit vectors must project to (almost) zero."""
        first_axis = np.zeros(1024)
        second_axis = np.zeros(1024)
        first_axis[0] = 1.0
        second_axis[1] = 1.0
        result = project_state_on_direction(first_axis, second_axis)
        assert abs(result) < 0.001

    def test_projection_parallel_vectors(self):
        """A unit vector projected onto itself must give (almost) 1."""
        sample = np.random.randn(1024)
        unit = sample / np.linalg.norm(sample)
        result = project_state_on_direction(unit, unit)
        assert abs(result - 1.0) < 0.001

    def test_projection_antiparallel_vectors(self):
        """A unit vector projected onto its negation must give (almost) -1."""
        sample = np.random.randn(1024)
        unit = sample / np.linalg.norm(sample)
        result = project_state_on_direction(unit, -unit)
        assert abs(result + 1.0) < 0.001
class TestS0Profile:
    """Checks on the profile computed from the S(0) vector."""

    def test_s0_has_profile(self):
        """The S(0) profile must be non-empty."""
        initial = get_state_vector(0)
        assert initial is not None
        extras = initial.get("_additional", {})
        profile = get_state_profile(np.array(extras.get("vector")))
        assert len(profile) > 0

    def test_s0_profile_has_all_categories(self):
        """The S(0) profile keys must be exactly the five expected categories."""
        initial = get_state_vector(0)
        assert initial is not None
        extras = initial.get("_additional", {})
        profile = get_state_profile(np.array(extras.get("vector")))
        expected = {"epistemic", "affective", "relational", "vital", "philosophical"}
        assert set(profile.keys()) == expected

    def test_s0_has_curiosity_component(self):
        """The ``epistemic`` category of the S(0) profile must contain ``curiosity``."""
        initial = get_state_vector(0)
        assert initial is not None
        extras = initial.get("_additional", {})
        profile = get_state_profile(np.array(extras.get("vector")))
        epistemic = profile.get("epistemic", {})
        assert "curiosity" in epistemic
if __name__ == "__main__":
    # pytest.main() returns the run's exit code; propagate it so running this
    # file directly exits non-zero when tests fail.
    raise SystemExit(pytest.main([__file__, "-v"]))