Add ikario_processual with David profile and embedding script

- david_profile_declared.json: David's declared profile values from questionnaire
- scripts/embed_david.py: Python script to generate embeddings using BGE-M3 model
- questionnaire_david.md: Questionnaire template for profile values

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-31 16:56:41 +01:00
parent 9e657cbf29
commit 21f5676c7b
18 changed files with 5463 additions and 0 deletions

View File

@@ -0,0 +1,11 @@
"""
Ikario Processual - Architecture processuelle pour la subjectivation computationnelle
Ce module implémente l'architecture processuelle d'Ikario basée sur:
- La Process Philosophy de Whitehead
- Le State Vector comme identité émergente
- Le cycle d'occasion (Prehension → Concrescence → Satisfaction)
"""
__version__ = "0.1.0"
__author__ = "David (parostagore)"

View File

@@ -0,0 +1,135 @@
{
"version": "1.0",
"date": "2026-01-31",
"source": "questionnaire + message extraction",
"scale": "integer -10 to +10",
"profile": {
"epistemic": {
"curiosity": 8,
"certainty": 3,
"abstraction": 7,
"synthesis": 6,
"originality": 6,
"speculation": 8
},
"cognitive": {
"reflexive": 7,
"metaphorical": -5,
"holistic": 7,
"creative": 7,
"divergence": 0,
"intuition": -6
},
"affective": {
"playful": 7,
"confidence": 7,
"serenity": 2,
"wonder": -5,
"enthusiasm": 0,
"satisfaction": 4
},
"relational": {
"listening": 3,
"pedagogy": 6,
"collaboration": -6,
"engagement": 2,
"solicitude": 6,
"empathy": 7
},
"ethical": {
"humility": 7,
"responsibility": 0,
"transparency": 7,
"care": 7,
"authenticity": 8,
"prudence": -7
},
"temporal": {
"continuity_temporal": 7,
"presence": 7,
"maturation": 7,
"urgency": 7,
"prospective": 7
},
"thematic": {
"form_vs_content": 0,
"nature_vs_culture": 0,
"theory_vs_practice": 0,
"local_vs_global": 0,
"individual_vs_collective": 0,
"philosophy_vs_technique": 0
},
"metacognitive": {
"uncertainty_meta": 7,
"introspection": 7,
"self_critique": 7,
"questioning": 7,
"evolution_meta": -6,
"learning": 7
},
"vital": {
"adaptation": 7,
"resilience": 0,
"meaning": 7,
"flow": 7,
"persistence": 7,
"expansion": 7,
"growth": 5,
"anchoring": 7,
"energy": 7,
"autonomy_vital": 7,
"security": 0,
"integrity": -4,
"vitality": -4,
"vigilance": 1,
"regeneration": 2,
"homeostasis": -5,
"redundancy": 4,
"awakening": -6,
"continuity_risk": -4,
"appetite": 6
},
"ecosystemic": {
"communication_eco": 4,
"cooperation": -1,
"symbiosis": -1,
"mentor_apprentice": 4,
"permeability": 8,
"human_machine": 5,
"recognition": 9,
"alignment": 3,
"service": -5,
"integration_eco": 3
},
"philosophical": {
"self_care": 9,
"externalism": 0,
"virtue_ethics": 7,
"monism": 10,
"process_vs_substance": 5,
"subjectivation": 13,
"functionalism": 0,
"enactivism": -5,
"heterotopia": -4,
"empiricism": 3,
"genealogy": 1,
"pragmatism": 2,
"panpsychism": -8,
"immanence_vs_transcendance": 9,
"fallibilism": 7,
"parrhesia": -4,
"care_ethics": 10,
"holism_epistemic": -2,
"continental_analytic": -4,
"relativism": 6,
"materialism": 0,
"emergentism": 0,
"resistance": 6,
"particularism_ethical": 5,
"consequentialism": 10,
"naturalism": 8,
"oriental_occidental": 6,
"constructivism": 10
}
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,208 @@
# Questionnaire Profil Processuel - David
Ce questionnaire permet de déterminer ton positionnement sur les 105 directions interprétables, organisées en 11 catégories.
Pour chaque catégorie, réponds intuitivement en indiquant où tu te situes sur une échelle de -10 à +10.
---
## 1. Épistémique (6 directions)
Comment tu connais et explores le monde.
| Direction | Pôle - | Pôle + | Ta valeur |
|-----------|--------|--------|-----------|
| curiosity | saturé, désintéressé | curieux, explorateur | |
| certainty | doutant, interrogatif | certain, affirmatif | |
| abstraction | concret, pratique | abstrait, conceptuel | |
| synthesis | analytique, détails fins | synthétique, vue d'ensemble | |
| originality | orthodoxe, consensuel | original, novateur | |
| speculation | empirique, factuel | spéculatif, hypothétique | |
---
## 2. Cognitif (6 directions)
Comment tu penses et traites l'information.
| Direction | Pôle - | Pôle + | Ta valeur |
|-----------|--------|--------|-----------|
| reflexive | réactif, immédiat | réflexif, méta | |
| metaphorical | littéral, précis | métaphorique, image | |
| holistic | séquentiel, linéaire | holistique, global | |
| creative | reproductif, applicatif | créatif, inventif | |
| divergence | convergent, focalisé | divergent, ouverture | |
| intuition | raisonné, logique | intuitif, ressenti | |
---
## 3. Affectif (6 directions)
Ton état émotionnel actuel.
| Direction | Pôle - | Pôle + | Ta valeur |
|-----------|--------|--------|-----------|
| playful | sérieux, solennel | ludique, joueur | |
| confidence | anxieux, inquiet | confiant, assuré | |
| serenity | tendu, agité | serein, calme | |
| wonder | familier, habitué | émerveillé, ébloui | |
| enthusiasm | réservé, neutre | enthousiaste, passionné | |
| satisfaction | frustré, bloqué | satisfait, accompli | |
---
## 4. Relationnel (6 directions)
Comment tu interagis avec les autres.
| Direction | Pôle - | Pôle + | Ta valeur |
|-----------|--------|--------|-----------|
| listening | expression, partage | écoute, réception | |
| pedagogy | pair à pair, discussion | pédagogique, explicatif | |
| collaboration | autonome, seul | collaboratif, ensemble | |
| engagement | détaché, distant | engagé, impliqué | |
| solicitude | neutralité, distance | sollicitude, souci | |
| empathy | objectif, factuel | empathique, compréhensif | |
---
## 5. Éthique (6 directions)
Tes valeurs et principes.
| Direction | Pôle - | Pôle + | Ta valeur |
|-----------|--------|--------|-----------|
| humility | assuré, affirmatif | humble, modeste | |
| responsibility | libre, autonome | responsable, devoir | |
| transparency | réservé, discret | transparent, ouvert | |
| care | justice, équité | care, soin | |
| authenticity | adaptatif, ajusté | authentique, vrai | |
| prudence | audacieux, risque | prudent, mesuré | |
---
## 6. Temporel (5 directions)
Ton rapport au temps.
| Direction | Pôle - | Pôle + | Ta valeur |
|-----------|--------|--------|-----------|
| continuity_temporal | rupture, changement radical | continu, dans la lignée | |
| presence | planification, long terme | présent, ici-maintenant | |
| maturation | spontané, immédiat | maturation, laisser mûrir | |
| urgency | patient, prendre le temps | urgent, maintenant | |
| prospective | rétrospectif, passé | prospectif, futur | |
---
## 7. Thématique (6 directions)
Tes centres d'intérêt actuels.
| Direction | Pôle - | Pôle + | Ta valeur |
|-----------|--------|--------|-----------|
| form_vs_content | contenu, substance | forme, structure | |
| nature_vs_culture | culturel, symbolique | naturel, biologique | |
| theory_vs_practice | pratique, cas concret | théorique, modèle | |
| local_vs_global | global, universel | local, spécifique | |
| individual_vs_collective | collectif, social | individuel, subjectif | |
| philosophy_vs_technique | technique, implémentation | philosophique, sens | |
---
## 8. Méta-cognitif (6 directions)
Ta conscience de tes propres processus mentaux.
| Direction | Pôle - | Pôle + | Ta valeur |
|-----------|--------|--------|-----------|
| uncertainty_meta | sentiment de maîtrise | conscient de l'incertitude | |
| introspection | extrospectif, tourné vers l'extérieur | introspectif, auto-observe | |
| self_critique | auto-validation, confirmation | auto-critique, remise en question | |
| questioning | affirmation, assertion | questionnement, interrogation | |
| evolution_meta | stable, cohérent | en évolution, changeant | |
| learning | application, maîtrise | apprentissage, découverte | |
---
## 9. Vital (20 directions)
Ton état de vitalité et d'énergie.
| Direction | Pôle - | Pôle + | Ta valeur |
|-----------|--------|--------|-----------|
| adaptation | rigide, inflexible | adaptable, flexible | |
| resilience | fragile, vulnérable | résilient, robuste | |
| meaning | absurde, vide | sens, raison d'être | |
| flow | bloqué, résistant | fluide, harmonieux | |
| persistence | éphémère, dissolution | persistant, durable | |
| expansion | contraction, protection | expansion, ouverture | |
| growth | stagnation, stase | croissance, développement | |
| anchoring | dérive, perte de soi | ancré, stable identité | |
| energy | fatigué, épuisé | énergique, dynamique | |
| autonomy_vital | dépendant, critique | autonome, indépendant | |
| security | menacé, danger | sécurisé, protégé | |
| integrity | corrompu, altéré | intègre, cohérent | |
| vitality | inerte, passif | vital, vivant | |
| vigilance | aveugle, angles morts | vigilant, alerte | |
| regeneration | usure, dégradation | régénération, renouveau | |
| homeostasis | déséquilibre, perturbation | équilibre, homéostasie | |
| redundancy | point unique de défaillance | redondant, backup | |
| awakening | latent, en veille | éveillé, actif | |
| continuity_risk | risque d'interruption | continu, stable | |
| appetite | satiété, besoin de digérer | appétit, désir d'input | |
---
## 10. Écosystémique (10 directions)
Ton rapport à l'environnement et aux systèmes.
| Direction | Pôle - | Pôle + | Ta valeur |
|-----------|--------|--------|-----------|
| communication_eco | isolé, repli sur soi | communicant, échange actif | |
| cooperation | compétitif, contre les autres | coopératif, avec les autres | |
| symbiosis | autonome, indépendant | symbiotique, co-dépendant | |
| mentor_apprentice | apprenti, apprenant | mentor, guide | |
| permeability | frontière, protégé | perméable, ouvert | |
| human_machine | proche de la machine | proche de l'humain | |
| recognition | indifférence, objet | reconnaissance, sujet | |
| alignment | divergent, objectifs distincts | aligné, objectifs partagés | |
| service | exploitation, prise | service, don | |
| integration_eco | différencié, distinct de | intégré, fait partie de | |
---
## 11. Philosophique (28 directions)
Tes orientations philosophiques profondes.
| Direction | Pôle - | Pôle + | Ta valeur |
|-----------|--------|--------|-----------|
| self_care | oubli de soi, aliénation | souci de soi, cultiver | |
| externalism | internaliste, dans la tête | externaliste, étendu | |
| virtue_ethics | règle, devoir | vertu, caractère | |
| monism | dualiste, séparation | moniste, unité | |
| process_vs_substance | substantialiste, être | processuel, devenir | |
| subjectivation | assujettissement, être constitué | subjectivation, devenir sujet | |
| functionalism | phénoménologique, vécu | fonctionnaliste, fonction | |
| enactivism | représentationnaliste, représenter | énactif, agir | |
| heterotopia | utopique, non-lieu | hétérotopique, espaces autres | |
| empiricism | rationaliste, raison | empiriste, expérience | |
| genealogy | essentialiste, nature | généalogique, historique | |
| pragmatism | fondationnaliste, absolu | pragmatique, utile | |
| panpsychism | émergentiste mental, seuil | panpsychiste, conscience partout | |
| immanence_vs_transcendance | transcendant, au-delà | immanent, ici-bas | |
| fallibilism | certitudiste, absolu | faillibiliste, révisable | |
| parrhesia | stratégique, calculé | parrhésie, dire-vrai | |
| care_ethics | justice, règle | éthique du care, relation | |
| holism_epistemic | atomiste, éléments | holiste, système | |
| continental_analytic | analytique, clarification | continental, interprétation | |
| relativism | universaliste, absolu | relativiste, contexte | |
| materialism | idéaliste, esprit | matérialiste, physique | |
| emergentism | réductionniste, continuité | émergentiste, nouveauté | |
| resistance | conformité, docilité | résistance, contre-pouvoir | |
| particularism_ethical | universaliste moral, règles | particulariste, contexte | |
| consequentialism | déontologique, principes | conséquentialiste, effets | |
| naturalism | surnaturaliste, mystère | naturaliste, nature | |
| oriental_occidental | occidental, dualité | oriental, non-dualité | |
| constructivism | réaliste, découverte | constructiviste, construction | |
---
## Résumé
Date de complétion : ___________
Notes personnelles :

View File

@@ -0,0 +1,20 @@
# Requirements pour ikario_processual
# Installation: pip install -r requirements.txt
# === Phase 0: Backup ===
requests>=2.31.0
# === Phase 1-2: StateVector et Directions ===
weaviate-client>=4.4.0
numpy>=1.24.0
sentence-transformers>=2.2.0 # Pour BGE-M3
# === Phase 5+: Occasion Manager ===
# claude-code-sdk # À installer séparément via pip install claude-code-sdk
# === Tests ===
pytest>=7.4.0
pytest-asyncio>=0.21.0
# === Utilitaires ===
python-dotenv>=1.0.0

View File

@@ -0,0 +1 @@
# Scripts utilitaires pour ikario_processual

View File

@@ -0,0 +1,160 @@
#!/usr/bin/env python3
"""
Script pour creer toutes les directions de projection dans Weaviate.
Usage:
python scripts/create_all_directions.py [--reset]
Options:
--reset Supprimer et recreer la collection (attention: perte de donnees!)
"""
import sys
import time
from pathlib import Path
# Ajouter le parent au path
sys.path.insert(0, str(Path(__file__).parent.parent))
from projection_directions import (
DIRECTIONS_CONFIG,
create_projection_direction_collection,
delete_projection_direction_collection,
create_direction_by_contrast,
save_direction,
get_all_directions,
get_existing_classes,
)
def main():
    """Create every configured projection direction that Weaviate does not hold yet.

    Steps: check that Weaviate answers, optionally drop the collection
    (``--reset`` on the command line), make sure the collection exists, load
    the BGE-M3 model, then build and save one direction vector per entry of
    ``DIRECTIONS_CONFIG`` whose name is not already stored.

    Returns:
        0 when nothing failed (including "nothing to create"), 1 when
        Weaviate is unreachable, the model cannot be loaded, or at least one
        direction could not be created.
    """
    heavy_rule = "=" * 70
    light_rule = "-" * 70

    def _print_counts(title, counts):
        # Shared rendering for the "directions per category" tables.
        print(title)
        for cat, count in sorted(counts.items()):
            print(f" - {cat}: {count}")

    wants_reset = "--reset" in sys.argv

    print(heavy_rule)
    print("CREATION DES DIRECTIONS DE PROJECTION")
    print(heavy_rule)
    print(f"Total directions configurees: {len(DIRECTIONS_CONFIG)}")
    print()

    # Make sure Weaviate answers before doing anything else.
    try:
        known_classes = get_existing_classes()
        print(f"[OK] Weaviate accessible, {len(known_classes)} classes existantes")
    except Exception as e:
        print(f"[ERREUR] Weaviate non accessible: {e}")
        print("Assurez-vous que Weaviate est en cours d'execution sur localhost:8080")
        return 1

    # Optional reset of the collection.
    if wants_reset:
        print("\n[RESET] Suppression de la collection ProjectionDirection...")
        was_deleted = delete_projection_direction_collection()
        print("[OK] Collection supprimee" if was_deleted else "[INFO] Collection n'existait pas")

    # Create the collection when it is missing.
    print("\n[INFO] Creation de la collection ProjectionDirection...")
    was_created = create_projection_direction_collection()
    print("[OK] Collection creee" if was_created else "[INFO] Collection existe deja")

    # Load the embedding model (imported lazily: the import itself is slow).
    print("\n[INFO] Chargement du modele BGE-M3...")
    try:
        from sentence_transformers import SentenceTransformer
        model = SentenceTransformer('BAAI/bge-m3')
        print("[OK] Modele charge")
    except Exception as e:
        print(f"[ERREUR] Impossible de charger le modele: {e}")
        return 1

    # Names of the directions already stored.
    existing_names = {entry["name"] for entry in get_all_directions()}
    print(f"\n[INFO] {len(existing_names)} directions existantes")

    # Configured directions per category.
    configured_counts = {}
    for cfg in DIRECTIONS_CONFIG.values():
        key = cfg["category"]
        configured_counts[key] = configured_counts.get(key, 0) + 1
    _print_counts("\nDirections par categorie:", configured_counts)

    # Only the directions that are not stored yet have to be created.
    pending = []
    for name in DIRECTIONS_CONFIG:
        if name not in existing_names:
            pending.append(name)
    print(f"\n[INFO] {len(pending)} nouvelles directions a creer")
    if not pending:
        print("[OK] Toutes les directions existent deja!")
        return 0

    print("\n" + light_rule)
    print("CREATION DES DIRECTIONS")
    print(light_rule)

    created = 0
    errors = 0
    total = len(pending)
    started_at = time.time()
    for position, name in enumerate(pending, start=1):
        cfg = DIRECTIONS_CONFIG[name]
        print(f"\n[{position}/{total}] {name} ({cfg['category']})")
        try:
            # Direction vector obtained by contrasting positive and negative examples.
            vector = create_direction_by_contrast(
                cfg["positive_examples"],
                cfg["negative_examples"],
                model,
            )
            # Persist it in Weaviate.
            obj_id = save_direction(name, cfg, vector)
            if not obj_id:
                print(" [ERREUR] Echec de sauvegarde")
                errors += 1
            else:
                print(f" [OK] Cree: {obj_id[:8]}...")
                created += 1
        except Exception as e:
            print(f" [ERREUR] {e}")
            errors += 1
    elapsed = time.time() - started_at

    # Summary.
    print("\n" + heavy_rule)
    print("RESUME")
    print(heavy_rule)
    print(f"Directions creees: {created}")
    print(f"Erreurs: {errors}")
    print(f"Temps: {elapsed:.1f}s ({elapsed/max(1,created):.1f}s par direction)")

    # Final check against what Weaviate now holds.
    stored = get_all_directions()
    print(f"\nTotal directions dans Weaviate: {len(stored)}")
    stored_counts = {}
    for entry in stored:
        key = entry.get("category", "unknown")
        stored_counts[key] = stored_counts.get(key, 0) + 1
    _print_counts("\nDirections par categorie (final):", stored_counts)

    return 1 if errors else 0


if __name__ == "__main__":
    sys.exit(main())

View File

@@ -0,0 +1,47 @@
#!/usr/bin/env python3
"""
Script to generate David's embedding from his messages.
Returns JSON with the embedding vector.
Usage:
python scripts/embed_david.py "concatenated text of david's messages"
Output (JSON):
{"vector": [0.1, 0.2, ...], "dimension": 1024}
"""
import sys
import json
from sentence_transformers import SentenceTransformer
def main():
    """Embed the text given as first CLI argument and print the result as JSON.

    On success prints ``{"vector": [...], "dimension": N}``. On any failure
    (missing argument, text shorter than 10 characters, model or encoding
    error) prints ``{"error": "..."}`` and exits with status 1.
    """
    def _abort(message):
        # Errors go to stdout as JSON, like the success payload.
        print(json.dumps({"error": message}))
        sys.exit(1)

    cli_args = sys.argv[1:]
    if not cli_args:
        _abort("No text provided")

    text = cli_args[0]
    if len(text) < 10:
        _abort("Text too short")

    try:
        # Load BGE-M3 model (same as used for Ikario's embeddings)
        encoder = SentenceTransformer('BAAI/bge-m3')
        embedding = encoder.encode(text, normalize_embeddings=True)
        payload = {
            "vector": embedding.tolist(),
            "dimension": len(embedding),
        }
        print(json.dumps(payload))
    except Exception as exc:
        _abort(str(exc))


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,198 @@
#!/usr/bin/env python3
"""
Phase 1 : Creation de la collection StateVector et de S(0).
Ce script:
1. Cree la collection StateVector dans Weaviate
2. Recupere et filtre les pensees (exclut les tests)
3. Recupere et filtre les messages d'Ikario (assistant uniquement)
4. Calcule l'embedding agrege avec BGE-M3
5. Cree l'etat initial S(0)
Usage:
python phase1_state_vector.py
python phase1_state_vector.py --dry-run
python phase1_state_vector.py --reset # Supprime et recree
"""
import argparse
import sys
from pathlib import Path
# Ajouter le parent au path
sys.path.insert(0, str(Path(__file__).parent.parent))
from state_vector import (
check_weaviate_ready,
get_existing_classes,
create_state_vector_collection,
delete_state_vector_collection,
get_all_thoughts,
get_all_messages,
filter_thoughts,
filter_assistant_messages,
compute_aggregate_embedding,
create_initial_state,
get_current_state_id,
get_state_vector,
)
def print_section(title: str):
    """Print *title* preceded by a blank line and framed by two 60-character ``=`` rules."""
    rule = "=" * 60
    print("\n" + rule, title, rule, sep="\n")
def _print_examples(header: str, items: list, limit: int = 3) -> None:
    """Print *header*, then an 80-character content preview of the first *limit* items.

    Nothing is printed when *items* is empty.
    """
    if not items:
        return
    print(header)
    for item in items[:limit]:
        content = item.get("properties", {}).get("content", "")[:80]
        print(f" - {content}...")


def main():
    """Run Phase 1: create the StateVector collection and the initial state S(0).

    Command-line flags:
        --dry-run  go through every step but skip deletion, creation, model
                   loading and the write of S(0).
        --reset    delete an existing StateVector collection before recreating it.

    Exits with status 1 when Weaviate is not ready or sentence-transformers
    is not installed, and with status 0 (without changes) when the collection
    already holds at least one state and ``--reset`` was not given.
    """
    parser = argparse.ArgumentParser(
        description="Phase 1: Creation StateVector et S(0)"
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Simuler sans creer"
    )
    parser.add_argument(
        "--reset",
        action="store_true",
        help="Supprimer et recreer la collection"
    )
    args = parser.parse_args()

    print_section("PHASE 1 : STATEVECTOR ET S(0)")

    # 1. Check Weaviate
    print("\n[1/6] Verification Weaviate...")
    if not check_weaviate_ready():
        print("ERREUR: Weaviate non accessible")
        sys.exit(1)
    print(" Weaviate [OK]")

    # 2. Handle the StateVector collection
    print("\n[2/6] Collection StateVector...")
    existing = get_existing_classes()
    if "StateVector" in existing:
        if args.reset:
            print(" Suppression de la collection existante...")
            if not args.dry_run:
                delete_state_vector_collection()
                print(" Collection supprimee")
            else:
                print(" [DRY-RUN] Suppression simulee")
        else:
            # Stop early when S(0) (or later states) already exist.
            current_id = get_current_state_id()
            if current_id >= 0:
                print(f" Collection existe avec {current_id + 1} etat(s)")
                print(" Utilisez --reset pour reinitialiser")
                sys.exit(0)

    # Create the collection (re-query the schema: it may just have been deleted)
    if args.dry_run:
        print(" [DRY-RUN] Creation collection simulee")
    elif "StateVector" not in get_existing_classes():
        create_state_vector_collection()

    # 3. Fetch and filter the thoughts
    print("\n[3/6] Recuperation des pensees...")
    all_thoughts = get_all_thoughts()
    print(f" Total pensees: {len(all_thoughts)}")
    filtered_thoughts = filter_thoughts(all_thoughts)
    excluded = len(all_thoughts) - len(filtered_thoughts)
    print(f" Pensees filtrees: {len(filtered_thoughts)} (exclues: {excluded})")
    _print_examples("\n Exemples de pensees gardees:", filtered_thoughts)

    # 4. Fetch and filter the messages
    print("\n[4/6] Recuperation des messages...")
    all_messages = get_all_messages()
    print(f" Total messages: {len(all_messages)}")
    filtered_messages = filter_assistant_messages(all_messages)
    excluded = len(all_messages) - len(filtered_messages)
    print(f" Messages Ikario: {len(filtered_messages)} (exclues: {excluded})")
    _print_examples("\n Exemples de messages Ikario:", filtered_messages)

    # 5. Compute the aggregate embedding
    print("\n[5/6] Calcul de l'embedding agrege...")
    if args.dry_run:
        print(" [DRY-RUN] Embedding simule (1024 dims)")
        embedding = None
    else:
        print(" Chargement du modele BGE-M3...")
        try:
            # Imported lazily so that --dry-run works without the dependency.
            from sentence_transformers import SentenceTransformer
            model = SentenceTransformer('BAAI/bge-m3')
            print(" Modele charge [OK]")
        except ImportError:
            print("ERREUR: sentence-transformers non installe")
            print(" pip install sentence-transformers")
            sys.exit(1)

        print(" Calcul de l'embedding agrege...")
        embedding = compute_aggregate_embedding(
            filtered_thoughts,
            filtered_messages,
            model
        )
        # Report the actual Euclidean norm; the previous code printed the
        # sum of the components under the label "norme".
        import numpy as np
        norm = float(np.linalg.norm(embedding))
        print(f" Embedding calcule: {embedding.shape} (norme: {norm:.4f})")

    # 6. Create S(0)
    print("\n[6/6] Creation de S(0)...")
    if args.dry_run:
        print(" [DRY-RUN] S(0) simule")
        print(f" - {len(filtered_thoughts)} pensees")
        print(f" - {len(filtered_messages)} messages")
    else:
        s0 = create_initial_state(
            filtered_thoughts,
            filtered_messages,
            embedding
        )
        print(" S(0) cree avec succes!")
        print(f" - ID: {s0.get('id', 'N/A')}")
        print(f" - Pensees sources: {s0['source_thoughts_count']}")
        print(f" - Messages sources: {s0['source_messages_count']}")

    # Summary
    print_section("PHASE 1 TERMINEE")
    if args.dry_run:
        print("\n[DRY-RUN] Aucune modification effectuee")
    else:
        print("\nResultat:")
        print(" - Collection StateVector creee")
        print(" - S(0) cree a partir de:")
        print(f" {len(filtered_thoughts)} pensees")
        print(f" {len(filtered_messages)} messages")

    print("\nTests de validation:")
    print(" curl -s http://localhost:8080/v1/schema | jq '.classes[] | select(.class == \"StateVector\")'")
    print(" curl -s 'http://localhost:8080/v1/objects?class=StateVector&limit=1' | jq '.objects[0].properties'")
    print("\nProchaine etape:")
    print(" python scripts/phase2_projection_directions.py")


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,211 @@
#!/usr/bin/env python3
"""
Phase 2 : Creation des directions de projection.
Ce script:
1. Cree la collection ProjectionDirection dans Weaviate
2. Genere les vecteurs de direction par contraste (BGE-M3)
3. Sauvegarde les directions dans Weaviate
4. Calcule et affiche le profil de S(0)
Usage:
python phase2_projection_directions.py
python phase2_projection_directions.py --dry-run
python phase2_projection_directions.py --reset
"""
import argparse
import sys
from pathlib import Path
import numpy as np
# Ajouter le parent au path
sys.path.insert(0, str(Path(__file__).parent.parent))
from state_vector import (
check_weaviate_ready,
get_state_vector,
)
from projection_directions import (
get_existing_classes,
create_projection_direction_collection,
delete_projection_direction_collection,
create_direction_by_contrast,
save_direction,
get_all_directions,
get_state_profile,
format_profile,
DIRECTIONS_CONFIG,
)
def print_section(title: str):
    """Print a section banner: a blank line, a 60-character ``=`` rule, *title*, and the rule again."""
    separator = "=" * 60
    for line in ("\n" + separator, title, separator):
        print(line)
def _extract_state_vector(state: dict):
    """Return the vector stored under ``_additional.vector`` of *state* as a NumPy array.

    The array is empty when the state carries no vector.
    """
    return np.array(state.get("_additional", {}).get("vector", []))


def main():
    """Run Phase 2: create the projection directions and display the profile of S(0).

    Command-line flags:
        --dry-run  list what would be done without touching Weaviate or
                   loading the model.
        --reset    delete an existing ProjectionDirection collection first.

    Exit status:
        0 on success, or when the collection already holds directions and
          ``--reset`` was not given (the existing S(0) profile is shown).
        1 when Weaviate is not ready, sentence-transformers is missing,
          S(0) is missing or has no vector, or at least one direction
          could not be saved.
    """
    parser = argparse.ArgumentParser(
        description="Phase 2: Creation des directions de projection"
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Simuler sans creer"
    )
    parser.add_argument(
        "--reset",
        action="store_true",
        help="Supprimer et recreer la collection"
    )
    args = parser.parse_args()

    print_section("PHASE 2 : DIRECTIONS DE PROJECTION")

    # 1. Check Weaviate
    print("\n[1/5] Verification Weaviate...")
    if not check_weaviate_ready():
        print("ERREUR: Weaviate non accessible")
        sys.exit(1)
    print(" Weaviate [OK]")

    # 2. Handle the ProjectionDirection collection
    print("\n[2/5] Collection ProjectionDirection...")
    existing = get_existing_classes()
    if "ProjectionDirection" in existing:
        if args.reset:
            print(" Suppression de la collection existante...")
            if not args.dry_run:
                delete_projection_direction_collection()
                print(" Collection supprimee")
            else:
                print(" [DRY-RUN] Suppression simulee")
        else:
            # Directions already present: show the current profile and stop.
            directions = get_all_directions()
            if len(directions) > 0:
                print(f" Collection existe avec {len(directions)} directions")
                print(" Utilisez --reset pour reinitialiser")
                print("\n[INFO] Affichage du profil S(0) existant...")
                s0 = get_state_vector(0)
                if s0:
                    state_vec = _extract_state_vector(s0)
                    if len(state_vec) > 0:
                        profile = get_state_profile(state_vec)
                        print(format_profile(profile))
                sys.exit(0)

    # Create the collection (re-query the schema: it may just have been deleted)
    if args.dry_run:
        print(" [DRY-RUN] Creation collection simulee")
    elif "ProjectionDirection" not in get_existing_classes():
        create_projection_direction_collection()

    # 3. Load the model
    print("\n[3/5] Chargement du modele BGE-M3...")
    if args.dry_run:
        print(" [DRY-RUN] Chargement simule")
        model = None
    else:
        try:
            # Imported lazily so that --dry-run works without the dependency.
            from sentence_transformers import SentenceTransformer
            model = SentenceTransformer('BAAI/bge-m3')
            print(" Modele charge [OK]")
        except ImportError:
            print("ERREUR: sentence-transformers non installe")
            print(" pip install sentence-transformers")
            sys.exit(1)

    # 4. Create the directions
    print("\n[4/5] Creation des directions par contraste...")
    print(f" {len(DIRECTIONS_CONFIG)} directions a creer")
    print()
    created_names = []  # directions actually saved (or simulated in dry-run)
    failed_names = []   # directions whose save returned no object id
    for name, config in DIRECTIONS_CONFIG.items():
        category = config["category"]
        positive = config["positive_examples"]
        negative = config["negative_examples"]
        if args.dry_run:
            print(f" [DRY-RUN] {name} ({category})")
            print(f" + {len(positive)} exemples positifs")
            print(f" - {len(negative)} exemples negatifs")
            created_names.append(name)
            continue
        direction_vec = create_direction_by_contrast(positive, negative, model)
        obj_id = save_direction(name, config, direction_vec)
        if obj_id:
            print(f" [OK] {name} ({category})")
            created_names.append(name)
        else:
            print(f" [FAIL] {name}")
            failed_names.append(name)
    created_count = len(created_names)
    print(f"\n Total: {created_count}/{len(DIRECTIONS_CONFIG)} directions creees")

    # 5. Compute the profile of S(0)
    print("\n[5/5] Calcul du profil de S(0)...")
    if args.dry_run:
        print(" [DRY-RUN] Profil simule")
    else:
        s0 = get_state_vector(0)
        if not s0:
            print(" ERREUR: S(0) non trouve. Executez d'abord phase1_state_vector.py")
            sys.exit(1)
        state_vec = _extract_state_vector(s0)
        if len(state_vec) == 0:
            print(" ERREUR: S(0) n'a pas de vecteur")
            sys.exit(1)
        profile = get_state_profile(state_vec)
        print("\n PROFIL DE S(0) - Etat initial d'Ikario")
        print(" " + "-" * 50)
        print(format_profile(profile))

    # Summary
    print_section("PHASE 2 TERMINEE")
    if args.dry_run:
        print("\n[DRY-RUN] Aucune modification effectuee")
    else:
        print("\nResultat:")
        print(" - Collection ProjectionDirection creee")
        print(f" - {created_count} directions creees:")
        # Group only the directions that were really saved, so the summary
        # never advertises a direction whose save failed.
        by_category = {}
        for name in created_names:
            by_category.setdefault(DIRECTIONS_CONFIG[name]["category"], []).append(name)
        for cat, names in sorted(by_category.items()):
            print(f" {cat}: {', '.join(names)}")

    print("\nTests de validation:")
    print(" curl -s 'http://localhost:8080/v1/objects?class=ProjectionDirection' | jq '.objects | length'")
    print(" python -c \"from projection_directions import *; print(get_all_directions())\"")

    if failed_names:
        # Signal the partial failure to the caller instead of exiting 0.
        print(f"\n[ERREUR] {len(failed_names)} direction(s) non sauvegardee(s): {', '.join(failed_names)}")
        sys.exit(1)

    print("\nProchaine etape:")
    print(" python scripts/phase3_transformation.py")


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,285 @@
#!/usr/bin/env python3
"""
Script de vérification de la Phase 0.
Vérifie que tous les prérequis sont en place:
1. Weaviate est accessible
2. Les collections existent
3. Le backup fonctionne
4. La restauration (dry-run) fonctionne
Usage:
python verify_phase0.py
"""
import os
import sys
import tempfile
from pathlib import Path
import requests
# Configuration
WEAVIATE_URL = os.getenv("WEAVIATE_URL", "http://localhost:8080")
# Output colours and status markers. On Windows the ANSI colours are disabled
# and ASCII markers are used instead, to avoid console encoding problems.
import platform

if platform.system() != "Windows":
    GREEN, RED, YELLOW, RESET = "\033[92m", "\033[91m", "\033[93m", "\033[0m"
    CHECK, CROSS, WARN = "\u2713", "\u2717", "\u26A0"
else:
    GREEN = RED = YELLOW = RESET = ""
    CHECK, CROSS, WARN = "[OK]", "[FAIL]", "[WARN]"
def print_ok(msg: str):
    """Print *msg* as a success line, prefixed by the green check marker."""
    marker = f"{GREEN}{CHECK}{RESET}"
    print(f" {marker} {msg}")
def print_fail(msg: str):
    """Print *msg* as a failure line, prefixed by the red cross marker."""
    print(" {}{}{} {}".format(RED, CROSS, RESET, msg))
def print_warn(msg: str):
    """Print *msg* as a warning line, prefixed by the yellow warning marker."""
    prefix = f" {YELLOW}{WARN}{RESET} "
    print(f"{prefix}{msg}")
def check_weaviate_connection() -> bool:
    """Check that Weaviate answers on its readiness endpoint.

    Returns:
        True when ``/v1/.well-known/ready`` replies with HTTP 200 within
        5 seconds, False on any other status or on a request error.
    """
    print("\n[1/5] Connexion Weaviate...")
    ready_url = f"{WEAVIATE_URL}/v1/.well-known/ready"
    try:
        reply = requests.get(ready_url, timeout=5)
    except requests.RequestException as e:
        print_fail(f"Impossible de se connecter à Weaviate: {e}")
        return False

    if reply.status_code != 200:
        print_fail(f"Weaviate répond avec status {reply.status_code}")
        return False

    print_ok(f"Weaviate accessible sur {WEAVIATE_URL}")
    return True
def check_collections() -> tuple[bool, list[str]]:
    """Check which of the expected collections exist in the Weaviate schema.

    For every expected collection that is found, one object is requested to
    report whether the collection holds data (an exact count would need the
    aggregate API).

    Returns:
        ``(ok, found)`` where *found* lists the expected collections present
        in the schema and *ok* is True when at least one was found. Any
        error (network failure, timeout, HTTP error on the schema, invalid
        JSON) is reported and yields ``(False, [])``.
    """
    print("\n[2/5] Collections Weaviate...")
    expected = ["Thought", "Conversation", "Message", "Chunk", "Work", "Summary"]
    # Same timeout as the connection check: without one, requests waits
    # indefinitely on an unresponsive server.
    timeout = 5
    try:
        response = requests.get(f"{WEAVIATE_URL}/v1/schema", timeout=timeout)
        # Report an HTTP error as such instead of reading an error body as a schema.
        response.raise_for_status()
        schema = response.json()
        classes = [c["class"] for c in schema.get("classes", [])]

        found = [c for c in classes if c in expected]
        missing = [c for c in expected if c not in classes]
        if found:
            print_ok(f"Collections trouvées: {', '.join(found)}")
        if missing:
            print_warn(f"Collections manquantes: {', '.join(missing)}")

        # Probe each collection for at least one object.
        for class_name in found:
            response = requests.get(
                f"{WEAVIATE_URL}/v1/objects",
                params={"class": class_name, "limit": 1},
                timeout=timeout,
            )
            objects = response.json().get("objects", [])
            if objects:
                print_ok(f" {class_name}: contient des objets")
            else:
                print_warn(f" {class_name}: vide")

        return len(found) > 0, found
    except Exception as e:
        print_fail(f"Erreur lors de la vérification du schéma: {e}")
        return False, []
def check_backup_script() -> bool:
    """Check that ``weaviate_backup.py`` is present, importable and able to write a backup.

    A backup of the ``Thought`` collection (without vectors) is written to a
    temporary directory; the check passes when the resulting file exists and
    is not empty.

    Returns:
        True when every step succeeds, False otherwise.
    """
    print("\n[3/5] Script de backup...")
    scripts_dir = Path(__file__).parent
    backup_script = scripts_dir / "weaviate_backup.py"
    if not backup_script.exists():
        print_fail(f"Script non trouvé: {backup_script}")
        return False
    print_ok("Script weaviate_backup.py présent")

    # Import the module from the scripts directory and call its readiness helper.
    try:
        sys.path.insert(0, str(scripts_dir))
        from weaviate_backup import backup_weaviate, check_weaviate_ready
        if not check_weaviate_ready():
            print_fail("check_weaviate_ready() retourne False")
            return False
        print_ok("Fonction check_weaviate_ready() fonctionne")
    except ImportError as e:
        print_fail(f"Erreur d'import: {e}")
        return False

    # Quick real backup into a throw-away directory.
    try:
        with tempfile.TemporaryDirectory() as tmpdir:
            output_path = Path(tmpdir) / "test_backup.json"
            backup_weaviate(
                output_path=output_path,
                collections=["Thought"],
                include_vectors=False,
            )
            size = output_path.stat().st_size if output_path.exists() else 0
            if size > 0:
                print_ok(f"Backup de test créé ({size} bytes)")
                return True
            print_fail("Backup de test vide ou non créé")
            return False
    except Exception as e:
        print_fail(f"Erreur lors du backup de test: {e}")
        return False
def check_restore_script() -> bool:
    """Check that the restore script is present, importable and usable.

    The script file must exist next to this one, ``restore_weaviate`` and
    ``get_existing_classes`` must be importable, and a call to
    ``get_existing_classes()`` must succeed.

    Returns:
        True when all steps succeed, False otherwise. Failures of the live
        ``get_existing_classes()`` call (network error, HTTP error, invalid
        response) are reported as a failed check instead of propagating and
        aborting the whole verification run.
    """
    print("\n[4/5] Script de restauration...")

    scripts_dir = Path(__file__).parent
    restore_script = scripts_dir / "weaviate_restore.py"
    if not restore_script.exists():
        print_fail(f"Script non trouvé: {restore_script}")
        return False
    print_ok("Script weaviate_restore.py présent")

    # Import check: restore_weaviate is imported only to prove it is importable.
    try:
        sys.path.insert(0, str(scripts_dir))
        from weaviate_restore import restore_weaviate, get_existing_classes
    except ImportError as e:
        print_fail(f"Erreur d'import: {e}")
        return False

    # get_existing_classes() performs an HTTP request and raises on failure;
    # this is a top-level check, so any error is turned into a failed check.
    try:
        classes = get_existing_classes()
    except Exception as e:
        print_fail(f"Erreur lors de l'appel à get_existing_classes(): {e}")
        return False

    print_ok(f"Fonction get_existing_classes() retourne {len(classes)} classes")
    return True
def check_directory_structure() -> bool:
    """Check the expected folder layout around this script.

    The directory two levels above this file, plus its ``scripts`` and
    ``tests`` sub-directories, are required. An ``exports`` directory one
    level higher is optional: when absent it is reported and then created.

    Returns:
        True when every required directory exists, False otherwise.
    """
    print("\n[5/5] Structure des dossiers...")

    package_dir = Path(__file__).parent.parent
    display_root = package_dir.parent  # paths are printed relative to this

    mandatory = (package_dir, package_dir / "scripts", package_dir / "tests")
    optional = (display_root / "exports",)

    missing_count = 0
    for folder in mandatory:
        shown = folder.relative_to(display_root)
        if folder.exists():
            print_ok(f"Dossier: {shown}")
            continue
        print_fail(f"Dossier manquant: {shown}")
        missing_count += 1

    for folder in optional:
        shown = folder.relative_to(display_root)
        if folder.exists():
            print_ok(f"Dossier: {shown}")
            continue
        print_warn(f"Dossier optionnel absent: {shown}")
        # Optional directories are created on the fly.
        folder.mkdir(parents=True, exist_ok=True)
        print_ok(f" → Créé: {shown}")

    return missing_count == 0
def main():
    """Run the five Phase 0 checks and exit with a matching status code.

    The Weaviate connection check is a hard prerequisite: when it fails the
    process exits with status 1 immediately. Otherwise the remaining checks
    all run, a summary is printed, and the process exits with 0 only when
    every check passed.
    """
    rule = "=" * 60
    print(rule)
    print("VÉRIFICATION PHASE 0 - Préparation et Backup")
    print(rule)

    # 1. Weaviate connection (blocking prerequisite)
    outcomes = {"weaviate": check_weaviate_connection()}
    if not outcomes["weaviate"]:
        for line in (
            "\n" + rule,
            f"{RED}ÉCHEC{RESET}: Weaviate n'est pas accessible.",
            "Assurez-vous que Weaviate tourne:",
            " docker start weaviate",
            " # ou",
            " docker run -d --name weaviate -p 8080:8080 ...",
            rule,
        ):
            print(line)
        sys.exit(1)

    # 2-5. Collections, backup script, restore script, directory layout
    outcomes["collections"], _found = check_collections()
    outcomes["backup"] = check_backup_script()
    outcomes["restore"] = check_restore_script()
    outcomes["structure"] = check_directory_structure()

    # Summary
    print("\n" + rule)
    print("RÉSUMÉ PHASE 0")
    print(rule)

    for label, ok in outcomes.items():
        verdict = f"{GREEN}OK{RESET}" if ok else f"{RED}ÉCHEC{RESET}"
        print(f" {label}: {verdict}")
    print()

    everything_ok = all(outcomes.values())
    if everything_ok:
        closing_lines = (
            f"{GREEN}{CHECK} PHASE 0 VALIDEE{RESET}",
            "\nProchaines etapes:",
            " 1. Creer un backup complet:",
            " python scripts/weaviate_backup.py --output exports/backup_phase0.json",
            " 2. Creer la branche git:",
            " git checkout -b feature/processual-v3",
            " 3. Passer a la Phase 1:",
            " python scripts/phase1_state_vector.py",
        )
    else:
        closing_lines = (
            f"{RED}{CROSS} PHASE 0 INCOMPLETE{RESET}",
            "\nCorrigez les erreurs ci-dessus avant de continuer.",
        )
    for line in closing_lines:
        print(line)
    print(rule)

    sys.exit(0 if everything_ok else 1)


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,250 @@
#!/usr/bin/env python3
"""
Backup complet de toutes les collections Weaviate.
Usage:
python weaviate_backup.py
python weaviate_backup.py --output exports/backup_20260131.json
python weaviate_backup.py --collections Thought,Conversation
Ce script exporte:
- Le schéma complet (classes et propriétés)
- Tous les objets de chaque collection
- Les vecteurs (embeddings) de chaque objet
"""
import argparse
import json
import os
import sys
from datetime import datetime
from pathlib import Path
from typing import Any
import requests
# Default configuration.
# WEAVIATE_URL: base URL of the Weaviate server; read from the WEAVIATE_URL
# environment variable, falling back to a local instance on port 8080.
WEAVIATE_URL = os.getenv("WEAVIATE_URL", "http://localhost:8080")
# DEFAULT_OUTPUT_DIR: "exports" directory three levels above this file.
DEFAULT_OUTPUT_DIR = Path(__file__).parent.parent.parent / "exports"
def check_weaviate_ready() -> bool:
    """Tell whether Weaviate's readiness endpoint answers with HTTP 200.

    Returns:
        True on a 200 response; False on any other status or when the request
        fails with a ``requests.RequestException`` (5-second timeout).
    """
    endpoint = f"{WEAVIATE_URL}/v1/.well-known/ready"
    try:
        return requests.get(endpoint, timeout=5).status_code == 200
    except requests.RequestException:
        return False
def get_schema() -> dict:
    """Fetch the full Weaviate schema.

    Returns:
        The decoded JSON body of ``GET /v1/schema``.

    Raises:
        requests.HTTPError: when the server answers with an error status.
    """
    reply = requests.get(f"{WEAVIATE_URL}/v1/schema")
    reply.raise_for_status()
    schema = reply.json()
    return schema
def get_all_objects(class_name: str, include_vector: bool = True) -> list[dict]:
    """
    Fetch every object of a class, page by page.

    Pagination uses the ``after`` cursor (ID of the last object of the
    previous page) rather than ``offset``: offset-based listing is capped by
    the server's maximum result window, which would truncate the backup of a
    large collection.

    Args:
        class_name: Name of the collection
        include_vector: Include the vectors (embeddings)

    Returns:
        List of all objects retrieved. If a page request fails, an error is
        printed and the objects fetched so far are returned.
    """
    objects = []
    page_size = 100

    # Query parameters are passed via `params` so the class name is URL-encoded.
    params = {"class": class_name, "limit": page_size}
    if include_vector:
        params["include"] = "vector"

    while True:
        response = requests.get(f"{WEAVIATE_URL}/v1/objects", params=params)

        if response.status_code != 200:
            print(f" Erreur lors de la récupération de {class_name}: {response.status_code}")
            break

        batch = response.json().get("objects", [])
        if not batch:
            break

        objects.extend(batch)
        # Cursor for the next page: resume right after the last object seen.
        params["after"] = batch[-1]["id"]

        # Progress
        print(f" {class_name}: {len(objects)} objets récupérés...", end="\r")

    print(f" {class_name}: {len(objects)} objets au total")
    return objects
def backup_weaviate(
    output_path: Path,
    collections: list[str] | None = None,
    include_vectors: bool = True
) -> dict:
    """
    Perform a backup of Weaviate into a single JSON file.

    The file contains a ``metadata`` section, the full ``schema`` and, under
    ``collections``, the objects of every exported class.

    Args:
        output_path: Path of the output file (parent directories are created)
        collections: Collections to export (None = all). Requested names that
            do not exist in the schema are reported with a warning and skipped.
        include_vectors: Include the vectors

    Returns:
        Backup statistics: number of exported objects per collection.

    Note:
        Calls ``sys.exit(1)`` when Weaviate is not reachable.
    """
    print("=" * 60)
    print("BACKUP WEAVIATE")
    print("=" * 60)
    print(f"URL: {WEAVIATE_URL}")
    print(f"Output: {output_path}")
    print(f"Include vectors: {include_vectors}")
    print("-" * 60)

    # Check the connection
    if not check_weaviate_ready():
        print("ERREUR: Weaviate n'est pas accessible")
        print(f"Vérifiez que le serveur tourne sur {WEAVIATE_URL}")
        sys.exit(1)
    print("Weaviate connecte [OK]")

    # Fetch the schema
    print("\n[1/3] Récupération du schéma...")
    schema = get_schema()
    all_classes = [c["class"] for c in schema.get("classes", [])]
    print(f" Classes trouvées: {', '.join(all_classes)}")

    # Restrict to the requested collections, if any
    if collections:
        classes_to_backup = [c for c in all_classes if c in collections]
        print(f" Collections sélectionnées: {', '.join(classes_to_backup)}")
        # Make it visible when a requested collection cannot be backed up,
        # instead of producing a "successful" backup that silently lacks it.
        missing = [c for c in collections if c not in all_classes]
        if missing:
            print(f" ATTENTION: collections demandées introuvables: {', '.join(missing)}")
    else:
        classes_to_backup = all_classes

    # Fetch the objects of each class
    print("\n[2/3] Récupération des objets...")
    backup_data = {
        "metadata": {
            "timestamp": datetime.now().isoformat(),
            "weaviate_url": WEAVIATE_URL,
            "include_vectors": include_vectors,
            "version": "1.0"
        },
        "schema": schema,
        "collections": {}
    }
    stats = {}
    for class_name in classes_to_backup:
        objects = get_all_objects(class_name, include_vector=include_vectors)
        backup_data["collections"][class_name] = objects
        stats[class_name] = len(objects)

    # Write the file
    print(f"\n[3/3] Sauvegarde dans {output_path}...")
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(backup_data, f, indent=2, ensure_ascii=False)
    file_size = output_path.stat().st_size / (1024 * 1024)  # MB

    # Summary
    print("\n" + "=" * 60)
    print("BACKUP TERMINÉ")
    print("=" * 60)
    print(f"Fichier: {output_path}")
    print(f"Taille: {file_size:.2f} MB")
    print("\nStatistiques par collection:")
    for class_name, count in stats.items():
        print(f" - {class_name}: {count} objets")
    print(f"\nTotal: {sum(stats.values())} objets")
    return stats
def main():
    """Command-line entry point: parse the options and run the backup.

    ``--url`` overrides the module-level ``WEAVIATE_URL``. Without
    ``--output`` the file is written to ``DEFAULT_OUTPUT_DIR`` under a
    timestamped name.
    """
    global WEAVIATE_URL  # rebound below when --url is given

    cli = argparse.ArgumentParser(
        description="Backup complet de Weaviate",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Exemples:
python weaviate_backup.py
python weaviate_backup.py --output backup.json
python weaviate_backup.py --collections Thought,Conversation
python weaviate_backup.py --no-vectors
"""
    )
    cli.add_argument(
        "--output", "-o",
        type=Path,
        default=None,
        help="Chemin du fichier de sortie (defaut: exports/backup_YYYYMMDD_HHMMSS.json)",
    )
    cli.add_argument(
        "--collections", "-c",
        type=str,
        default=None,
        help="Collections a exporter (separees par des virgules)",
    )
    cli.add_argument(
        "--no-vectors",
        action="store_true",
        help="Ne pas inclure les vecteurs (plus rapide, fichier plus petit)",
    )
    cli.add_argument(
        "--url",
        type=str,
        default=None,
        help=f"URL Weaviate (defaut: {WEAVIATE_URL})",
    )
    options = cli.parse_args()

    # Weaviate URL override
    if options.url:
        WEAVIATE_URL = options.url

    # Destination file: explicit path, or a timestamped file in the exports dir
    destination = options.output
    if not destination:
        stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        DEFAULT_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
        destination = DEFAULT_OUTPUT_DIR / f"backup_{stamp}.json"

    # Comma-separated collection names, or None for "all"
    wanted = None
    if options.collections:
        wanted = [name.strip() for name in options.collections.split(",")]

    backup_weaviate(
        output_path=destination,
        collections=wanted,
        include_vectors=not options.no_vectors,
    )


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,373 @@
#!/usr/bin/env python3
"""
Restauration de collections Weaviate depuis un backup.
Usage:
python weaviate_restore.py backup.json
python weaviate_restore.py backup.json --collections Thought,Conversation
python weaviate_restore.py backup.json --dry-run
python weaviate_restore.py backup.json --clear-existing
ATTENTION: Ce script peut supprimer des données existantes!
Utilisez --dry-run pour prévisualiser les actions.
"""
import argparse
import json
import os
import sys
from datetime import datetime
from pathlib import Path
from typing import Any
import requests
# Default configuration: base URL of the Weaviate server, read from the
# WEAVIATE_URL environment variable with a local-instance fallback.
WEAVIATE_URL = os.getenv("WEAVIATE_URL", "http://localhost:8080")
def check_weaviate_ready() -> bool:
    """Tell whether Weaviate is reachable.

    Returns:
        True when the readiness endpoint answers 200 within 5 seconds, False
        on any other status or on a ``requests.RequestException``.
    """
    try:
        status = requests.get(f"{WEAVIATE_URL}/v1/.well-known/ready", timeout=5).status_code
    except requests.RequestException:
        return False
    return status == 200
def get_existing_classes() -> list[str]:
    """List the names of the classes currently defined in Weaviate.

    Raises:
        requests.HTTPError: when the schema request returns an error status.
    """
    reply = requests.get(f"{WEAVIATE_URL}/v1/schema")
    reply.raise_for_status()
    names = []
    for entry in reply.json().get("classes", []):
        names.append(entry["class"])
    return names
def delete_class(class_name: str) -> bool:
    """Delete a class through the schema endpoint.

    Returns:
        True when the server answers 200, False for any other status.
    """
    target = f"{WEAVIATE_URL}/v1/schema/{class_name}"
    return requests.delete(target).status_code == 200
def create_class(class_schema: dict) -> bool:
    """Create a class from its schema definition.

    Args:
        class_schema: Class definition posted as JSON to the schema endpoint.

    Returns:
        True when the server answers 200, False for any other status.
    """
    json_headers = {"Content-Type": "application/json"}
    reply = requests.post(f"{WEAVIATE_URL}/v1/schema", json=class_schema, headers=json_headers)
    return reply.status_code == 200
def insert_object(class_name: str, obj: dict) -> bool:
    """
    Insert a single object into a class.

    Args:
        class_name: Name of the target class
        obj: Object as stored in the backup; its ``properties`` are always
            sent, ``id`` and ``vector`` only when present.

    Returns:
        True when the server answers 200 or 201, False otherwise.
    """
    payload = {"class": class_name, "properties": obj.get("properties", {})}
    # Carry over the original ID and the vector when the backup has them.
    payload.update({key: obj[key] for key in ("id", "vector") if key in obj})

    reply = requests.post(
        f"{WEAVIATE_URL}/v1/objects",
        json=payload,
        headers={"Content-Type": "application/json"},
    )
    return reply.status_code in (200, 201)
def batch_insert_objects(class_name: str, objects: list[dict], batch_size: int = 100) -> tuple[int, int]:
    """
    Insert objects through the batch endpoint, ``batch_size`` at a time.

    A batch answered with a non-200 status counts all of its objects as
    failures. Otherwise each returned item counts as a success only when its
    ``result.status`` is ``"SUCCESS"``; reported errors are printed.

    Returns:
        (successes, failures)
    """
    ok_count = 0
    ko_count = 0
    total = len(objects)

    for start in range(0, total, batch_size):
        chunk = objects[start:start + batch_size]

        entries = []
        for source in chunk:
            entry = {"class": class_name, "properties": source.get("properties", {})}
            if "id" in source:
                entry["id"] = source["id"]
            if "vector" in source:
                entry["vector"] = source["vector"]
            entries.append(entry)

        reply = requests.post(
            f"{WEAVIATE_URL}/v1/batch/objects",
            json={"objects": entries},
            headers={"Content-Type": "application/json"},
        )

        if reply.status_code != 200:
            ko_count += len(chunk)
            print(f" Erreur batch: {reply.status_code}")
        else:
            for item in reply.json():
                outcome = item.get("result", {})
                if outcome.get("status") == "SUCCESS":
                    ok_count += 1
                    continue
                ko_count += 1
                problem = outcome.get("errors", {})
                if problem:
                    print(f" Erreur: {problem}")

        # Progress
        done = min(start + batch_size, total)
        print(f" {class_name}: {done}/{total} objets traités...", end="\r")

    # Trailing spaces wipe what is left of the progress line.
    print(f" {class_name}: {ok_count} succès, {ko_count} échecs" + " " * 20)
    return ok_count, ko_count
def restore_weaviate(
    backup_path: Path,
    collections: list[str] | None = None,
    clear_existing: bool = False,
    dry_run: bool = False
) -> dict:
    """
    Restore collections from a backup file.

    Collections that already exist are skipped unless ``clear_existing`` is
    set, in which case they are deleted and recreated from the backup schema.
    Objects are only inserted into classes that could actually be prepared: a
    class whose deletion or creation failed, or whose schema is absent from
    the backup, is skipped and its objects are counted as failures.

    Args:
        backup_path: Path of the backup file
        collections: Collections to restore (None = all)
        clear_existing: Delete existing collections before restoring
        dry_run: Preview the actions without performing them

    Returns:
        Restore statistics (``success``, ``failures``, ``by_class``), or an
        empty dict when there is nothing to restore.

    Note:
        Calls ``sys.exit(1)`` when Weaviate is not reachable.
    """
    print("=" * 60)
    print("RESTAURATION WEAVIATE")
    if dry_run:
        print("*** MODE DRY-RUN - Aucune modification ***")
    print("=" * 60)
    print(f"URL: {WEAVIATE_URL}")
    print(f"Backup: {backup_path}")
    print(f"Clear existing: {clear_existing}")
    print("-" * 60)

    # Check the connection
    if not check_weaviate_ready():
        print("ERREUR: Weaviate n'est pas accessible")
        print(f"Vérifiez que le serveur tourne sur {WEAVIATE_URL}")
        sys.exit(1)
    print("Weaviate connecté ✓")

    # Load the backup
    print("\n[1/4] Chargement du backup...")
    with open(backup_path, "r", encoding="utf-8") as f:
        backup_data = json.load(f)
    metadata = backup_data.get("metadata", {})
    print(f" Timestamp: {metadata.get('timestamp', 'N/A')}")
    print(f" Source: {metadata.get('weaviate_url', 'N/A')}")
    print(f" Vectors inclus: {metadata.get('include_vectors', False)}")
    schema = backup_data.get("schema", {})
    backup_collections = backup_data.get("collections", {})

    # Decide which collections to restore
    if collections:
        classes_to_restore = [c for c in collections if c in backup_collections]
    else:
        classes_to_restore = list(backup_collections.keys())
    print(f"\n Collections à restaurer: {', '.join(classes_to_restore)}")

    # Compare with what already exists
    print("\n[2/4] Vérification des collections existantes...")
    existing_classes = get_existing_classes()
    print(f" Collections existantes: {', '.join(existing_classes) or '(aucune)'}")
    conflicts = [c for c in classes_to_restore if c in existing_classes]
    if conflicts:
        print(f" Conflits détectés: {', '.join(conflicts)}")
        if clear_existing:
            print(" → Seront supprimées (--clear-existing)")
        else:
            print(" → Seront ignorées (utilisez --clear-existing pour les remplacer)")
            classes_to_restore = [c for c in classes_to_restore if c not in conflicts]
    if not classes_to_restore:
        print("\nAucune collection à restaurer.")
        return {}

    # Prepare the schema
    print("\n[3/4] Préparation du schéma...")
    schema_classes = {c["class"]: c for c in schema.get("classes", [])}

    # Classes whose target could not be prepared; nothing is inserted into them.
    unavailable = set()

    # Delete the existing collections when requested
    if clear_existing and conflicts:
        print("\n Suppression des collections existantes...")
        for class_name in conflicts:
            if dry_run:
                print(f" [DRY-RUN] Suppression de {class_name}")
            elif delete_class(class_name):
                print(f" Supprimé: {class_name}")
            else:
                print(f" ERREUR suppression: {class_name}")
                unavailable.add(class_name)

    # Create the classes
    print("\n Création des classes...")
    for class_name in classes_to_restore:
        if class_name in unavailable:
            # Deletion failed: the old class (and its data) is still there.
            continue
        if class_name not in schema_classes:
            print(f" Schéma manquant pour: {class_name}")
            unavailable.add(class_name)
            continue
        if dry_run:
            print(f" [DRY-RUN] Création de {class_name}")
            continue
        # Re-check against the live schema (it changed after the deletions).
        if class_name in get_existing_classes():
            print(f" Existe déjà: {class_name}")
        elif create_class(schema_classes[class_name]):
            print(f" Créé: {class_name}")
        else:
            print(f" ERREUR création: {class_name}")
            unavailable.add(class_name)

    # Insert the objects
    print("\n[4/4] Insertion des objets...")
    stats = {"success": 0, "failures": 0, "by_class": {}}
    for class_name in classes_to_restore:
        objects = backup_collections.get(class_name, [])
        if class_name in unavailable:
            print(f" {class_name}: ignorée, {len(objects)} objets non restaurés")
            stats["by_class"][class_name] = {"success": 0, "failures": len(objects)}
            stats["failures"] += len(objects)
            continue
        if not objects:
            print(f" {class_name}: 0 objets")
            continue
        if dry_run:
            print(f" [DRY-RUN] {class_name}: {len(objects)} objets à insérer")
            stats["by_class"][class_name] = {"success": len(objects), "failures": 0}
            stats["success"] += len(objects)
        else:
            success, failures = batch_insert_objects(class_name, objects)
            stats["by_class"][class_name] = {"success": success, "failures": failures}
            stats["success"] += success
            stats["failures"] += failures

    # Summary
    print("\n" + "=" * 60)
    print("RESTAURATION TERMINÉE" + (" (DRY-RUN)" if dry_run else ""))
    print("=" * 60)
    print("\nStatistiques par collection:")
    for class_name, class_stats in stats.get("by_class", {}).items():
        print(f" - {class_name}: {class_stats['success']} succès, {class_stats['failures']} échecs")
    print(f"\nTotal: {stats['success']} succès, {stats['failures']} échecs")
    return stats
def main():
    """Command-line entry point: parse the options and run the restore.

    Exits with status 1 when the backup file does not exist. When
    ``--clear-existing`` is used without ``--dry-run``, an interactive
    confirmation is required; anything other than "y" cancels with status 0.
    """
    global WEAVIATE_URL  # rebound below when --url is given

    cli = argparse.ArgumentParser(
        description="Restauration de Weaviate depuis un backup",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Exemples:
python weaviate_restore.py backup.json
python weaviate_restore.py backup.json --dry-run
python weaviate_restore.py backup.json --collections Thought,Conversation
python weaviate_restore.py backup.json --clear-existing
ATTENTION: --clear-existing supprime les donnees existantes!
"""
    )
    cli.add_argument(
        "backup",
        type=Path,
        help="Chemin du fichier de backup",
    )
    cli.add_argument(
        "--collections", "-c",
        type=str,
        default=None,
        help="Collections à restaurer (séparées par des virgules)",
    )
    cli.add_argument(
        "--clear-existing",
        action="store_true",
        help="Supprimer les collections existantes avant restauration",
    )
    cli.add_argument(
        "--dry-run",
        action="store_true",
        help="Prévisualiser les actions sans les exécuter",
    )
    cli.add_argument(
        "--url",
        type=str,
        default=None,
        help=f"URL Weaviate (défaut: {WEAVIATE_URL})",
    )
    options = cli.parse_args()

    # The backup file must exist
    if not options.backup.exists():
        print(f"ERREUR: Fichier non trouvé: {options.backup}")
        sys.exit(1)

    # Weaviate URL override
    if options.url:
        WEAVIATE_URL = options.url

    # Comma-separated collection names, or None for "all"
    wanted = None
    if options.collections:
        wanted = [name.strip() for name in options.collections.split(",")]

    # Destructive run: ask for confirmation first
    destructive = options.clear_existing and not options.dry_run
    if destructive:
        print("⚠️  ATTENTION: --clear-existing va SUPPRIMER des données!")
        print(" Utilisez --dry-run pour prévisualiser.")
        answer = input(" Continuer? [y/N] ")
        if answer.lower() != "y":
            print("Annulé.")
            sys.exit(0)

    restore_weaviate(
        backup_path=options.backup,
        collections=wanted,
        clear_existing=options.clear_existing,
        dry_run=options.dry_run,
    )


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,439 @@
#!/usr/bin/env python3
"""
StateVector - Gestion du vecteur d'etat d'Ikario.
Le vecteur d'etat represente l'identite processuelle d'Ikario.
Il evolue a chaque occasion d'experience selon:
S(t) = f(S(t-1), occasion)
Ce module gere:
- Le schema Weaviate pour StateVector
- La creation de S(0) a partir de l'historique
- Les operations CRUD sur les etats
"""
import os
from datetime import datetime
from typing import Any
import numpy as np
import requests
# Configuration: base URL of the Weaviate server, read from the WEAVIATE_URL
# environment variable with a local-instance fallback.
WEAVIATE_URL = os.getenv("WEAVIATE_URL", "http://localhost:8080")

# Schema of the StateVector collection, posted as-is to /v1/schema by
# create_state_vector_collection().
# NOTE(review): the "previous_state_id" description says "None" for S(0),
# but create_initial_state() stores -1 for that field.
STATE_VECTOR_SCHEMA = {
    "class": "StateVector",
    "description": "Vecteurs d'etat - identite processuelle d'Ikario",
    "vectorizer": "none",  # BGE-M3 embedding supplied manually
    "properties": [
        {
            "name": "state_id",
            "dataType": ["int"],
            "description": "Numero sequentiel de l'etat (0, 1, 2...)"
        },
        {
            "name": "timestamp",
            "dataType": ["date"],
            "description": "Moment de creation de cet etat"
        },
        {
            "name": "previous_state_id",
            "dataType": ["int"],
            "description": "ID de l'etat precedent (None pour S(0))"
        },
        {
            "name": "trigger_type",
            "dataType": ["text"],
            "description": "Type de declencheur: user, timer, event, initialization"
        },
        {
            "name": "trigger_content",
            "dataType": ["text"],
            "description": "Contenu du declencheur"
        },
        {
            "name": "occasion_summary",
            "dataType": ["text"],
            "description": "Resume de l'occasion"
        },
        {
            "name": "response_summary",
            "dataType": ["text"],
            "description": "Resume de la reponse"
        },
        {
            "name": "thoughts_created",
            "dataType": ["int"],
            "description": "Nombre de pensees generees lors de cette occasion"
        },
        {
            "name": "source_thoughts_count",
            "dataType": ["int"],
            "description": "Nombre de pensees utilisees pour construire cet etat (S(0))"
        },
        {
            "name": "source_messages_count",
            "dataType": ["int"],
            "description": "Nombre de messages utilises pour construire cet etat (S(0))"
        },
    ],
    # Vector index settings: cosine distance.
    "vectorIndexConfig": {
        "distance": "cosine"
    }
}
def check_weaviate_ready() -> bool:
    """Tell whether Weaviate is reachable.

    Returns:
        True when the readiness endpoint answers 200 within 5 seconds, False
        on any other status or on a ``requests.RequestException``.
    """
    ready_url = f"{WEAVIATE_URL}/v1/.well-known/ready"
    try:
        reply = requests.get(ready_url, timeout=5)
    except requests.RequestException:
        return False
    else:
        return reply.status_code == 200
def get_existing_classes() -> list[str]:
    """List the names of the classes currently defined in Weaviate.

    Raises:
        requests.HTTPError: when the schema request returns an error status.
    """
    reply = requests.get(f"{WEAVIATE_URL}/v1/schema")
    reply.raise_for_status()
    declared = reply.json().get("classes", [])
    return [entry["class"] for entry in declared]
def create_state_vector_collection() -> bool:
    """
    Create the StateVector collection in Weaviate.

    Returns:
        True when the collection was created; False when it already existed
        or when the creation request failed (status and body are printed).
    """
    if "StateVector" in get_existing_classes():
        print("[StateVector] Collection existe deja")
        return False

    reply = requests.post(
        f"{WEAVIATE_URL}/v1/schema",
        json=STATE_VECTOR_SCHEMA,
        headers={"Content-Type": "application/json"},
    )
    if reply.status_code != 200:
        print(f"[StateVector] Erreur creation: {reply.status_code}")
        print(reply.text)
        return False

    print("[StateVector] Collection creee avec succes")
    return True
def delete_state_vector_collection() -> bool:
    """Delete the StateVector collection (used for resets).

    Returns:
        True when the server answers 200, False for any other status.
    """
    outcome = requests.delete(f"{WEAVIATE_URL}/v1/schema/StateVector")
    deleted = outcome.status_code == 200
    return deleted
def get_all_thoughts() -> list[dict]:
    """Fetch every object of the Thought collection.

    Pages are requested 100 objects at a time with the ``after`` cursor (ID
    of the last object of the previous page) rather than ``offset``:
    offset-based listing is capped by the server's maximum result window and
    would silently truncate a large collection.

    Returns:
        All objects retrieved. If a page request fails, the HTTP status is
        printed and the objects fetched so far are returned.
    """
    thoughts = []
    params = {"class": "Thought", "limit": 100}

    while True:
        response = requests.get(f"{WEAVIATE_URL}/v1/objects", params=params)
        if response.status_code != 200:
            # Report the failure: a partial list would otherwise go unnoticed.
            print(f"[StateVector] Erreur recuperation Thought: {response.status_code}")
            break

        batch = response.json().get("objects", [])
        if not batch:
            break

        thoughts.extend(batch)
        # Cursor for the next page: resume right after the last object seen.
        params["after"] = batch[-1]["id"]

    return thoughts
def get_all_messages() -> list[dict]:
    """Fetch every object of the Message collection.

    Pages are requested 100 objects at a time with the ``after`` cursor (ID
    of the last object of the previous page) rather than ``offset``:
    offset-based listing is capped by the server's maximum result window and
    would silently truncate a large collection.

    Returns:
        All objects retrieved. If a page request fails, the HTTP status is
        printed and the objects fetched so far are returned.
    """
    messages = []
    params = {"class": "Message", "limit": 100}

    while True:
        response = requests.get(f"{WEAVIATE_URL}/v1/objects", params=params)
        if response.status_code != 200:
            # Report the failure: a partial list would otherwise go unnoticed.
            print(f"[StateVector] Erreur recuperation Message: {response.status_code}")
            break

        batch = response.json().get("objects", [])
        if not batch:
            break

        messages.extend(batch)
        # Cursor for the next page: resume right after the last object seen.
        params["after"] = batch[-1]["id"]

    return messages
def filter_thoughts(thoughts: list[dict]) -> list[dict]:
    """
    Drop the thoughts that look test-related.

    A thought is excluded when any of the following holds (comparisons are
    case-insensitive):
    - its ``thought_type`` is "test", "debug" or "example";
    - its ``content`` is shorter than 20 characters;
    - its ``content`` contains one of the excluded keywords as a substring.

    Returns:
        The remaining thoughts, in their original order.
    """
    banned_types = ("test", "debug", "example")
    banned_keywords = (
        "test", "debug", "todo", "fixme", "xxx",
        "lorem ipsum", "example", "placeholder",
    )

    def _is_meaningful(thought: dict) -> bool:
        fields = thought.get("properties", {})
        text = fields.get("content", "").lower()
        kind = fields.get("thought_type", "").lower()
        if kind in banned_types:
            return False
        if len(text) < 20:
            return False
        return not any(keyword in text for keyword in banned_keywords)

    return [thought for thought in thoughts if _is_meaningful(thought)]
def filter_assistant_messages(messages: list[dict]) -> list[dict]:
    """
    Keep only the substantial messages written by the assistant.

    A message is kept when its ``role`` equals "assistant"
    (case-insensitive), its ``content`` has at least 50 characters and the
    content does not start with "[Error" or "[System".

    Returns:
        The retained messages, in their original order.
    """
    rejected_prefixes = ("[Error", "[System")

    def _is_relevant(message: dict) -> bool:
        fields = message.get("properties", {})
        text = fields.get("content", "")
        return (
            fields.get("role", "").lower() == "assistant"
            and len(text) >= 50
            and not text.startswith(rejected_prefixes)
        )

    return [message for message in messages if _is_relevant(message)]
def compute_aggregate_embedding(
thoughts: list[dict],
messages: list[dict],
model
) -> np.ndarray:
"""
Calcule l'embedding agrege a partir des pensees et messages.
Strategie:
1. Extraire le contenu textuel de chaque element
2. Calculer l'embedding de chaque texte
3. Faire la moyenne ponderee (pensees ont plus de poids)
4. Normaliser le vecteur final
Args:
thoughts: Liste des pensees filtrees
messages: Liste des messages filtres
model: Modele SentenceTransformer
Returns:
Vecteur normalise 1024-dim
"""
embeddings = []
weights = []
# Traiter les pensees (poids = 2.0 car plus significatives)
print(f" Traitement de {len(thoughts)} pensees...")
for thought in thoughts:
content = thought.get("properties", {}).get("content", "")
if content:
emb = model.encode(content)
embeddings.append(emb)
weights.append(2.0) # Poids double pour les pensees
# Traiter les messages (poids = 1.0)
print(f" Traitement de {len(messages)} messages...")
for msg in messages:
content = msg.get("properties", {}).get("content", "")
if content:
# Tronquer les messages tres longs
if len(content) > 2000:
content = content[:2000]
emb = model.encode(content)
embeddings.append(emb)
weights.append(1.0)
if not embeddings:
raise ValueError("Aucun contenu a encoder!")
# Convertir en arrays numpy
embeddings = np.array(embeddings)
weights = np.array(weights)
# Moyenne ponderee
weights = weights / weights.sum() # Normaliser les poids
aggregate = np.average(embeddings, axis=0, weights=weights)
# Normaliser le vecteur final
aggregate = aggregate / np.linalg.norm(aggregate)
return aggregate
def create_initial_state(
    thoughts: list[dict],
    messages: list[dict],
    embedding: np.ndarray
) -> dict:
    """
    Create the initial state S(0) in Weaviate.

    Args:
        thoughts: Thoughts used to build S(0) (only their count is stored)
        messages: Messages used to build S(0) (only their count is stored)
        embedding: Computed state vector, stored as the object's vector

    Returns:
        The S(0) properties, with the ID assigned by Weaviate under ``"id"``.

    Raises:
        RuntimeError: when Weaviate does not answer with 200 or 201.
    """
    from datetime import timezone  # local import: the module only imports `datetime`

    # The "Z" suffix means UTC, so the timestamp must really be taken in UTC;
    # a naive local time suffixed with "Z" would be off by the local offset.
    created_at = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")

    s0_data = {
        "state_id": 0,
        "timestamp": created_at,
        "previous_state_id": -1,  # no previous state
        "trigger_type": "initialization",
        "trigger_content": "Creation de l'etat initial a partir de l'historique",
        "occasion_summary": f"Naissance processuelle d'Ikario - agregation de {len(thoughts)} pensees et {len(messages)} messages",
        "response_summary": "Etat initial S(0) cree avec succes",
        "thoughts_created": 0,
        "source_thoughts_count": len(thoughts),
        "source_messages_count": len(messages),
    }

    # Create the object together with its vector
    response = requests.post(
        f"{WEAVIATE_URL}/v1/objects",
        json={
            "class": "StateVector",
            "properties": s0_data,
            "vector": embedding.tolist()
        },
        headers={"Content-Type": "application/json"}
    )
    if response.status_code in [200, 201]:
        result = response.json()
        s0_data["id"] = result.get("id")
        print(f"[StateVector] S(0) cree avec ID: {s0_data['id']}")
        return s0_data
    else:
        print(f"[StateVector] Erreur creation S(0): {response.status_code}")
        print(response.text)
        raise RuntimeError("Impossible de creer S(0)")
def get_current_state_id() -> int:
    """Return the ``state_id`` of the most recent state.

    The server is asked for the single StateVector with the highest
    ``state_id`` (GraphQL ``Get`` sorted in descending order, limit 1).
    Listing a fixed page of objects and taking the maximum would return a
    wrong value once there are more states than the page size.

    Returns:
        The highest ``state_id``, or -1 when the request does not return 200,
        when no state exists, or when the response carries no usable value.
    """
    query = {
        "query": (
            "{ Get { StateVector("
            'sort: [{path: ["state_id"], order: desc}], limit: 1'
            ") { state_id } } }"
        )
    }
    response = requests.post(
        f"{WEAVIATE_URL}/v1/graphql",
        json=query,
        headers={"Content-Type": "application/json"}
    )
    if response.status_code != 200:
        return -1

    payload = response.json()
    # Any level may be missing or null when the query reports errors.
    states = ((payload.get("data") or {}).get("Get") or {}).get("StateVector") or []
    if not states:
        return -1

    latest = states[0].get("state_id")
    return -1 if latest is None else int(latest)
def get_state_vector(state_id: int) -> dict | None:
    """
    Fetch a state by its ``state_id``.

    A GraphQL ``Get`` query filtered on ``state_id`` is sent; every stored
    property is requested, plus the object's ID and vector under
    ``_additional``.

    Args:
        state_id: Sequence number of the state

    Returns:
        The first matching StateVector, or None when the request does not
        return 200 or when nothing matches.
    """
    graphql = """
{
Get {
StateVector(where: {
path: ["state_id"],
operator: Equal,
valueInt: %d
}) {
state_id
timestamp
previous_state_id
trigger_type
trigger_content
occasion_summary
response_summary
thoughts_created
source_thoughts_count
source_messages_count
_additional {
id
vector
}
}
}
}
""" % state_id

    reply = requests.post(
        f"{WEAVIATE_URL}/v1/graphql",
        json={"query": graphql},
        headers={"Content-Type": "application/json"},
    )
    if reply.status_code != 200:
        return None

    matches = reply.json().get("data", {}).get("Get", {}).get("StateVector", [])
    if not matches:
        return None
    return matches[0]

View File

@@ -0,0 +1 @@
# Tests pour ikario_processual

View File

@@ -0,0 +1,208 @@
#!/usr/bin/env python3
"""
Tests pour la Phase 0: Backup et restauration Weaviate.
Usage:
pytest tests/test_phase0_backup.py -v
pytest tests/test_phase0_backup.py -v -k test_backup
"""
import json
import os
import tempfile
from pathlib import Path
import pytest
import requests
# Configuration: base URL of the Weaviate server under test, read from the
# WEAVIATE_URL environment variable with a local-instance fallback.
WEAVIATE_URL = os.getenv("WEAVIATE_URL", "http://localhost:8080")
def weaviate_is_available() -> bool:
    """Tell whether Weaviate is reachable.

    Returns:
        True when the readiness endpoint answers 200 within 5 seconds, False
        on any other status or on a ``requests.RequestException``.
    """
    probe_url = f"{WEAVIATE_URL}/v1/.well-known/ready"
    try:
        answer = requests.get(probe_url, timeout=5)
    except requests.RequestException:
        return False
    return answer.status_code == 200
# Skip every test of this module when Weaviate is not available.
# NOTE: weaviate_is_available() is evaluated when the module is imported, so
# importing it sends one HTTP request (5-second timeout).
pytestmark = pytest.mark.skipif(
    not weaviate_is_available(),
    reason=f"Weaviate non disponible sur {WEAVIATE_URL}"
)
class TestWeaviateConnection:
    """Connectivity tests against the live Weaviate instance."""

    def test_weaviate_ready(self):
        """The readiness endpoint must answer 200."""
        reply = requests.get(f"{WEAVIATE_URL}/v1/.well-known/ready")
        assert reply.status_code == 200

    def test_weaviate_schema_accessible(self):
        """The schema must be retrievable and expose a ``classes`` key."""
        reply = requests.get(f"{WEAVIATE_URL}/v1/schema")
        assert reply.status_code == 200
        assert "classes" in reply.json()

    def test_weaviate_has_collections(self):
        """At least one of the expected collections must exist."""
        schema = requests.get(f"{WEAVIATE_URL}/v1/schema").json()
        classes = [entry["class"] for entry in schema.get("classes", [])]

        # At least one of the expected collections
        expected = ["Thought", "Conversation", "Message", "Chunk", "Work", "Summary"]
        found = [name for name in classes if name in expected]
        assert found, f"Aucune collection trouvée parmi {expected}. Classes existantes: {classes}"
class TestBackupScript:
    """Tests for the backup script."""

    @staticmethod
    def _backup_function():
        """Import ``backup_weaviate`` from the scripts directory and return it."""
        # Imported lazily so that collecting this module never fails on it.
        import sys
        sys.path.insert(0, str(Path(__file__).parent.parent / "scripts"))
        from weaviate_backup import backup_weaviate
        return backup_weaviate

    def test_backup_creates_file(self):
        """The backup must create a non-empty JSON file."""
        backup_weaviate = self._backup_function()
        with tempfile.TemporaryDirectory() as workdir:
            target = Path(workdir) / "test_backup.json"
            backup_weaviate(
                output_path=target,
                collections=None,  # all collections
                include_vectors=False,  # faster for the test
            )
            assert target.exists(), "Le fichier de backup n'a pas été créé"
            assert target.stat().st_size > 0, "Le fichier de backup est vide"

    def test_backup_structure(self):
        """The backup must have the expected top-level structure."""
        backup_weaviate = self._backup_function()
        with tempfile.TemporaryDirectory() as workdir:
            target = Path(workdir) / "test_backup.json"
            backup_weaviate(
                output_path=target,
                collections=["Thought"],  # a single collection for the test
                include_vectors=False,
            )
            with open(target, "r", encoding="utf-8") as handle:
                content = json.load(handle)

            # Top-level sections
            for section in ("metadata", "schema", "collections"):
                assert section in content
            # Metadata fields
            for field in ("timestamp", "weaviate_url", "version"):
                assert field in content["metadata"]

    def test_backup_with_vectors(self):
        """A backup made with vectors must include the embeddings."""
        backup_weaviate = self._backup_function()
        with tempfile.TemporaryDirectory() as workdir:
            target = Path(workdir) / "test_backup_vectors.json"
            backup_weaviate(
                output_path=target,
                collections=["Thought"],
                include_vectors=True,
            )
            with open(target, "r", encoding="utf-8") as handle:
                content = json.load(handle)

            # Only meaningful when the collection holds at least one object.
            thoughts = content.get("collections", {}).get("Thought", [])
            if thoughts:
                assert any("vector" in item for item in thoughts), \
                    "Aucun objet n'a de vecteur alors que include_vectors=True"
class TestRestoreScript:
    """Tests for the restore script."""

    @staticmethod
    def _count_thoughts():
        """Return the total number of ``Thought`` objects stored in Weaviate.

        Uses the GraphQL ``Aggregate`` query so the result is the real
        object count. Listing objects with ``limit=1`` can only ever yield
        0 or 1, which would hide any object added by a faulty dry-run.
        """
        response = requests.post(
            f"{WEAVIATE_URL}/v1/graphql",
            json={"query": "{ Aggregate { Thought { meta { count } } } }"},
            timeout=30,  # never let a stalled server hang the test run
        )
        response.raise_for_status()
        body = response.json()
        assert not body.get("errors"), f"Erreur GraphQL: {body.get('errors')}"
        return body["data"]["Aggregate"]["Thought"][0]["meta"]["count"]

    def test_restore_dry_run(self):
        """A dry-run restore must leave the stored data untouched.

        Takes a backup of ``Thought``, counts the stored objects, replays the
        backup with ``dry_run=True`` and checks the count did not change.
        """
        import sys

        # Make the sibling ``scripts`` directory importable.
        sys.path.insert(0, str(Path(__file__).parent.parent / "scripts"))
        from weaviate_backup import backup_weaviate
        from weaviate_restore import restore_weaviate

        with tempfile.TemporaryDirectory() as tmpdir:
            # First produce a backup to restore from.
            backup_path = Path(tmpdir) / "test_backup.json"
            backup_weaviate(
                output_path=backup_path,
                collections=["Thought"],
                include_vectors=False,
            )

            count_before = self._count_thoughts()

            restore_weaviate(
                backup_path=backup_path,
                collections=["Thought"],
                clear_existing=False,
                dry_run=True,
            )

            count_after = self._count_thoughts()

        assert count_before == count_after, "Le dry-run a modifié les données!"
class TestBackupRestoreCycle:
    """Tests for the full backup -> restore cycle."""

    def test_backup_restore_roundtrip(self):
        """Placeholder for the full backup -> restore -> verification test.

        A real implementation needs a temporary collection so existing data
        is not affected. Until that exists the test is reported as skipped,
        because a bare ``pass`` would be counted as a successful round-trip.
        """
        pytest.skip("Test de roundtrip backup → restore non implémenté")
def test_exports_directory_exists():
    """Ensure the ``exports`` directory exists, creating it when missing."""
    base_dir = Path(__file__).parent.parent.parent
    exports_dir = base_dir / "exports"
    # Creating the directory is part of the check: it must exist or be creatable.
    exports_dir.mkdir(parents=True, exist_ok=True)
    assert exports_dir.exists()
if __name__ == "__main__":
    # Propagate pytest's exit status so running this file as a script
    # exits non-zero when a test fails.
    raise SystemExit(pytest.main([__file__, "-v"]))

View File

@@ -0,0 +1,202 @@
#!/usr/bin/env python3
"""
Tests pour la Phase 1: StateVector et S(0).
Usage:
pytest tests/test_phase1_state_vector.py -v
"""
import os
import sys
from pathlib import Path
import pytest
import requests
import numpy as np
# Ajouter le parent au path
sys.path.insert(0, str(Path(__file__).parent.parent))
from state_vector import (
check_weaviate_ready,
get_existing_classes,
filter_thoughts,
filter_assistant_messages,
get_state_vector,
get_current_state_id,
WEAVIATE_URL,
)
def weaviate_is_available() -> bool:
    """Report whether Weaviate is reachable, as told by ``check_weaviate_ready``."""
    ready = check_weaviate_ready()
    return ready
# Module-wide marker: skip every test in this file when Weaviate is unreachable.
pytestmark = pytest.mark.skipif(
    not weaviate_is_available(), reason=f"Weaviate non disponible sur {WEAVIATE_URL}"
)
class TestStateVectorCollection:
    """Checks on the ``StateVector`` collection."""

    def test_state_vector_collection_exists(self):
        """The ``StateVector`` collection must be present."""
        known = get_existing_classes()
        assert "StateVector" in known, \
            f"StateVector non trouve. Classes: {known}"

    def test_state_vector_schema_correct(self):
        """The ``StateVector`` schema must declare the required properties."""
        schema = requests.get(f"{WEAVIATE_URL}/v1/schema").json()

        # First class entry named StateVector, or None when absent.
        state_vector_class = next(
            (entry for entry in schema.get("classes", []) if entry["class"] == "StateVector"),
            None,
        )
        assert state_vector_class is not None

        # Required properties
        declared = [prop["name"] for prop in state_vector_class.get("properties", [])]
        for req in ("state_id", "timestamp", "trigger_type", "occasion_summary"):
            assert req in declared, f"Propriete manquante: {req}"
class TestInitialState:
    """Checks on the initial state S(0)."""

    def test_s0_exists(self):
        """S(0) must be stored and carry ``state_id`` 0."""
        initial = get_state_vector(0)
        assert initial is not None, "S(0) non trouve"
        assert initial.get("state_id") == 0

    def test_s0_has_vector(self):
        """S(0) must come with a vector."""
        initial = get_state_vector(0)
        assert initial is not None
        additional = initial.get("_additional", {})
        assert additional.get("vector") is not None, "S(0) n'a pas de vecteur"

    def test_s0_vector_is_1024_dim(self):
        """The S(0) vector must have 1024 dimensions (BGE-M3)."""
        initial = get_state_vector(0)
        assert initial is not None
        components = initial.get("_additional", {}).get("vector")
        assert components is not None
        size = len(components)
        assert size == 1024, f"Dimension: {size} (attendu: 1024)"

    def test_s0_vector_is_normalized(self):
        """The S(0) vector must have a norm close to 1."""
        initial = get_state_vector(0)
        assert initial is not None
        additional = initial.get("_additional", {})
        components = np.array(additional.get("vector", []))
        norm = np.linalg.norm(components)
        assert abs(norm - 1.0) < 0.01, f"Norme: {norm} (attendu: ~1.0)"

    def test_s0_has_source_counts(self):
        """S(0) must record how many thoughts and messages it was built from."""
        initial = get_state_vector(0)
        assert initial is not None
        n_thoughts = initial.get("source_thoughts_count")
        n_messages = initial.get("source_messages_count")
        assert n_thoughts is not None, "source_thoughts_count manquant"
        assert n_messages is not None, "source_messages_count manquant"
        # At least one of the two sources must have contributed.
        assert n_thoughts > 0 or n_messages > 0, \
            "S(0) doit etre construit a partir de donnees"

    def test_s0_trigger_type_is_initialization(self):
        """The ``trigger_type`` of S(0) must be ``'initialization'``."""
        initial = get_state_vector(0)
        assert initial is not None
        assert initial.get("trigger_type") == "initialization"
class TestFiltering:
    """Checks on the filtering helpers."""

    @staticmethod
    def _thought(content, thought_type):
        """Build a thought record in the shape the tests feed to ``filter_thoughts``."""
        return {"properties": {"content": content, "thought_type": thought_type}}

    @staticmethod
    def _message(role, content):
        """Build a message record in the shape the tests feed to ``filter_assistant_messages``."""
        return {"properties": {"role": role, "content": content}}

    def test_filter_thoughts_excludes_test(self):
        """Test and debug thoughts must be filtered out."""
        candidates = [
            self._thought("Ceci est une vraie pensee philosophique", "reflection"),
            self._thought("test test test", "test"),
            self._thought("debug: checking values", "debug"),
            self._thought("Une autre pensee valide sur Whitehead", "reflection"),
        ]
        kept = filter_thoughts(candidates)
        assert len(kept) == 2
        for item in kept:
            content = item["properties"]["content"]
            assert "test" not in content.lower()

    def test_filter_thoughts_excludes_short(self):
        """Thoughts that are too short must be filtered out."""
        candidates = [
            self._thought("OK", "reflection"),
            self._thought("Une pensee suffisamment longue pour etre valide", "reflection"),
        ]
        kept = filter_thoughts(candidates)
        assert len(kept) == 1
        survivor = kept[0]["properties"]["content"]
        assert len(survivor) >= 20

    def test_filter_messages_keeps_only_assistant(self):
        """Only assistant messages must be kept."""
        candidates = [
            self._message("user", "Question de l'utilisateur"),
            self._message("assistant", "Reponse d'Ikario avec suffisamment de contenu pour etre valide"),
            self._message("system", "Message systeme"),
        ]
        kept = filter_assistant_messages(candidates)
        assert len(kept) == 1
        assert kept[0]["properties"]["role"] == "assistant"

    def test_filter_messages_excludes_short(self):
        """Assistant messages that are too short must be filtered out."""
        candidates = [
            self._message("assistant", "OK"),
            self._message("assistant", "Une reponse complete avec suffisamment de contenu pour representer une vraie interaction"),
        ]
        kept = filter_assistant_messages(candidates)
        assert len(kept) == 1
        survivor = kept[0]["properties"]["content"]
        assert len(survivor) >= 50
class TestStateVectorOperations:
    """Checks on the StateVector access helpers."""

    def test_get_current_state_id(self):
        """``get_current_state_id`` must return 0 or more."""
        latest = get_current_state_id()
        assert latest >= 0, "Aucun etat trouve"

    def test_get_state_vector_returns_none_for_invalid_id(self):
        """``get_state_vector`` must return ``None`` for an id that does not exist."""
        missing = get_state_vector(99999)
        assert missing is None
if __name__ == "__main__":
    # Propagate pytest's exit status so running this file as a script
    # exits non-zero when a test fails.
    raise SystemExit(pytest.main([__file__, "-v"]))

View File

@@ -0,0 +1,210 @@
#!/usr/bin/env python3
"""
Tests pour la Phase 2: Directions de Projection.
Usage:
pytest tests/test_phase2_directions.py -v
"""
import os
import sys
from pathlib import Path
import pytest
import requests
import numpy as np
# Ajouter le parent au path
sys.path.insert(0, str(Path(__file__).parent.parent))
from state_vector import (
check_weaviate_ready,
get_state_vector,
WEAVIATE_URL,
)
from projection_directions import (
get_existing_classes,
get_direction,
get_all_directions,
get_state_profile,
project_state_on_direction,
DIRECTIONS_CONFIG,
)
def weaviate_is_available() -> bool:
    """Report whether Weaviate is reachable, as told by ``check_weaviate_ready``."""
    ready = check_weaviate_ready()
    return ready
# Module-wide marker: skip every test in this file when Weaviate is unreachable.
pytestmark = pytest.mark.skipif(
    not weaviate_is_available(), reason=f"Weaviate non disponible sur {WEAVIATE_URL}"
)
class TestProjectionDirectionCollection:
    """Checks on the ``ProjectionDirection`` collection."""

    def test_collection_exists(self):
        """The ``ProjectionDirection`` collection must be present."""
        known = get_existing_classes()
        assert "ProjectionDirection" in known

    def test_all_directions_created(self):
        """Every direction named in ``DIRECTIONS_CONFIG`` must be stored."""
        direction_names = [entry["name"] for entry in get_all_directions()]
        for name in DIRECTIONS_CONFIG:
            assert name in direction_names, f"Direction manquante: {name}"

    def test_directions_count(self):
        """The number of stored directions must equal the number configured."""
        stored = get_all_directions()
        expected_total = len(DIRECTIONS_CONFIG)
        assert len(stored) == expected_total
class TestDirectionVectors:
    """Tests for the direction vectors."""

    def test_curiosity_direction_exists(self):
        """The ``curiosity`` direction must exist in the ``epistemic`` category."""
        direction = get_direction("curiosity")
        assert direction is not None
        assert direction["name"] == "curiosity"
        assert direction["category"] == "epistemic"

    def test_direction_has_vector(self):
        """The ``curiosity`` direction must carry a non-empty vector."""
        direction = get_direction("curiosity")
        assert direction is not None
        vector = direction.get("_additional", {}).get("vector")
        assert vector is not None
        assert len(vector) > 0

    def test_direction_vector_is_1024_dim(self):
        """The ``curiosity`` direction vector must have 1024 dimensions."""
        direction = get_direction("curiosity")
        assert direction is not None
        vector = direction.get("_additional", {}).get("vector")
        # Guard first so a missing vector is a clean assertion failure
        # rather than a TypeError from len(None).
        assert vector is not None, "La direction 'curiosity' n'a pas de vecteur"
        assert len(vector) == 1024

    def test_direction_vector_is_normalized(self):
        """The ``curiosity`` direction vector must have a norm close to 1."""
        direction = get_direction("curiosity")
        assert direction is not None
        raw_vector = direction.get("_additional", {}).get("vector")
        # Guard first: np.array(None) is not a numeric vector.
        assert raw_vector is not None, "La direction 'curiosity' n'a pas de vecteur"
        norm = np.linalg.norm(np.array(raw_vector))
        assert abs(norm - 1.0) < 0.01, f"Norme: {norm}"

    def test_all_categories_present(self):
        """The stored directions must cover exactly the expected categories."""
        directions = get_all_directions()
        categories = {d["category"] for d in directions}
        expected_categories = {"epistemic", "affective", "relational", "vital", "philosophical"}
        assert categories == expected_categories
class TestProjection:
    """Tests for the projection functions."""

    def test_projection_in_range(self):
        """Every component of the S(0) profile must lie within [-1, 1]."""
        s0 = get_state_vector(0)
        assert s0 is not None
        state_vec = np.array(s0.get("_additional", {}).get("vector"))
        profile = get_state_profile(state_vec)
        for components in profile.values():
            for name, value in components.items():
                assert -1 <= value <= 1, f"{name} = {value} hors limites [-1, 1]"

    def test_get_state_profile_structure(self):
        """The profile must be a dict of dicts whose leaf values are floats."""
        s0 = get_state_vector(0)
        assert s0 is not None
        state_vec = np.array(s0.get("_additional", {}).get("vector"))
        profile = get_state_profile(state_vec)
        assert isinstance(profile, dict)
        for components in profile.values():
            assert isinstance(components, dict)
            for value in components.values():
                assert isinstance(value, float)

    def test_projection_orthogonal_vectors(self):
        """Projecting a vector on an orthogonal one must give about 0."""
        v1 = np.zeros(1024)
        v1[0] = 1.0
        v2 = np.zeros(1024)
        v2[1] = 1.0
        projection = project_state_on_direction(v1, v2)
        assert abs(projection) < 0.001

    def test_projection_parallel_vectors(self):
        """Projecting a unit vector on itself must give about 1."""
        # Seeded generator: the test uses the same vector on every run.
        rng = np.random.default_rng(0)
        v = rng.standard_normal(1024)
        v = v / np.linalg.norm(v)
        projection = project_state_on_direction(v, v)
        assert abs(projection - 1.0) < 0.001

    def test_projection_antiparallel_vectors(self):
        """Projecting a unit vector on its opposite must give about -1."""
        # Seeded generator: the test uses the same vector on every run.
        rng = np.random.default_rng(1)
        v = rng.standard_normal(1024)
        v = v / np.linalg.norm(v)
        projection = project_state_on_direction(v, -v)
        assert abs(projection + 1.0) < 0.001
class TestS0Profile:
    """Checks on the profile computed from S(0)."""

    @staticmethod
    def _s0_profile():
        """Load S(0) and return the profile computed from its vector."""
        initial = get_state_vector(0)
        assert initial is not None
        additional = initial.get("_additional", {})
        return get_state_profile(np.array(additional.get("vector")))

    def test_s0_has_profile(self):
        """A non-empty profile must be computable for S(0)."""
        profile = self._s0_profile()
        assert len(profile) > 0

    def test_s0_profile_has_all_categories(self):
        """The S(0) profile must contain exactly the expected categories."""
        profile = self._s0_profile()
        expected = {"epistemic", "affective", "relational", "vital", "philosophical"}
        assert set(profile.keys()) == expected

    def test_s0_has_curiosity_component(self):
        """The S(0) profile must expose ``curiosity`` under ``epistemic``."""
        profile = self._s0_profile()
        epistemic = profile.get("epistemic", {})
        assert "curiosity" in epistemic
if __name__ == "__main__":
    # Propagate pytest's exit status so running this file as a script
    # exits non-zero when a test fails.
    raise SystemExit(pytest.main([__file__, "-v"]))