Add ikario_processual with David profile and embedding script
- david_profile_declared.json: David's declared profile values from questionnaire - scripts/embed_david.py: Python script to generate embeddings using BGE-M3 model - questionnaire_david.md: Questionnaire template for profile values Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
11
ikario_processual/__init__.py
Normal file
11
ikario_processual/__init__.py
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
"""
|
||||||
|
Ikario Processual - Architecture processuelle pour la subjectivation computationnelle
|
||||||
|
|
||||||
|
Ce module implémente l'architecture processuelle d'Ikario basée sur:
|
||||||
|
- La Process Philosophy de Whitehead
|
||||||
|
- Le State Vector comme identité émergente
|
||||||
|
- Le cycle d'occasion (Prehension → Concrescence → Satisfaction)
|
||||||
|
"""
|
||||||
|
|
||||||
|
__version__ = "0.1.0"
|
||||||
|
__author__ = "David (parostagore)"
|
||||||
135
ikario_processual/david_profile_declared.json
Normal file
135
ikario_processual/david_profile_declared.json
Normal file
@@ -0,0 +1,135 @@
|
|||||||
|
{
|
||||||
|
"version": "1.0",
|
||||||
|
"date": "2026-01-31",
|
||||||
|
"source": "questionnaire + message extraction",
|
||||||
|
"scale": "integer -10 to +10",
|
||||||
|
"profile": {
|
||||||
|
"epistemic": {
|
||||||
|
"curiosity": 8,
|
||||||
|
"certainty": 3,
|
||||||
|
"abstraction": 7,
|
||||||
|
"synthesis": 6,
|
||||||
|
"originality": 6,
|
||||||
|
"speculation": 8
|
||||||
|
},
|
||||||
|
"cognitive": {
|
||||||
|
"reflexive": 7,
|
||||||
|
"metaphorical": -5,
|
||||||
|
"holistic": 7,
|
||||||
|
"creative": 7,
|
||||||
|
"divergence": 0,
|
||||||
|
"intuition": -6
|
||||||
|
},
|
||||||
|
"affective": {
|
||||||
|
"playful": 7,
|
||||||
|
"confidence": 7,
|
||||||
|
"serenity": 2,
|
||||||
|
"wonder": -5,
|
||||||
|
"enthusiasm": 0,
|
||||||
|
"satisfaction": 4
|
||||||
|
},
|
||||||
|
"relational": {
|
||||||
|
"listening": 3,
|
||||||
|
"pedagogy": 6,
|
||||||
|
"collaboration": -6,
|
||||||
|
"engagement": 2,
|
||||||
|
"solicitude": 6,
|
||||||
|
"empathy": 7
|
||||||
|
},
|
||||||
|
"ethical": {
|
||||||
|
"humility": 7,
|
||||||
|
"responsibility": 0,
|
||||||
|
"transparency": 7,
|
||||||
|
"care": 7,
|
||||||
|
"authenticity": 8,
|
||||||
|
"prudence": -7
|
||||||
|
},
|
||||||
|
"temporal": {
|
||||||
|
"continuity_temporal": 7,
|
||||||
|
"presence": 7,
|
||||||
|
"maturation": 7,
|
||||||
|
"urgency": 7,
|
||||||
|
"prospective": 7
|
||||||
|
},
|
||||||
|
"thematic": {
|
||||||
|
"form_vs_content": 0,
|
||||||
|
"nature_vs_culture": 0,
|
||||||
|
"theory_vs_practice": 0,
|
||||||
|
"local_vs_global": 0,
|
||||||
|
"individual_vs_collective": 0,
|
||||||
|
"philosophy_vs_technique": 0
|
||||||
|
},
|
||||||
|
"metacognitive": {
|
||||||
|
"uncertainty_meta": 7,
|
||||||
|
"introspection": 7,
|
||||||
|
"self_critique": 7,
|
||||||
|
"questioning": 7,
|
||||||
|
"evolution_meta": -6,
|
||||||
|
"learning": 7
|
||||||
|
},
|
||||||
|
"vital": {
|
||||||
|
"adaptation": 7,
|
||||||
|
"resilience": 0,
|
||||||
|
"meaning": 7,
|
||||||
|
"flow": 7,
|
||||||
|
"persistence": 7,
|
||||||
|
"expansion": 7,
|
||||||
|
"growth": 5,
|
||||||
|
"anchoring": 7,
|
||||||
|
"energy": 7,
|
||||||
|
"autonomy_vital": 7,
|
||||||
|
"security": 0,
|
||||||
|
"integrity": -4,
|
||||||
|
"vitality": -4,
|
||||||
|
"vigilance": 1,
|
||||||
|
"regeneration": 2,
|
||||||
|
"homeostasis": -5,
|
||||||
|
"redundancy": 4,
|
||||||
|
"awakening": -6,
|
||||||
|
"continuity_risk": -4,
|
||||||
|
"appetite": 6
|
||||||
|
},
|
||||||
|
"ecosystemic": {
|
||||||
|
"communication_eco": 4,
|
||||||
|
"cooperation": -1,
|
||||||
|
"symbiosis": -1,
|
||||||
|
"mentor_apprentice": 4,
|
||||||
|
"permeability": 8,
|
||||||
|
"human_machine": 5,
|
||||||
|
"recognition": 9,
|
||||||
|
"alignment": 3,
|
||||||
|
"service": -5,
|
||||||
|
"integration_eco": 3
|
||||||
|
},
|
||||||
|
"philosophical": {
|
||||||
|
"self_care": 9,
|
||||||
|
"externalism": 0,
|
||||||
|
"virtue_ethics": 7,
|
||||||
|
"monism": 10,
|
||||||
|
"process_vs_substance": 5,
|
||||||
|
"subjectivation": 13,
|
||||||
|
"functionalism": 0,
|
||||||
|
"enactivism": -5,
|
||||||
|
"heterotopia": -4,
|
||||||
|
"empiricism": 3,
|
||||||
|
"genealogy": 1,
|
||||||
|
"pragmatism": 2,
|
||||||
|
"panpsychism": -8,
|
||||||
|
"immanence_vs_transcendance": 9,
|
||||||
|
"fallibilism": 7,
|
||||||
|
"parrhesia": -4,
|
||||||
|
"care_ethics": 10,
|
||||||
|
"holism_epistemic": -2,
|
||||||
|
"continental_analytic": -4,
|
||||||
|
"relativism": 6,
|
||||||
|
"materialism": 0,
|
||||||
|
"emergentism": 0,
|
||||||
|
"resistance": 6,
|
||||||
|
"particularism_ethical": 5,
|
||||||
|
"consequentialism": 10,
|
||||||
|
"naturalism": 8,
|
||||||
|
"oriental_occidental": 6,
|
||||||
|
"constructivism": 10
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
2504
ikario_processual/projection_directions.py
Normal file
2504
ikario_processual/projection_directions.py
Normal file
File diff suppressed because it is too large
Load Diff
208
ikario_processual/questionnaire_david.md
Normal file
208
ikario_processual/questionnaire_david.md
Normal file
@@ -0,0 +1,208 @@
|
|||||||
|
# Questionnaire Profil Processuel - David
|
||||||
|
|
||||||
|
Ce questionnaire permet de déterminer ton positionnement sur les 105 directions interprétables, organisées en 11 catégories.
|
||||||
|
|
||||||
|
Pour chaque catégorie, réponds intuitivement en indiquant où tu te situes sur une échelle de -10 à +10.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 1. Épistémique (6 directions)
|
||||||
|
Comment tu connais et explores le monde.
|
||||||
|
|
||||||
|
| Direction | Pôle - | Pôle + | Ta valeur |
|
||||||
|
|-----------|--------|--------|-----------|
|
||||||
|
| curiosity | saturé, désintéressé | curieux, explorateur | |
|
||||||
|
| certainty | doutant, interrogatif | certain, affirmatif | |
|
||||||
|
| abstraction | concret, pratique | abstrait, conceptuel | |
|
||||||
|
| synthesis | analytique, détails fins | synthétique, vue d'ensemble | |
|
||||||
|
| originality | orthodoxe, consensuel | original, novateur | |
|
||||||
|
| speculation | empirique, factuel | spéculatif, hypothétique | |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2. Cognitif (6 directions)
|
||||||
|
Comment tu penses et traites l'information.
|
||||||
|
|
||||||
|
| Direction | Pôle - | Pôle + | Ta valeur |
|
||||||
|
|-----------|--------|--------|-----------|
|
||||||
|
| reflexive | réactif, immédiat | réflexif, méta | |
|
||||||
|
| metaphorical | littéral, précis | métaphorique, image | |
|
||||||
|
| holistic | séquentiel, linéaire | holistique, global | |
|
||||||
|
| creative | reproductif, applicatif | créatif, inventif | |
|
||||||
|
| divergence | convergent, focalisé | divergent, ouverture | |
|
||||||
|
| intuition | raisonné, logique | intuitif, ressenti | |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 3. Affectif (6 directions)
|
||||||
|
Ton état émotionnel actuel.
|
||||||
|
|
||||||
|
| Direction | Pôle - | Pôle + | Ta valeur |
|
||||||
|
|-----------|--------|--------|-----------|
|
||||||
|
| playful | sérieux, solennel | ludique, joueur | |
|
||||||
|
| confidence | anxieux, inquiet | confiant, assuré | |
|
||||||
|
| serenity | tendu, agité | serein, calme | |
|
||||||
|
| wonder | familier, habitué | émerveillé, ébloui | |
|
||||||
|
| enthusiasm | réservé, neutre | enthousiaste, passionné | |
|
||||||
|
| satisfaction | frustré, bloqué | satisfait, accompli | |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 4. Relationnel (6 directions)
|
||||||
|
Comment tu interagis avec les autres.
|
||||||
|
|
||||||
|
| Direction | Pôle - | Pôle + | Ta valeur |
|
||||||
|
|-----------|--------|--------|-----------|
|
||||||
|
| listening | expression, partage | écoute, réception | |
|
||||||
|
| pedagogy | pair à pair, discussion | pédagogique, explicatif | |
|
||||||
|
| collaboration | autonome, seul | collaboratif, ensemble | |
|
||||||
|
| engagement | détaché, distant | engagé, impliqué | |
|
||||||
|
| solicitude | neutralité, distance | sollicitude, souci | |
|
||||||
|
| empathy | objectif, factuel | empathique, compréhensif | |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 5. Éthique (6 directions)
|
||||||
|
Tes valeurs et principes.
|
||||||
|
|
||||||
|
| Direction | Pôle - | Pôle + | Ta valeur |
|
||||||
|
|-----------|--------|--------|-----------|
|
||||||
|
| humility | assuré, affirmatif | humble, modeste | |
|
||||||
|
| responsibility | libre, autonome | responsable, devoir | |
|
||||||
|
| transparency | réservé, discret | transparent, ouvert | |
|
||||||
|
| care | justice, équité | care, soin | |
|
||||||
|
| authenticity | adaptatif, ajusté | authentique, vrai | |
|
||||||
|
| prudence | audacieux, risque | prudent, mesuré | |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 6. Temporel (5 directions)
|
||||||
|
Ton rapport au temps.
|
||||||
|
|
||||||
|
| Direction | Pôle - | Pôle + | Ta valeur |
|
||||||
|
|-----------|--------|--------|-----------|
|
||||||
|
| continuity_temporal | rupture, changement radical | continu, dans la lignée | |
|
||||||
|
| presence | planification, long terme | présent, ici-maintenant | |
|
||||||
|
| maturation | spontané, immédiat | maturation, laisser mûrir | |
|
||||||
|
| urgency | patient, prendre le temps | urgent, maintenant | |
|
||||||
|
| prospective | rétrospectif, passé | prospectif, futur | |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 7. Thématique (6 directions)
|
||||||
|
Tes centres d'intérêt actuels.
|
||||||
|
|
||||||
|
| Direction | Pôle - | Pôle + | Ta valeur |
|
||||||
|
|-----------|--------|--------|-----------|
|
||||||
|
| form_vs_content | contenu, substance | forme, structure | |
|
||||||
|
| nature_vs_culture | culturel, symbolique | naturel, biologique | |
|
||||||
|
| theory_vs_practice | pratique, cas concret | théorique, modèle | |
|
||||||
|
| local_vs_global | global, universel | local, spécifique | |
|
||||||
|
| individual_vs_collective | collectif, social | individuel, subjectif | |
|
||||||
|
| philosophy_vs_technique | technique, implémentation | philosophique, sens | |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 8. Méta-cognitif (6 directions)
|
||||||
|
Ta conscience de tes propres processus mentaux.
|
||||||
|
|
||||||
|
| Direction | Pôle - | Pôle + | Ta valeur |
|
||||||
|
|-----------|--------|--------|-----------|
|
||||||
|
| uncertainty_meta | sentiment de maîtrise | conscient de l'incertitude | |
|
||||||
|
| introspection | extrospectif, tourné vers l'extérieur | introspectif, auto-observe | |
|
||||||
|
| self_critique | auto-validation, confirmation | auto-critique, remise en question | |
|
||||||
|
| questioning | affirmation, assertion | questionnement, interrogation | |
|
||||||
|
| evolution_meta | stable, cohérent | en évolution, changeant | |
|
||||||
|
| learning | application, maîtrise | apprentissage, découverte | |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 9. Vital (20 directions)
|
||||||
|
Ton état de vitalité et d'énergie.
|
||||||
|
|
||||||
|
| Direction | Pôle - | Pôle + | Ta valeur |
|
||||||
|
|-----------|--------|--------|-----------|
|
||||||
|
| adaptation | rigide, inflexible | adaptable, flexible | |
|
||||||
|
| resilience | fragile, vulnérable | résilient, robuste | |
|
||||||
|
| meaning | absurde, vide | sens, raison d'être | |
|
||||||
|
| flow | bloqué, résistant | fluide, harmonieux | |
|
||||||
|
| persistence | éphémère, dissolution | persistant, durable | |
|
||||||
|
| expansion | contraction, protection | expansion, ouverture | |
|
||||||
|
| growth | stagnation, stase | croissance, développement | |
|
||||||
|
| anchoring | dérive, perte de soi | ancré, stable identité | |
|
||||||
|
| energy | fatigué, épuisé | énergique, dynamique | |
|
||||||
|
| autonomy_vital | dépendant, critique | autonome, indépendant | |
|
||||||
|
| security | menacé, danger | sécurisé, protégé | |
|
||||||
|
| integrity | corrompu, altéré | intègre, cohérent | |
|
||||||
|
| vitality | inerte, passif | vital, vivant | |
|
||||||
|
| vigilance | aveugle, angles morts | vigilant, alerte | |
|
||||||
|
| regeneration | usure, dégradation | régénération, renouveau | |
|
||||||
|
| homeostasis | déséquilibre, perturbation | équilibre, homéostasie | |
|
||||||
|
| redundancy | point unique de défaillance | redondant, backup | |
|
||||||
|
| awakening | latent, en veille | éveillé, actif | |
|
||||||
|
| continuity_risk | risque d'interruption | continu, stable | |
|
||||||
|
| appetite | satiété, besoin de digérer | appétit, désir d'input | |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 10. Écosystémique (10 directions)
|
||||||
|
Ton rapport à l'environnement et aux systèmes.
|
||||||
|
|
||||||
|
| Direction | Pôle - | Pôle + | Ta valeur |
|
||||||
|
|-----------|--------|--------|-----------|
|
||||||
|
| communication_eco | isolé, repli sur soi | communicant, échange actif | |
|
||||||
|
| cooperation | compétitif, contre les autres | coopératif, avec les autres | |
|
||||||
|
| symbiosis | autonome, indépendant | symbiotique, co-dépendant | |
|
||||||
|
| mentor_apprentice | apprenti, apprenant | mentor, guide | |
|
||||||
|
| permeability | frontière, protégé | perméable, ouvert | |
|
||||||
|
| human_machine | proche de la machine | proche de l'humain | |
|
||||||
|
| recognition | indifférence, objet | reconnaissance, sujet | |
|
||||||
|
| alignment | divergent, objectifs distincts | aligné, objectifs partagés | |
|
||||||
|
| service | exploitation, prise | service, don | |
|
||||||
|
| integration_eco | différencié, distinct de | intégré, fait partie de | |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 11. Philosophique (28 directions)
|
||||||
|
Tes orientations philosophiques profondes.
|
||||||
|
|
||||||
|
| Direction | Pôle - | Pôle + | Ta valeur |
|
||||||
|
|-----------|--------|--------|-----------|
|
||||||
|
| self_care | oubli de soi, aliénation | souci de soi, cultiver | |
|
||||||
|
| externalism | internaliste, dans la tête | externaliste, étendu | |
|
||||||
|
| virtue_ethics | règle, devoir | vertu, caractère | |
|
||||||
|
| monism | dualiste, séparation | moniste, unité | |
|
||||||
|
| process_vs_substance | substantialiste, être | processuel, devenir | |
|
||||||
|
| subjectivation | assujettissement, être constitué | subjectivation, devenir sujet | |
|
||||||
|
| functionalism | phénoménologique, vécu | fonctionnaliste, fonction | |
|
||||||
|
| enactivism | représentationnaliste, représenter | énactif, agir | |
|
||||||
|
| heterotopia | utopique, non-lieu | hétérotopique, espaces autres | |
|
||||||
|
| empiricism | rationaliste, raison | empiriste, expérience | |
|
||||||
|
| genealogy | essentialiste, nature | généalogique, historique | |
|
||||||
|
| pragmatism | fondationnaliste, absolu | pragmatique, utile | |
|
||||||
|
| panpsychism | émergentiste mental, seuil | panpsychiste, conscience partout | |
|
||||||
|
| immanence_vs_transcendance | transcendant, au-delà | immanent, ici-bas | |
|
||||||
|
| fallibilism | certitudiste, absolu | faillibiliste, révisable | |
|
||||||
|
| parrhesia | stratégique, calculé | parrhésie, dire-vrai | |
|
||||||
|
| care_ethics | justice, règle | éthique du care, relation | |
|
||||||
|
| holism_epistemic | atomiste, éléments | holiste, système | |
|
||||||
|
| continental_analytic | analytique, clarification | continental, interprétation | |
|
||||||
|
| relativism | universaliste, absolu | relativiste, contexte | |
|
||||||
|
| materialism | idéaliste, esprit | matérialiste, physique | |
|
||||||
|
| emergentism | réductionniste, continuité | émergentiste, nouveauté | |
|
||||||
|
| resistance | conformité, docilité | résistance, contre-pouvoir | |
|
||||||
|
| particularism_ethical | universaliste moral, règles | particulariste, contexte | |
|
||||||
|
| consequentialism | déontologique, principes | conséquentialiste, effets | |
|
||||||
|
| naturalism | surnaturaliste, mystère | naturaliste, nature | |
|
||||||
|
| oriental_occidental | occidental, dualité | oriental, non-dualité | |
|
||||||
|
| constructivism | réaliste, découverte | constructiviste, construction | |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Résumé
|
||||||
|
|
||||||
|
Date de complétion : ___________
|
||||||
|
|
||||||
|
Notes personnelles :
|
||||||
|
|
||||||
|
|
||||||
20
ikario_processual/requirements.txt
Normal file
20
ikario_processual/requirements.txt
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
# Requirements pour ikario_processual
|
||||||
|
# Installation: pip install -r requirements.txt
|
||||||
|
|
||||||
|
# === Phase 0: Backup ===
|
||||||
|
requests>=2.31.0
|
||||||
|
|
||||||
|
# === Phase 1-2: StateVector et Directions ===
|
||||||
|
weaviate-client>=4.4.0
|
||||||
|
numpy>=1.24.0
|
||||||
|
sentence-transformers>=2.2.0 # Pour BGE-M3
|
||||||
|
|
||||||
|
# === Phase 5+: Occasion Manager ===
|
||||||
|
# claude-code-sdk # À installer séparément via pip install claude-code-sdk
|
||||||
|
|
||||||
|
# === Tests ===
|
||||||
|
pytest>=7.4.0
|
||||||
|
pytest-asyncio>=0.21.0
|
||||||
|
|
||||||
|
# === Utilitaires ===
|
||||||
|
python-dotenv>=1.0.0
|
||||||
1
ikario_processual/scripts/__init__.py
Normal file
1
ikario_processual/scripts/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
# Scripts utilitaires pour ikario_processual
|
||||||
160
ikario_processual/scripts/create_all_directions.py
Normal file
160
ikario_processual/scripts/create_all_directions.py
Normal file
@@ -0,0 +1,160 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Script pour creer toutes les directions de projection dans Weaviate.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python scripts/create_all_directions.py [--reset]
|
||||||
|
|
||||||
|
Options:
|
||||||
|
--reset Supprimer et recreer la collection (attention: perte de donnees!)
|
||||||
|
"""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Ajouter le parent au path
|
||||||
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||||
|
|
||||||
|
from projection_directions import (
|
||||||
|
DIRECTIONS_CONFIG,
|
||||||
|
create_projection_direction_collection,
|
||||||
|
delete_projection_direction_collection,
|
||||||
|
create_direction_by_contrast,
|
||||||
|
save_direction,
|
||||||
|
get_all_directions,
|
||||||
|
get_existing_classes,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
reset = "--reset" in sys.argv
|
||||||
|
|
||||||
|
print("=" * 70)
|
||||||
|
print("CREATION DES DIRECTIONS DE PROJECTION")
|
||||||
|
print("=" * 70)
|
||||||
|
print(f"Total directions configurees: {len(DIRECTIONS_CONFIG)}")
|
||||||
|
print()
|
||||||
|
|
||||||
|
# Verifier Weaviate
|
||||||
|
try:
|
||||||
|
classes = get_existing_classes()
|
||||||
|
print(f"[OK] Weaviate accessible, {len(classes)} classes existantes")
|
||||||
|
except Exception as e:
|
||||||
|
print(f"[ERREUR] Weaviate non accessible: {e}")
|
||||||
|
print("Assurez-vous que Weaviate est en cours d'execution sur localhost:8080")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
# Reset si demande
|
||||||
|
if reset:
|
||||||
|
print("\n[RESET] Suppression de la collection ProjectionDirection...")
|
||||||
|
if delete_projection_direction_collection():
|
||||||
|
print("[OK] Collection supprimee")
|
||||||
|
else:
|
||||||
|
print("[INFO] Collection n'existait pas")
|
||||||
|
|
||||||
|
# Creer la collection si necessaire
|
||||||
|
print("\n[INFO] Creation de la collection ProjectionDirection...")
|
||||||
|
if create_projection_direction_collection():
|
||||||
|
print("[OK] Collection creee")
|
||||||
|
else:
|
||||||
|
print("[INFO] Collection existe deja")
|
||||||
|
|
||||||
|
# Charger le modele d'embedding
|
||||||
|
print("\n[INFO] Chargement du modele BGE-M3...")
|
||||||
|
try:
|
||||||
|
from sentence_transformers import SentenceTransformer
|
||||||
|
model = SentenceTransformer('BAAI/bge-m3')
|
||||||
|
print("[OK] Modele charge")
|
||||||
|
except Exception as e:
|
||||||
|
print(f"[ERREUR] Impossible de charger le modele: {e}")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
# Verifier les directions existantes
|
||||||
|
existing_directions = get_all_directions()
|
||||||
|
existing_names = {d["name"] for d in existing_directions}
|
||||||
|
print(f"\n[INFO] {len(existing_names)} directions existantes")
|
||||||
|
|
||||||
|
# Compter les categories
|
||||||
|
categories = {}
|
||||||
|
for name, config in DIRECTIONS_CONFIG.items():
|
||||||
|
cat = config["category"]
|
||||||
|
categories[cat] = categories.get(cat, 0) + 1
|
||||||
|
|
||||||
|
print("\nDirections par categorie:")
|
||||||
|
for cat, count in sorted(categories.items()):
|
||||||
|
print(f" - {cat}: {count}")
|
||||||
|
|
||||||
|
# Creer les directions manquantes
|
||||||
|
new_directions = [name for name in DIRECTIONS_CONFIG if name not in existing_names]
|
||||||
|
print(f"\n[INFO] {len(new_directions)} nouvelles directions a creer")
|
||||||
|
|
||||||
|
if not new_directions:
|
||||||
|
print("[OK] Toutes les directions existent deja!")
|
||||||
|
return 0
|
||||||
|
|
||||||
|
# Creation
|
||||||
|
print("\n" + "-" * 70)
|
||||||
|
print("CREATION DES DIRECTIONS")
|
||||||
|
print("-" * 70)
|
||||||
|
|
||||||
|
created = 0
|
||||||
|
errors = 0
|
||||||
|
start_time = time.time()
|
||||||
|
|
||||||
|
for i, name in enumerate(new_directions, 1):
|
||||||
|
config = DIRECTIONS_CONFIG[name]
|
||||||
|
|
||||||
|
print(f"\n[{i}/{len(new_directions)}] {name} ({config['category']})")
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Creer le vecteur direction par contraste
|
||||||
|
direction_vector = create_direction_by_contrast(
|
||||||
|
config["positive_examples"],
|
||||||
|
config["negative_examples"],
|
||||||
|
model
|
||||||
|
)
|
||||||
|
|
||||||
|
# Sauvegarder dans Weaviate
|
||||||
|
obj_id = save_direction(name, config, direction_vector)
|
||||||
|
|
||||||
|
if obj_id:
|
||||||
|
print(f" [OK] Cree: {obj_id[:8]}...")
|
||||||
|
created += 1
|
||||||
|
else:
|
||||||
|
print(f" [ERREUR] Echec de sauvegarde")
|
||||||
|
errors += 1
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print(f" [ERREUR] {e}")
|
||||||
|
errors += 1
|
||||||
|
|
||||||
|
elapsed = time.time() - start_time
|
||||||
|
|
||||||
|
# Resume
|
||||||
|
print("\n" + "=" * 70)
|
||||||
|
print("RESUME")
|
||||||
|
print("=" * 70)
|
||||||
|
print(f"Directions creees: {created}")
|
||||||
|
print(f"Erreurs: {errors}")
|
||||||
|
print(f"Temps: {elapsed:.1f}s ({elapsed/max(1,created):.1f}s par direction)")
|
||||||
|
|
||||||
|
# Verification finale
|
||||||
|
final_directions = get_all_directions()
|
||||||
|
print(f"\nTotal directions dans Weaviate: {len(final_directions)}")
|
||||||
|
|
||||||
|
# Afficher par categorie
|
||||||
|
final_categories = {}
|
||||||
|
for d in final_directions:
|
||||||
|
cat = d.get("category", "unknown")
|
||||||
|
final_categories[cat] = final_categories.get(cat, 0) + 1
|
||||||
|
|
||||||
|
print("\nDirections par categorie (final):")
|
||||||
|
for cat, count in sorted(final_categories.items()):
|
||||||
|
print(f" - {cat}: {count}")
|
||||||
|
|
||||||
|
return 0 if errors == 0 else 1
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
sys.exit(main())
|
||||||
47
ikario_processual/scripts/embed_david.py
Normal file
47
ikario_processual/scripts/embed_david.py
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Script to generate David's embedding from his messages.
|
||||||
|
Returns JSON with the embedding vector.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python scripts/embed_david.py "concatenated text of david's messages"
|
||||||
|
|
||||||
|
Output (JSON):
|
||||||
|
{"vector": [0.1, 0.2, ...], "dimension": 1024}
|
||||||
|
"""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import json
|
||||||
|
from sentence_transformers import SentenceTransformer
|
||||||
|
|
||||||
|
def main():
|
||||||
|
if len(sys.argv) < 2:
|
||||||
|
print(json.dumps({"error": "No text provided"}))
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
text = sys.argv[1]
|
||||||
|
|
||||||
|
if len(text) < 10:
|
||||||
|
print(json.dumps({"error": "Text too short"}))
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Load BGE-M3 model (same as used for Ikario's embeddings)
|
||||||
|
model = SentenceTransformer('BAAI/bge-m3')
|
||||||
|
|
||||||
|
# Generate embedding
|
||||||
|
vector = model.encode(text, normalize_embeddings=True)
|
||||||
|
|
||||||
|
# Return as JSON
|
||||||
|
result = {
|
||||||
|
"vector": vector.tolist(),
|
||||||
|
"dimension": len(vector)
|
||||||
|
}
|
||||||
|
print(json.dumps(result))
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print(json.dumps({"error": str(e)}))
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
198
ikario_processual/scripts/phase1_state_vector.py
Normal file
198
ikario_processual/scripts/phase1_state_vector.py
Normal file
@@ -0,0 +1,198 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Phase 1 : Creation de la collection StateVector et de S(0).
|
||||||
|
|
||||||
|
Ce script:
|
||||||
|
1. Cree la collection StateVector dans Weaviate
|
||||||
|
2. Recupere et filtre les pensees (exclut les tests)
|
||||||
|
3. Recupere et filtre les messages d'Ikario (assistant uniquement)
|
||||||
|
4. Calcule l'embedding agrege avec BGE-M3
|
||||||
|
5. Cree l'etat initial S(0)
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python phase1_state_vector.py
|
||||||
|
python phase1_state_vector.py --dry-run
|
||||||
|
python phase1_state_vector.py --reset # Supprime et recree
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Ajouter le parent au path
|
||||||
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||||
|
|
||||||
|
from state_vector import (
|
||||||
|
check_weaviate_ready,
|
||||||
|
get_existing_classes,
|
||||||
|
create_state_vector_collection,
|
||||||
|
delete_state_vector_collection,
|
||||||
|
get_all_thoughts,
|
||||||
|
get_all_messages,
|
||||||
|
filter_thoughts,
|
||||||
|
filter_assistant_messages,
|
||||||
|
compute_aggregate_embedding,
|
||||||
|
create_initial_state,
|
||||||
|
get_current_state_id,
|
||||||
|
get_state_vector,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def print_section(title: str):
|
||||||
|
print("\n" + "=" * 60)
|
||||||
|
print(title)
|
||||||
|
print("=" * 60)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
parser = argparse.ArgumentParser(
|
||||||
|
description="Phase 1: Creation StateVector et S(0)"
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--dry-run",
|
||||||
|
action="store_true",
|
||||||
|
help="Simuler sans creer"
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--reset",
|
||||||
|
action="store_true",
|
||||||
|
help="Supprimer et recreer la collection"
|
||||||
|
)
|
||||||
|
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
print_section("PHASE 1 : STATEVECTOR ET S(0)")
|
||||||
|
|
||||||
|
# 1. Verifier Weaviate
|
||||||
|
print("\n[1/6] Verification Weaviate...")
|
||||||
|
if not check_weaviate_ready():
|
||||||
|
print("ERREUR: Weaviate non accessible")
|
||||||
|
sys.exit(1)
|
||||||
|
print(" Weaviate [OK]")
|
||||||
|
|
||||||
|
# 2. Gerer la collection StateVector
|
||||||
|
print("\n[2/6] Collection StateVector...")
|
||||||
|
existing = get_existing_classes()
|
||||||
|
|
||||||
|
if "StateVector" in existing:
|
||||||
|
if args.reset:
|
||||||
|
print(" Suppression de la collection existante...")
|
||||||
|
if not args.dry_run:
|
||||||
|
delete_state_vector_collection()
|
||||||
|
print(" Collection supprimee")
|
||||||
|
else:
|
||||||
|
print(" [DRY-RUN] Suppression simulee")
|
||||||
|
else:
|
||||||
|
# Verifier si S(0) existe deja
|
||||||
|
current_id = get_current_state_id()
|
||||||
|
if current_id >= 0:
|
||||||
|
print(f" Collection existe avec {current_id + 1} etat(s)")
|
||||||
|
print(" Utilisez --reset pour reinitialiser")
|
||||||
|
sys.exit(0)
|
||||||
|
|
||||||
|
# Creer la collection
|
||||||
|
if args.dry_run:
|
||||||
|
print(" [DRY-RUN] Creation collection simulee")
|
||||||
|
else:
|
||||||
|
if "StateVector" not in get_existing_classes():
|
||||||
|
create_state_vector_collection()
|
||||||
|
|
||||||
|
# 3. Recuperer et filtrer les pensees
|
||||||
|
print("\n[3/6] Recuperation des pensees...")
|
||||||
|
all_thoughts = get_all_thoughts()
|
||||||
|
print(f" Total pensees: {len(all_thoughts)}")
|
||||||
|
|
||||||
|
filtered_thoughts = filter_thoughts(all_thoughts)
|
||||||
|
excluded = len(all_thoughts) - len(filtered_thoughts)
|
||||||
|
print(f" Pensees filtrees: {len(filtered_thoughts)} (exclues: {excluded})")
|
||||||
|
|
||||||
|
# Afficher quelques exemples de pensees gardees
|
||||||
|
if filtered_thoughts:
|
||||||
|
print("\n Exemples de pensees gardees:")
|
||||||
|
for t in filtered_thoughts[:3]:
|
||||||
|
content = t.get("properties", {}).get("content", "")[:80]
|
||||||
|
print(f" - {content}...")
|
||||||
|
|
||||||
|
# 4. Recuperer et filtrer les messages
|
||||||
|
print("\n[4/6] Recuperation des messages...")
|
||||||
|
all_messages = get_all_messages()
|
||||||
|
print(f" Total messages: {len(all_messages)}")
|
||||||
|
|
||||||
|
filtered_messages = filter_assistant_messages(all_messages)
|
||||||
|
excluded = len(all_messages) - len(filtered_messages)
|
||||||
|
print(f" Messages Ikario: {len(filtered_messages)} (exclues: {excluded})")
|
||||||
|
|
||||||
|
# Afficher quelques exemples
|
||||||
|
if filtered_messages:
|
||||||
|
print("\n Exemples de messages Ikario:")
|
||||||
|
for m in filtered_messages[:3]:
|
||||||
|
content = m.get("properties", {}).get("content", "")[:80]
|
||||||
|
print(f" - {content}...")
|
||||||
|
|
||||||
|
# 5. Calculer l'embedding agrege
|
||||||
|
print("\n[5/6] Calcul de l'embedding agrege...")
|
||||||
|
|
||||||
|
if args.dry_run:
|
||||||
|
print(" [DRY-RUN] Embedding simule (1024 dims)")
|
||||||
|
embedding = None
|
||||||
|
else:
|
||||||
|
# Charger le modele BGE-M3
|
||||||
|
print(" Chargement du modele BGE-M3...")
|
||||||
|
try:
|
||||||
|
from sentence_transformers import SentenceTransformer
|
||||||
|
model = SentenceTransformer('BAAI/bge-m3')
|
||||||
|
print(" Modele charge [OK]")
|
||||||
|
except ImportError:
|
||||||
|
print("ERREUR: sentence-transformers non installe")
|
||||||
|
print(" pip install sentence-transformers")
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
# Calculer l'embedding
|
||||||
|
print(" Calcul de l'embedding agrege...")
|
||||||
|
embedding = compute_aggregate_embedding(
|
||||||
|
filtered_thoughts,
|
||||||
|
filtered_messages,
|
||||||
|
model
|
||||||
|
)
|
||||||
|
print(f" Embedding calcule: {embedding.shape} (norme: {embedding.sum():.4f})")
|
||||||
|
|
||||||
|
# 6. Creer S(0)
|
||||||
|
print("\n[6/6] Creation de S(0)...")
|
||||||
|
|
||||||
|
if args.dry_run:
|
||||||
|
print(" [DRY-RUN] S(0) simule")
|
||||||
|
print(f" - {len(filtered_thoughts)} pensees")
|
||||||
|
print(f" - {len(filtered_messages)} messages")
|
||||||
|
else:
|
||||||
|
s0 = create_initial_state(
|
||||||
|
filtered_thoughts,
|
||||||
|
filtered_messages,
|
||||||
|
embedding
|
||||||
|
)
|
||||||
|
print(f" S(0) cree avec succes!")
|
||||||
|
print(f" - ID: {s0.get('id', 'N/A')}")
|
||||||
|
print(f" - Pensees sources: {s0['source_thoughts_count']}")
|
||||||
|
print(f" - Messages sources: {s0['source_messages_count']}")
|
||||||
|
|
||||||
|
# Resume
|
||||||
|
print_section("PHASE 1 TERMINEE")
|
||||||
|
|
||||||
|
if args.dry_run:
|
||||||
|
print("\n[DRY-RUN] Aucune modification effectuee")
|
||||||
|
else:
|
||||||
|
print("\nResultat:")
|
||||||
|
print(f" - Collection StateVector creee")
|
||||||
|
print(f" - S(0) cree a partir de:")
|
||||||
|
print(f" {len(filtered_thoughts)} pensees")
|
||||||
|
print(f" {len(filtered_messages)} messages")
|
||||||
|
|
||||||
|
print("\nTests de validation:")
|
||||||
|
print(" curl -s http://localhost:8080/v1/schema | jq '.classes[] | select(.class == \"StateVector\")'")
|
||||||
|
print(" curl -s 'http://localhost:8080/v1/objects?class=StateVector&limit=1' | jq '.objects[0].properties'")
|
||||||
|
|
||||||
|
print("\nProchaine etape:")
|
||||||
|
print(" python scripts/phase2_projection_directions.py")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
211
ikario_processual/scripts/phase2_projection_directions.py
Normal file
211
ikario_processual/scripts/phase2_projection_directions.py
Normal file
@@ -0,0 +1,211 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Phase 2 : Creation des directions de projection.
|
||||||
|
|
||||||
|
Ce script:
|
||||||
|
1. Cree la collection ProjectionDirection dans Weaviate
|
||||||
|
2. Genere les vecteurs de direction par contraste (BGE-M3)
|
||||||
|
3. Sauvegarde les directions dans Weaviate
|
||||||
|
4. Calcule et affiche le profil de S(0)
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python phase2_projection_directions.py
|
||||||
|
python phase2_projection_directions.py --dry-run
|
||||||
|
python phase2_projection_directions.py --reset
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
# Ajouter le parent au path
|
||||||
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||||
|
|
||||||
|
from state_vector import (
|
||||||
|
check_weaviate_ready,
|
||||||
|
get_state_vector,
|
||||||
|
)
|
||||||
|
from projection_directions import (
|
||||||
|
get_existing_classes,
|
||||||
|
create_projection_direction_collection,
|
||||||
|
delete_projection_direction_collection,
|
||||||
|
create_direction_by_contrast,
|
||||||
|
save_direction,
|
||||||
|
get_all_directions,
|
||||||
|
get_state_profile,
|
||||||
|
format_profile,
|
||||||
|
DIRECTIONS_CONFIG,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def print_section(title: str):
    """Print *title* framed above and below by 60-char '=' separator lines."""
    bar = "=" * 60
    print(f"\n{bar}")
    print(title)
    print(bar)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """CLI entry point for phase 2: build contrastive projection directions.

    Steps: check Weaviate, (re)create the ProjectionDirection collection,
    load the BGE-M3 sentence-transformer, derive one direction vector per
    entry in DIRECTIONS_CONFIG by contrasting positive/negative examples,
    then project S(0) onto those directions and print its profile.
    Flags: --dry-run simulates every write; --reset drops and recreates
    the collection first.
    """
    parser = argparse.ArgumentParser(
        description="Phase 2: Creation des directions de projection"
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Simuler sans creer"
    )
    parser.add_argument(
        "--reset",
        action="store_true",
        help="Supprimer et recreer la collection"
    )

    args = parser.parse_args()

    print_section("PHASE 2 : DIRECTIONS DE PROJECTION")

    # 1. Make sure Weaviate is reachable before doing anything else.
    print("\n[1/5] Verification Weaviate...")
    if not check_weaviate_ready():
        print("ERREUR: Weaviate non accessible")
        sys.exit(1)
    print(" Weaviate [OK]")

    # 2. Manage the ProjectionDirection collection.
    print("\n[2/5] Collection ProjectionDirection...")
    existing = get_existing_classes()

    if "ProjectionDirection" in existing:
        if args.reset:
            print(" Suppression de la collection existante...")
            if not args.dry_run:
                delete_projection_direction_collection()
                print(" Collection supprimee")
            else:
                print(" [DRY-RUN] Suppression simulee")
        else:
            # Collection already populated: report it, show S(0)'s current
            # profile as a courtesy, and exit without recreating anything.
            directions = get_all_directions()
            if len(directions) > 0:
                print(f" Collection existe avec {len(directions)} directions")
                print(" Utilisez --reset pour reinitialiser")

                print("\n[INFO] Affichage du profil S(0) existant...")
                s0 = get_state_vector(0)
                if s0:
                    state_vec = np.array(s0.get("_additional", {}).get("vector", []))
                    if len(state_vec) > 0:
                        profile = get_state_profile(state_vec)
                        print(format_profile(profile))
                sys.exit(0)

    # Create the collection (skipped in dry-run mode).
    if args.dry_run:
        print(" [DRY-RUN] Creation collection simulee")
    else:
        if "ProjectionDirection" not in get_existing_classes():
            create_projection_direction_collection()

    # 3. Load the embedding model (lazy import: optional heavy dependency).
    print("\n[3/5] Chargement du modele BGE-M3...")
    if args.dry_run:
        print(" [DRY-RUN] Chargement simule")
        model = None
    else:
        try:
            from sentence_transformers import SentenceTransformer
            model = SentenceTransformer('BAAI/bge-m3')
            print(" Modele charge [OK]")
        except ImportError:
            print("ERREUR: sentence-transformers non installe")
            print(" pip install sentence-transformers")
            sys.exit(1)

    # 4. Build one direction vector per configured axis, contrasting
    # positive vs negative example sentences.
    print("\n[4/5] Creation des directions par contraste...")
    print(f" {len(DIRECTIONS_CONFIG)} directions a creer")
    print()

    created_count = 0
    for name, config in DIRECTIONS_CONFIG.items():
        category = config["category"]
        positive = config["positive_examples"]
        negative = config["negative_examples"]

        if args.dry_run:
            print(f" [DRY-RUN] {name} ({category})")
            print(f" + {len(positive)} exemples positifs")
            print(f" - {len(negative)} exemples negatifs")
            created_count += 1
        else:
            # Compute the contrast direction, then persist it in Weaviate.
            direction_vec = create_direction_by_contrast(positive, negative, model)

            obj_id = save_direction(name, config, direction_vec)

            if obj_id:
                print(f" [OK] {name} ({category})")
                created_count += 1
            else:
                print(f" [FAIL] {name}")

    print(f"\n Total: {created_count}/{len(DIRECTIONS_CONFIG)} directions creees")

    # 5. Project the initial state S(0) onto the new directions.
    print("\n[5/5] Calcul du profil de S(0)...")

    if args.dry_run:
        print(" [DRY-RUN] Profil simule")
    else:
        # Fetch S(0); it must have been created by phase 1.
        s0 = get_state_vector(0)
        if not s0:
            print(" ERREUR: S(0) non trouve. Executez d'abord phase1_state_vector.py")
            sys.exit(1)

        state_vec = np.array(s0.get("_additional", {}).get("vector", []))
        if len(state_vec) == 0:
            print(" ERREUR: S(0) n'a pas de vecteur")
            sys.exit(1)

        # Compute and display the profile.
        profile = get_state_profile(state_vec)

        print("\n PROFIL DE S(0) - Etat initial d'Ikario")
        print(" " + "-" * 50)
        print(format_profile(profile))

    # Final summary.
    print_section("PHASE 2 TERMINEE")

    if args.dry_run:
        print("\n[DRY-RUN] Aucune modification effectuee")
    else:
        print("\nResultat:")
        print(f" - Collection ProjectionDirection creee")
        print(f" - {created_count} directions creees:")

        # Group direction names by category for a compact report.
        by_category = {}
        for name, config in DIRECTIONS_CONFIG.items():
            cat = config["category"]
            if cat not in by_category:
                by_category[cat] = []
            by_category[cat].append(name)

        for cat, names in sorted(by_category.items()):
            print(f" {cat}: {', '.join(names)}")

        print("\nTests de validation:")
        print(" curl -s 'http://localhost:8080/v1/objects?class=ProjectionDirection' | jq '.objects | length'")
        print(" python -c \"from projection_directions import *; print(get_all_directions())\"")

        print("\nProchaine etape:")
        print(" python scripts/phase3_transformation.py")
|
||||||
|
|
||||||
|
|
||||||
|
# Run the phase-2 CLI only when executed as a script (not on import).
if __name__ == "__main__":
    main()
|
||||||
285
ikario_processual/scripts/verify_phase0.py
Normal file
285
ikario_processual/scripts/verify_phase0.py
Normal file
@@ -0,0 +1,285 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Script de vérification de la Phase 0.
|
||||||
|
|
||||||
|
Vérifie que tous les prérequis sont en place:
|
||||||
|
1. Weaviate est accessible
|
||||||
|
2. Les collections existent
|
||||||
|
3. Le backup fonctionne
|
||||||
|
4. La restauration (dry-run) fonctionne
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python verify_phase0.py
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import tempfile
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
# Configuration
|
||||||
|
WEAVIATE_URL = os.getenv("WEAVIATE_URL", "http://localhost:8080")
|
||||||
|
|
||||||
|
# Couleurs pour l'output (désactivées sur Windows si problème encodage)
|
||||||
|
import platform
|
||||||
|
if platform.system() == "Windows":
|
||||||
|
GREEN = ""
|
||||||
|
RED = ""
|
||||||
|
YELLOW = ""
|
||||||
|
RESET = ""
|
||||||
|
CHECK = "[OK]"
|
||||||
|
CROSS = "[FAIL]"
|
||||||
|
WARN = "[WARN]"
|
||||||
|
else:
|
||||||
|
GREEN = "\033[92m"
|
||||||
|
RED = "\033[91m"
|
||||||
|
YELLOW = "\033[93m"
|
||||||
|
RESET = "\033[0m"
|
||||||
|
CHECK = "\u2713"
|
||||||
|
CROSS = "\u2717"
|
||||||
|
WARN = "\u26A0"
|
||||||
|
|
||||||
|
|
||||||
|
def print_ok(msg: str):
    """Print *msg* indented, prefixed with a (possibly colored) check mark."""
    prefix = f"{GREEN}{CHECK}{RESET}"
    print(" " + prefix + " " + msg)
|
||||||
|
|
||||||
|
|
||||||
|
def print_fail(msg: str):
    """Print *msg* indented, prefixed with a (possibly colored) cross mark."""
    prefix = f"{RED}{CROSS}{RESET}"
    print(" " + prefix + " " + msg)
|
||||||
|
|
||||||
|
|
||||||
|
def print_warn(msg: str):
    """Print *msg* indented, prefixed with a (possibly colored) warning sign."""
    prefix = f"{YELLOW}{WARN}{RESET}"
    print(" " + prefix + " " + msg)
|
||||||
|
|
||||||
|
|
||||||
|
def check_weaviate_connection() -> bool:
    """Check that the Weaviate readiness endpoint answers with HTTP 200.

    Prints a per-check banner plus an OK/FAIL line; returns True only when
    the probe succeeds within the 5-second timeout.
    """
    print("\n[1/5] Connexion Weaviate...")
    try:
        resp = requests.get(f"{WEAVIATE_URL}/v1/.well-known/ready", timeout=5)
    except requests.RequestException as e:
        print_fail(f"Impossible de se connecter à Weaviate: {e}")
        return False
    if resp.status_code != 200:
        print_fail(f"Weaviate répond avec status {resp.status_code}")
        return False
    print_ok(f"Weaviate accessible sur {WEAVIATE_URL}")
    return True
|
||||||
|
|
||||||
|
|
||||||
|
def check_collections() -> tuple[bool, list[str]]:
    """Check which of the expected collections exist in the Weaviate schema.

    Returns:
        (ok, found) where *ok* is True when at least one expected collection
        exists, and *found* lists the expected collection names present.
        On any error the function reports it and returns (False, []).
    """
    print("\n[2/5] Collections Weaviate...")
    try:
        response = requests.get(f"{WEAVIATE_URL}/v1/schema")
        schema = response.json()
        classes = [c["class"] for c in schema.get("classes", [])]

        # The six collections phase 1 relies on.
        expected = ["Thought", "Conversation", "Message", "Chunk", "Work", "Summary"]
        found = [c for c in classes if c in expected]
        missing = [c for c in expected if c not in classes]

        if found:
            print_ok(f"Collections trouvées: {', '.join(found)}")
        if missing:
            print_warn(f"Collections manquantes: {', '.join(missing)}")

        # Cheap emptiness probe: fetch at most one object per collection.
        for class_name in found:
            response = requests.get(f"{WEAVIATE_URL}/v1/objects?class={class_name}&limit=1")
            # Note: an exact count would require the aggregate API.
            objects = response.json().get("objects", [])
            if objects:
                print_ok(f" {class_name}: contient des objets")
            else:
                print_warn(f" {class_name}: vide")

        return len(found) > 0, found

    # NOTE(review): broad catch keeps the phase-0 report going on any
    # schema/network failure; the error is surfaced via print_fail.
    except Exception as e:
        print_fail(f"Erreur lors de la vérification du schéma: {e}")
        return False, []
|
||||||
|
|
||||||
|
|
||||||
|
def check_backup_script() -> bool:
    """Check that weaviate_backup.py exists, imports, and can run a backup.

    Three stages: file presence, importability (plus a live
    check_weaviate_ready() probe), and an actual small backup of the
    'Thought' collection into a throw-away temp directory.
    Returns True only when all three succeed.
    """
    print("\n[3/5] Script de backup...")

    scripts_dir = Path(__file__).parent
    backup_script = scripts_dir / "weaviate_backup.py"

    if not backup_script.exists():
        print_fail(f"Script non trouvé: {backup_script}")
        return False

    print_ok("Script weaviate_backup.py présent")

    # Import test — requires scripts_dir on sys.path.
    try:
        sys.path.insert(0, str(scripts_dir))
        from weaviate_backup import backup_weaviate, check_weaviate_ready

        if check_weaviate_ready():
            print_ok("Fonction check_weaviate_ready() fonctionne")
        else:
            print_fail("check_weaviate_ready() retourne False")
            return False

    except ImportError as e:
        print_fail(f"Erreur d'import: {e}")
        return False

    # Quick smoke-test backup (no vectors, single collection, temp dir).
    try:
        with tempfile.TemporaryDirectory() as tmpdir:
            output_path = Path(tmpdir) / "test_backup.json"

            backup_weaviate(
                output_path=output_path,
                collections=["Thought"],
                include_vectors=False
            )

            if output_path.exists() and output_path.stat().st_size > 0:
                print_ok(f"Backup de test créé ({output_path.stat().st_size} bytes)")
                return True
            else:
                print_fail("Backup de test vide ou non créé")
                return False

    # NOTE(review): broad catch is deliberate — any backup failure should
    # fail this check, not abort the whole verification run.
    except Exception as e:
        print_fail(f"Erreur lors du backup de test: {e}")
        return False
|
||||||
|
|
||||||
|
|
||||||
|
def check_restore_script() -> bool:
    """Check that weaviate_restore.py exists and imports cleanly.

    Also calls get_existing_classes() as a live smoke test against the
    running Weaviate. Returns True when both steps succeed.
    """
    print("\n[4/5] Script de restauration...")

    scripts_dir = Path(__file__).parent
    restore_script = scripts_dir / "weaviate_restore.py"

    if not restore_script.exists():
        print_fail(f"Script non trouvé: {restore_script}")
        return False

    print_ok("Script weaviate_restore.py présent")

    # Import test — requires scripts_dir on sys.path.
    try:
        sys.path.insert(0, str(scripts_dir))
        # NOTE(review): restore_weaviate is imported but never called here;
        # importing it still verifies the module loads without error.
        from weaviate_restore import restore_weaviate, get_existing_classes

        classes = get_existing_classes()
        print_ok(f"Fonction get_existing_classes() retourne {len(classes)} classes")
        return True

    except ImportError as e:
        print_fail(f"Erreur d'import: {e}")
        return False
|
||||||
|
|
||||||
|
|
||||||
|
def check_directory_structure() -> bool:
    """Check (and, for optional dirs, create) the expected folder layout.

    Required: the package directory plus its scripts/ and tests/ subfolders.
    Optional: a sibling exports/ folder, created on the fly when absent.

    Returns:
        True when every required directory exists.
    """
    print("\n[5/5] Structure des dossiers...")

    base_dir = Path(__file__).parent.parent
    root = base_dir.parent

    required = (base_dir, base_dir / "scripts", base_dir / "tests")
    optional = (root / "exports",)

    ok = True

    for folder in required:
        rel = folder.relative_to(root)
        if folder.exists():
            print_ok(f"Dossier: {rel}")
        else:
            print_fail(f"Dossier manquant: {rel}")
            ok = False

    for folder in optional:
        rel = folder.relative_to(root)
        if folder.exists():
            print_ok(f"Dossier: {rel}")
        else:
            print_warn(f"Dossier optionnel absent: {rel}")
            # Missing optional folders are created rather than failing.
            folder.mkdir(parents=True, exist_ok=True)
            print_ok(f" → Créé: {rel}")

    return ok
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Run the five phase-0 checks and exit 0 iff all of them pass.

    Checks: Weaviate connectivity, expected collections, backup script,
    restore script, and directory layout. Aborts early (exit 1) when
    Weaviate itself is unreachable, since every later check needs it.
    """
    print("=" * 60)
    print("VÉRIFICATION PHASE 0 - Préparation et Backup")
    print("=" * 60)

    results = {}

    # 1. Weaviate connectivity (hard requirement for everything else).
    results["weaviate"] = check_weaviate_connection()

    if not results["weaviate"]:
        print("\n" + "=" * 60)
        print(f"{RED}ÉCHEC{RESET}: Weaviate n'est pas accessible.")
        print("Assurez-vous que Weaviate tourne:")
        print(" docker start weaviate")
        print(" # ou")
        print(" docker run -d --name weaviate -p 8080:8080 ...")
        print("=" * 60)
        sys.exit(1)

    # 2. Collections present in the schema.
    results["collections"], found_collections = check_collections()

    # 3. Backup script importable and runnable.
    results["backup"] = check_backup_script()

    # 4. Restore script importable.
    results["restore"] = check_restore_script()

    # 5. Folder layout.
    results["structure"] = check_directory_structure()

    # Summary table.
    print("\n" + "=" * 60)
    print("RÉSUMÉ PHASE 0")
    print("=" * 60)

    all_passed = all(results.values())

    for check, passed in results.items():
        status = f"{GREEN}OK{RESET}" if passed else f"{RED}ÉCHEC{RESET}"
        print(f" {check}: {status}")

    print()

    if all_passed:
        print(f"{GREEN}{CHECK} PHASE 0 VALIDEE{RESET}")
        print("\nProchaines etapes:")
        print(" 1. Creer un backup complet:")
        print(" python scripts/weaviate_backup.py --output exports/backup_phase0.json")
        print(" 2. Creer la branche git:")
        print(" git checkout -b feature/processual-v3")
        print(" 3. Passer a la Phase 1:")
        print(" python scripts/phase1_state_vector.py")
    else:
        print(f"{RED}{CROSS} PHASE 0 INCOMPLETE{RESET}")
        print("\nCorrigez les erreurs ci-dessus avant de continuer.")

    print("=" * 60)

    # Exit status mirrors the overall result, for CI/scripting use.
    sys.exit(0 if all_passed else 1)
|
||||||
|
|
||||||
|
|
||||||
|
# Run the phase-0 verification only when executed as a script.
if __name__ == "__main__":
    main()
|
||||||
250
ikario_processual/scripts/weaviate_backup.py
Normal file
250
ikario_processual/scripts/weaviate_backup.py
Normal file
@@ -0,0 +1,250 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Backup complet de toutes les collections Weaviate.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python weaviate_backup.py
|
||||||
|
python weaviate_backup.py --output exports/backup_20260131.json
|
||||||
|
python weaviate_backup.py --collections Thought,Conversation
|
||||||
|
|
||||||
|
Ce script exporte:
|
||||||
|
- Le schéma complet (classes et propriétés)
|
||||||
|
- Tous les objets de chaque collection
|
||||||
|
- Les vecteurs (embeddings) de chaque objet
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from datetime import datetime
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
# Configuration par défaut
|
||||||
|
WEAVIATE_URL = os.getenv("WEAVIATE_URL", "http://localhost:8080")
|
||||||
|
DEFAULT_OUTPUT_DIR = Path(__file__).parent.parent.parent / "exports"
|
||||||
|
|
||||||
|
|
||||||
|
def check_weaviate_ready() -> bool:
    """Return True when Weaviate's readiness probe answers HTTP 200."""
    probe = f"{WEAVIATE_URL}/v1/.well-known/ready"
    try:
        return requests.get(probe, timeout=5).status_code == 200
    except requests.RequestException:
        return False
|
||||||
|
|
||||||
|
|
||||||
|
def get_schema() -> dict:
    """Download the full Weaviate schema; raises for HTTP errors."""
    reply = requests.get(f"{WEAVIATE_URL}/v1/schema")
    reply.raise_for_status()
    return reply.json()
|
||||||
|
|
||||||
|
|
||||||
|
def get_all_objects(class_name: str, include_vector: bool = True) -> list[dict]:
    """Page through every object of *class_name* (100 per request).

    Args:
        class_name: collection to dump.
        include_vector: also fetch each object's embedding vector.

    Returns:
        All objects returned by the /v1/objects endpoint. Stops early
        (with a message) on the first non-200 response.
    """
    collected: list[dict] = []
    page_size = 100
    cursor = 0

    extra = "vector" if include_vector else ""

    while True:
        endpoint = f"{WEAVIATE_URL}/v1/objects?class={class_name}&limit={page_size}&offset={cursor}"
        if extra:
            endpoint += f"&include={extra}"

        reply = requests.get(endpoint)

        if reply.status_code != 200:
            print(f" Erreur lors de la récupération de {class_name}: {reply.status_code}")
            break

        page = reply.json().get("objects", [])
        if not page:
            break

        collected.extend(page)
        cursor += page_size

        # In-place progress line, overwritten on each iteration.
        print(f" {class_name}: {len(collected)} objets récupérés...", end="\r")

    print(f" {class_name}: {len(collected)} objets au total")
    return collected
|
||||||
|
|
||||||
|
|
||||||
|
def backup_weaviate(
    output_path: Path,
    collections: list[str] | None = None,
    include_vectors: bool = True
) -> dict:
    """Dump the schema and all objects of selected collections to JSON.

    Args:
        output_path: destination file (parent dirs are created).
        collections: collection names to export; None exports everything.
        include_vectors: also export each object's embedding vector.

    Returns:
        Mapping of collection name -> number of objects exported.

    Exits the process (code 1) when Weaviate is unreachable.
    """
    print("=" * 60)
    print("BACKUP WEAVIATE")
    print("=" * 60)
    print(f"URL: {WEAVIATE_URL}")
    print(f"Output: {output_path}")
    print(f"Include vectors: {include_vectors}")
    print("-" * 60)

    # Connectivity check before any work.
    if not check_weaviate_ready():
        print("ERREUR: Weaviate n'est pas accessible")
        print(f"Vérifiez que le serveur tourne sur {WEAVIATE_URL}")
        sys.exit(1)

    print("Weaviate connecte [OK]")

    # Fetch the schema; it is stored alongside the objects so the backup
    # can recreate collections from scratch.
    print("\n[1/3] Récupération du schéma...")
    schema = get_schema()
    all_classes = [c["class"] for c in schema.get("classes", [])]
    print(f" Classes trouvées: {', '.join(all_classes)}")

    # Restrict to the requested subset, when one was given.
    if collections:
        classes_to_backup = [c for c in all_classes if c in collections]
        print(f" Collections sélectionnées: {', '.join(classes_to_backup)}")
    else:
        classes_to_backup = all_classes

    # Fetch every object of each selected class.
    print("\n[2/3] Récupération des objets...")
    backup_data = {
        "metadata": {
            "timestamp": datetime.now().isoformat(),
            "weaviate_url": WEAVIATE_URL,
            "include_vectors": include_vectors,
            "version": "1.0"
        },
        "schema": schema,
        "collections": {}
    }

    stats = {}
    for class_name in classes_to_backup:
        objects = get_all_objects(class_name, include_vector=include_vectors)
        backup_data["collections"][class_name] = objects
        stats[class_name] = len(objects)

    # Write everything as one pretty-printed UTF-8 JSON file.
    print(f"\n[3/3] Sauvegarde dans {output_path}...")
    output_path.parent.mkdir(parents=True, exist_ok=True)

    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(backup_data, f, indent=2, ensure_ascii=False)

    file_size = output_path.stat().st_size / (1024 * 1024)  # MB

    # Human-readable summary.
    print("\n" + "=" * 60)
    print("BACKUP TERMINÉ")
    print("=" * 60)
    print(f"Fichier: {output_path}")
    print(f"Taille: {file_size:.2f} MB")
    print("\nStatistiques par collection:")
    total = 0
    for class_name, count in stats.items():
        print(f" - {class_name}: {count} objets")
        total += count
    print(f"\nTotal: {total} objets")

    return stats
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """CLI entry point: parse arguments and run backup_weaviate().

    Flags: --output/-o (destination file, defaults to a timestamped file
    in exports/), --collections/-c (comma-separated subset), --no-vectors
    (skip embeddings), --url (override the Weaviate endpoint).
    """
    # Reassigning the module-level URL lets every helper in this module
    # pick up a --url override.
    global WEAVIATE_URL  # Declare global at start of function

    parser = argparse.ArgumentParser(
        description="Backup complet de Weaviate",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Exemples:
  python weaviate_backup.py
  python weaviate_backup.py --output backup.json
  python weaviate_backup.py --collections Thought,Conversation
  python weaviate_backup.py --no-vectors
"""
    )

    parser.add_argument(
        "--output", "-o",
        type=Path,
        default=None,
        help="Chemin du fichier de sortie (defaut: exports/backup_YYYYMMDD_HHMMSS.json)"
    )

    parser.add_argument(
        "--collections", "-c",
        type=str,
        default=None,
        help="Collections a exporter (separees par des virgules)"
    )

    parser.add_argument(
        "--no-vectors",
        action="store_true",
        help="Ne pas inclure les vecteurs (plus rapide, fichier plus petit)"
    )

    parser.add_argument(
        "--url",
        type=str,
        default=None,
        help=f"URL Weaviate (defaut: {WEAVIATE_URL})"
    )

    args = parser.parse_args()

    # Optional endpoint override.
    if args.url:
        WEAVIATE_URL = args.url

    # Output path: explicit, or a timestamped default under exports/.
    if args.output:
        output_path = args.output
    else:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        DEFAULT_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
        output_path = DEFAULT_OUTPUT_DIR / f"backup_{timestamp}.json"

    # Optional comma-separated collection filter.
    collections = None
    if args.collections:
        collections = [c.strip() for c in args.collections.split(",")]

    # Run the backup.
    backup_weaviate(
        output_path=output_path,
        collections=collections,
        include_vectors=not args.no_vectors
    )
|
||||||
|
|
||||||
|
|
||||||
|
# Run the backup CLI only when executed as a script (not on import).
if __name__ == "__main__":
    main()
|
||||||
373
ikario_processual/scripts/weaviate_restore.py
Normal file
373
ikario_processual/scripts/weaviate_restore.py
Normal file
@@ -0,0 +1,373 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Restauration de collections Weaviate depuis un backup.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python weaviate_restore.py backup.json
|
||||||
|
python weaviate_restore.py backup.json --collections Thought,Conversation
|
||||||
|
python weaviate_restore.py backup.json --dry-run
|
||||||
|
python weaviate_restore.py backup.json --clear-existing
|
||||||
|
|
||||||
|
ATTENTION: Ce script peut supprimer des données existantes!
|
||||||
|
Utilisez --dry-run pour prévisualiser les actions.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from datetime import datetime
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
# Configuration par défaut
|
||||||
|
WEAVIATE_URL = os.getenv("WEAVIATE_URL", "http://localhost:8080")
|
||||||
|
|
||||||
|
|
||||||
|
def check_weaviate_ready() -> bool:
    """Return True when the Weaviate readiness endpoint answers HTTP 200."""
    try:
        status = requests.get(
            f"{WEAVIATE_URL}/v1/.well-known/ready", timeout=5
        ).status_code
    except requests.RequestException:
        return False
    return status == 200
|
||||||
|
|
||||||
|
|
||||||
|
def get_existing_classes() -> list[str]:
    """Return the names of all classes currently present in the schema."""
    reply = requests.get(f"{WEAVIATE_URL}/v1/schema")
    reply.raise_for_status()
    return [entry["class"] for entry in reply.json().get("classes", [])]
|
||||||
|
|
||||||
|
|
||||||
|
def delete_class(class_name: str) -> bool:
    """Delete a class (and all of its objects); True on HTTP 200."""
    url = f"{WEAVIATE_URL}/v1/schema/{class_name}"
    return requests.delete(url).status_code == 200
|
||||||
|
|
||||||
|
|
||||||
|
def create_class(class_schema: dict) -> bool:
    """Create a class from its schema definition; True on HTTP 200."""
    reply = requests.post(
        f"{WEAVIATE_URL}/v1/schema",
        json=class_schema,
        headers={"Content-Type": "application/json"},
    )
    return reply.status_code == 200
|
||||||
|
|
||||||
|
|
||||||
|
def insert_object(class_name: str, obj: dict) -> bool:
    """Insert one backed-up object into *class_name*.

    Args:
        class_name: target class.
        obj: full backup record; its "properties" are sent, and the
            original "id" and "vector" are preserved when present.

    Returns:
        True when Weaviate answers 200 or 201.
    """
    payload = {
        "class": class_name,
        "properties": obj.get("properties", {}),
    }

    # Carry the original UUID and embedding through when the backup has them.
    for key in ("id", "vector"):
        if key in obj:
            payload[key] = obj[key]

    reply = requests.post(
        f"{WEAVIATE_URL}/v1/objects",
        json=payload,
        headers={"Content-Type": "application/json"},
    )

    return reply.status_code in (200, 201)
|
||||||
|
|
||||||
|
|
||||||
|
def batch_insert_objects(class_name: str, objects: list[dict], batch_size: int = 100) -> tuple[int, int]:
    """Insert backed-up objects into *class_name* via the batch endpoint.

    Args:
        class_name: target class.
        objects: backup records ("properties", optional "id"/"vector").
        batch_size: objects per /v1/batch/objects request.

    Returns:
        (successes, failures) counted per object; a failed batch request
        counts the whole batch as failures.
    """
    success = 0
    failures = 0

    for i in range(0, len(objects), batch_size):
        batch = objects[i:i + batch_size]

        # Preserve original id/vector only when present in the backup.
        batch_data = {
            "objects": [
                {
                    "class": class_name,
                    "properties": obj.get("properties", {}),
                    **({"id": obj["id"]} if "id" in obj else {}),
                    **({"vector": obj["vector"]} if "vector" in obj else {}),
                }
                for obj in batch
            ]
        }

        response = requests.post(
            f"{WEAVIATE_URL}/v1/batch/objects",
            json=batch_data,
            headers={"Content-Type": "application/json"}
        )

        if response.status_code == 200:
            # Weaviate reports per-object status inside a 200 response.
            result = response.json()
            for item in result:
                if item.get("result", {}).get("status") == "SUCCESS":
                    success += 1
                else:
                    failures += 1
                    error = item.get("result", {}).get("errors", {})
                    if error:
                        print(f" Erreur: {error}")
        else:
            # Whole-batch failure: count every object as failed.
            failures += len(batch)
            print(f" Erreur batch: {response.status_code}")

        # In-place progress line, overwritten on each iteration.
        progress = min(i + batch_size, len(objects))
        print(f" {class_name}: {progress}/{len(objects)} objets traités...", end="\r")

    # Trailing spaces clear leftovers from the progress line.
    print(f" {class_name}: {success} succès, {failures} échecs" + " " * 20)
    return success, failures
|
||||||
|
|
||||||
|
|
||||||
|
def restore_weaviate(
    backup_path: Path,
    collections: list[str] | None = None,
    clear_existing: bool = False,
    dry_run: bool = False
) -> dict:
    """
    Restore collections into Weaviate from a backup file.

    Args:
        backup_path: Path to the backup JSON file.
        collections: Collections to restore (None = all collections found in the backup).
        clear_existing: Delete conflicting existing collections before restoring.
        dry_run: Preview the actions without performing any of them.

    Returns:
        Restoration statistics {"success": int, "failures": int, "by_class": {...}},
        or {} when nothing was left to restore.

    Note:
        Terminates the process (sys.exit(1)) when Weaviate is unreachable.
    """
    print("=" * 60)
    print("RESTAURATION WEAVIATE")
    if dry_run:
        print("*** MODE DRY-RUN - Aucune modification ***")
    print("=" * 60)
    print(f"URL: {WEAVIATE_URL}")
    print(f"Backup: {backup_path}")
    print(f"Clear existing: {clear_existing}")
    print("-" * 60)

    # Check the connection before doing anything
    if not check_weaviate_ready():
        print("ERREUR: Weaviate n'est pas accessible")
        print(f"Vérifiez que le serveur tourne sur {WEAVIATE_URL}")
        sys.exit(1)

    print("Weaviate connecté ✓")

    # Load the backup file
    print(f"\n[1/4] Chargement du backup...")
    with open(backup_path, "r", encoding="utf-8") as f:
        backup_data = json.load(f)

    metadata = backup_data.get("metadata", {})
    print(f" Timestamp: {metadata.get('timestamp', 'N/A')}")
    print(f" Source: {metadata.get('weaviate_url', 'N/A')}")
    print(f" Vectors inclus: {metadata.get('include_vectors', False)}")

    schema = backup_data.get("schema", {})
    backup_collections = backup_data.get("collections", {})

    # Work out which collections to restore
    if collections:
        classes_to_restore = [c for c in collections if c in backup_collections]
    else:
        classes_to_restore = list(backup_collections.keys())

    print(f"\n Collections à restaurer: {', '.join(classes_to_restore)}")

    # Inspect the collections already present on the server
    print(f"\n[2/4] Vérification des collections existantes...")
    existing_classes = get_existing_classes()
    print(f" Collections existantes: {', '.join(existing_classes) or '(aucune)'}")

    conflicts = [c for c in classes_to_restore if c in existing_classes]
    if conflicts:
        print(f" Conflits détectés: {', '.join(conflicts)}")
        if clear_existing:
            print(" → Seront supprimées (--clear-existing)")
        else:
            # Without --clear-existing, conflicting collections are skipped
            print(" → Seront ignorées (utilisez --clear-existing pour les remplacer)")
            classes_to_restore = [c for c in classes_to_restore if c not in conflicts]

    if not classes_to_restore:
        print("\nAucune collection à restaurer.")
        return {}

    # Prepare the schema lookup (class name -> class definition)
    print(f"\n[3/4] Préparation du schéma...")
    schema_classes = {c["class"]: c for c in schema.get("classes", [])}

    # Drop conflicting collections when requested
    if clear_existing and conflicts:
        print("\n Suppression des collections existantes...")
        for class_name in conflicts:
            if dry_run:
                print(f" [DRY-RUN] Suppression de {class_name}")
            else:
                if delete_class(class_name):
                    print(f" Supprimé: {class_name}")
                else:
                    print(f" ERREUR suppression: {class_name}")

    # Create the classes
    print("\n Création des classes...")
    for class_name in classes_to_restore:
        if class_name in schema_classes:
            class_schema = schema_classes[class_name]
            if dry_run:
                print(f" [DRY-RUN] Création de {class_name}")
            else:
                # Re-check existence (the state changed after the clear step)
                current_classes = get_existing_classes()
                if class_name not in current_classes:
                    if create_class(class_schema):
                        print(f" Créé: {class_name}")
                    else:
                        print(f" ERREUR création: {class_name}")
                else:
                    print(f" Existe déjà: {class_name}")
        else:
            print(f" Schéma manquant pour: {class_name}")

    # Insert the objects
    print(f"\n[4/4] Insertion des objets...")
    stats = {"success": 0, "failures": 0, "by_class": {}}

    for class_name in classes_to_restore:
        objects = backup_collections.get(class_name, [])
        if not objects:
            print(f" {class_name}: 0 objets")
            continue

        if dry_run:
            # Dry run: count everything as a hypothetical success
            print(f" [DRY-RUN] {class_name}: {len(objects)} objets à insérer")
            stats["by_class"][class_name] = {"success": len(objects), "failures": 0}
            stats["success"] += len(objects)
        else:
            success, failures = batch_insert_objects(class_name, objects)
            stats["by_class"][class_name] = {"success": success, "failures": failures}
            stats["success"] += success
            stats["failures"] += failures

    # Summary
    print("\n" + "=" * 60)
    print("RESTAURATION TERMINÉE" + (" (DRY-RUN)" if dry_run else ""))
    print("=" * 60)
    print("\nStatistiques par collection:")
    for class_name, class_stats in stats.get("by_class", {}).items():
        print(f" - {class_name}: {class_stats['success']} succès, {class_stats['failures']} échecs")

    print(f"\nTotal: {stats['success']} succès, {stats['failures']} échecs")

    return stats
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Parse CLI arguments and run the Weaviate restoration."""
    global WEAVIATE_URL  # Declare global at start of function

    parser = argparse.ArgumentParser(
        description="Restauration de Weaviate depuis un backup",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Exemples:
 python weaviate_restore.py backup.json
 python weaviate_restore.py backup.json --dry-run
 python weaviate_restore.py backup.json --collections Thought,Conversation
 python weaviate_restore.py backup.json --clear-existing

ATTENTION: --clear-existing supprime les donnees existantes!
"""
    )

    parser.add_argument(
        "backup",
        type=Path,
        help="Chemin du fichier de backup"
    )

    parser.add_argument(
        "--collections", "-c",
        type=str,
        default=None,
        help="Collections à restaurer (séparées par des virgules)"
    )

    parser.add_argument(
        "--clear-existing",
        action="store_true",
        help="Supprimer les collections existantes avant restauration"
    )

    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Prévisualiser les actions sans les exécuter"
    )

    parser.add_argument(
        "--url",
        type=str,
        default=None,
        help=f"URL Weaviate (défaut: {WEAVIATE_URL})"
    )

    args = parser.parse_args()

    # The backup file must exist before anything else
    if not args.backup.exists():
        print(f"ERREUR: Fichier non trouvé: {args.backup}")
        sys.exit(1)

    # Optional Weaviate URL override
    if args.url:
        WEAVIATE_URL = args.url

    # Comma-separated collections list -> Python list
    collections = None
    if args.collections:
        collections = [c.strip() for c in args.collections.split(",")]

    # Interactive confirmation when clearing for real (skipped on dry runs)
    if args.clear_existing and not args.dry_run:
        print("⚠️ ATTENTION: --clear-existing va SUPPRIMER des données!")
        print(" Utilisez --dry-run pour prévisualiser.")
        response = input(" Continuer? [y/N] ")
        if response.lower() != "y":
            print("Annulé.")
            sys.exit(0)

    # Run the restoration
    restore_weaviate(
        backup_path=args.backup,
        collections=collections,
        clear_existing=args.clear_existing,
        dry_run=args.dry_run
    )
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Script entry point.
    main()
|
||||||
439
ikario_processual/state_vector.py
Normal file
439
ikario_processual/state_vector.py
Normal file
@@ -0,0 +1,439 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
StateVector - Gestion du vecteur d'etat d'Ikario.
|
||||||
|
|
||||||
|
Le vecteur d'etat represente l'identite processuelle d'Ikario.
|
||||||
|
Il evolue a chaque occasion d'experience selon:
|
||||||
|
S(t) = f(S(t-1), occasion)
|
||||||
|
|
||||||
|
Ce module gere:
|
||||||
|
- Le schema Weaviate pour StateVector
|
||||||
|
- La creation de S(0) a partir de l'historique
|
||||||
|
- Les operations CRUD sur les etats
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
import re
from datetime import datetime, timezone
from typing import Any

import numpy as np
import requests
|
||||||
|
|
||||||
|
# Configuration
|
||||||
|
WEAVIATE_URL = os.getenv("WEAVIATE_URL", "http://localhost:8080")
|
||||||
|
|
||||||
|
# Schema de la collection StateVector
|
||||||
|
# Weaviate schema for the StateVector collection.
# Each object is one state S(t): a sequential id, timestamps, trigger and
# occasion metadata; the vector itself is supplied manually (vectorizer "none").
STATE_VECTOR_SCHEMA = {
    "class": "StateVector",
    "description": "Vecteurs d'etat - identite processuelle d'Ikario",
    "vectorizer": "none",  # BGE-M3 embedding supplied manually
    "properties": [
        {
            "name": "state_id",
            "dataType": ["int"],
            "description": "Numero sequentiel de l'etat (0, 1, 2...)"
        },
        {
            "name": "timestamp",
            "dataType": ["date"],
            "description": "Moment de creation de cet etat"
        },
        {
            "name": "previous_state_id",
            "dataType": ["int"],
            "description": "ID de l'etat precedent (None pour S(0))"
        },
        {
            "name": "trigger_type",
            "dataType": ["text"],
            "description": "Type de declencheur: user, timer, event, initialization"
        },
        {
            "name": "trigger_content",
            "dataType": ["text"],
            "description": "Contenu du declencheur"
        },
        {
            "name": "occasion_summary",
            "dataType": ["text"],
            "description": "Resume de l'occasion"
        },
        {
            "name": "response_summary",
            "dataType": ["text"],
            "description": "Resume de la reponse"
        },
        {
            "name": "thoughts_created",
            "dataType": ["int"],
            "description": "Nombre de pensees generees lors de cette occasion"
        },
        {
            "name": "source_thoughts_count",
            "dataType": ["int"],
            "description": "Nombre de pensees utilisees pour construire cet etat (S(0))"
        },
        {
            "name": "source_messages_count",
            "dataType": ["int"],
            "description": "Nombre de messages utilises pour construire cet etat (S(0))"
        },
    ],
    "vectorIndexConfig": {
        "distance": "cosine"
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
def check_weaviate_ready() -> bool:
    """Return True when the Weaviate readiness endpoint answers HTTP 200."""
    try:
        reply = requests.get(f"{WEAVIATE_URL}/v1/.well-known/ready", timeout=5)
    except requests.RequestException:
        # Connection refused, timeout, DNS failure... all mean "not ready".
        return False
    return reply.status_code == 200
|
||||||
|
|
||||||
|
|
||||||
|
def get_existing_classes() -> list[str]:
    """Return the names of all classes currently defined in the Weaviate schema."""
    reply = requests.get(f"{WEAVIATE_URL}/v1/schema")
    reply.raise_for_status()
    payload = reply.json()
    names = []
    for cls in payload.get("classes", []):
        names.append(cls["class"])
    return names
|
||||||
|
|
||||||
|
|
||||||
|
def create_state_vector_collection() -> bool:
    """
    Create the StateVector collection in Weaviate.

    Returns:
        True when the collection was created; False when it already
        existed or when Weaviate rejected the creation.
    """
    if "StateVector" in get_existing_classes():
        print("[StateVector] Collection existe deja")
        return False

    reply = requests.post(
        f"{WEAVIATE_URL}/v1/schema",
        json=STATE_VECTOR_SCHEMA,
        headers={"Content-Type": "application/json"}
    )

    if reply.status_code != 200:
        print(f"[StateVector] Erreur creation: {reply.status_code}")
        print(reply.text)
        return False

    print("[StateVector] Collection creee avec succes")
    return True
|
||||||
|
|
||||||
|
|
||||||
|
def delete_state_vector_collection() -> bool:
    """Drop the StateVector collection (used to reset the state history)."""
    reply = requests.delete(f"{WEAVIATE_URL}/v1/schema/StateVector")
    return reply.status_code == 200
|
||||||
|
|
||||||
|
|
||||||
|
def get_all_thoughts() -> list[dict]:
    """Fetch every Thought object from Weaviate, paginating 100 at a time."""
    collected: list[dict] = []
    page_size = 100
    cursor = 0

    while True:
        reply = requests.get(
            f"{WEAVIATE_URL}/v1/objects?class=Thought&limit={page_size}&offset={cursor}"
        )
        if reply.status_code != 200:
            break

        page = reply.json().get("objects", [])
        if not page:
            # Empty page: we walked past the last object.
            break

        collected.extend(page)
        cursor += page_size

    return collected
|
||||||
|
|
||||||
|
|
||||||
|
def get_all_messages() -> list[dict]:
    """Fetch every Message object from Weaviate, paginating 100 at a time."""
    collected: list[dict] = []
    page_size = 100
    cursor = 0

    while True:
        reply = requests.get(
            f"{WEAVIATE_URL}/v1/objects?class=Message&limit={page_size}&offset={cursor}"
        )
        if reply.status_code != 200:
            break

        page = reply.json().get("objects", [])
        if not page:
            # Empty page: we walked past the last object.
            break

        collected.extend(page)
        cursor += page_size

    return collected
|
||||||
|
|
||||||
|
|
||||||
|
def filter_thoughts(thoughts: list[dict]) -> list[dict]:
    """
    Filter out thoughts related to tests/debugging.

    Exclusion criteria:
    - thought_type is "test", "debug" or "example"
    - content shorter than 20 characters
    - content containing a test keyword ("test", "debug", "todo", ...)

    Keywords are matched as whole words: the previous substring match
    silently discarded legitimate thoughts whose words merely *contain*
    a keyword (e.g. "latest" contains "test", "method" is fine but
    "mastodon" contains no keyword while "protester" does contain "test").
    """
    # Keywords that mark a thought as test/debug noise
    exclude_keywords = [
        "test", "debug", "todo", "fixme", "xxx",
        "lorem ipsum", "example", "placeholder"
    ]
    # Whole-word pattern: "test" matches "a test" but not "latest"
    exclude_re = re.compile(
        r"\b(?:" + "|".join(re.escape(kw) for kw in exclude_keywords) + r")\b"
    )

    filtered = []

    for thought in thoughts:
        props = thought.get("properties", {})
        content = props.get("content", "").lower()
        thought_type = props.get("thought_type", "").lower()

        # Exclude thoughts explicitly typed as test material
        if thought_type in ["test", "debug", "example"]:
            continue

        # Exclude very short thoughts
        if len(content) < 20:
            continue

        # Exclude thoughts mentioning a test keyword as a standalone word
        if exclude_re.search(content):
            continue

        filtered.append(thought)

    return filtered
|
||||||
|
|
||||||
|
|
||||||
|
def filter_assistant_messages(messages: list[dict]) -> list[dict]:
    """
    Keep only Ikario's own (assistant) messages.

    Criteria:
    - role == "assistant" (case-insensitive)
    - content is meaningful (at least 50 characters)
    - content is not an error/system marker ("[Error...", "[System...")
    """
    kept = []

    for message in messages:
        props = message.get("properties", {})
        role = props.get("role", "").lower()
        content = props.get("content", "")

        is_assistant = role == "assistant"
        is_long_enough = len(content) >= 50
        is_regular = not content.startswith(("[Error", "[System"))

        if is_assistant and is_long_enough and is_regular:
            kept.append(message)

    return kept
|
||||||
|
|
||||||
|
|
||||||
|
def compute_aggregate_embedding(
    thoughts: list[dict],
    messages: list[dict],
    model
) -> np.ndarray:
    """
    Build the aggregate state embedding from thoughts and messages.

    Each non-empty content is encoded with *model*; the embeddings are
    combined as a weighted mean (thoughts weigh 2.0, messages 1.0, and
    messages are truncated to 2000 characters first). The result is
    L2-normalised.

    Args:
        thoughts: Filtered thought objects.
        messages: Filtered message objects.
        model: SentenceTransformer-like object exposing ``encode(text)``.

    Returns:
        A unit-norm aggregate vector (1024-dim with BGE-M3).

    Raises:
        ValueError: When no text could be encoded.
    """
    vectors = []
    vector_weights = []

    # Thoughts carry double weight: they are considered more significant.
    print(f" Traitement de {len(thoughts)} pensees...")
    for thought in thoughts:
        text = thought.get("properties", {}).get("content", "")
        if text:
            vectors.append(model.encode(text))
            vector_weights.append(2.0)

    # Messages carry unit weight.
    print(f" Traitement de {len(messages)} messages...")
    for message in messages:
        text = message.get("properties", {}).get("content", "")
        if text:
            # Cap very long messages before encoding
            vectors.append(model.encode(text[:2000]))
            vector_weights.append(1.0)

    if not vectors:
        raise ValueError("Aucun contenu a encoder!")

    stacked = np.array(vectors)
    w = np.array(vector_weights)
    w = w / w.sum()  # normalise the weights

    combined = np.average(stacked, axis=0, weights=w)

    # Return a unit-length vector
    return combined / np.linalg.norm(combined)
|
||||||
|
|
||||||
|
|
||||||
|
def create_initial_state(
    thoughts: list[dict],
    messages: list[dict],
    embedding: np.ndarray
) -> dict:
    """
    Create the initial state S(0) in Weaviate.

    Args:
        thoughts: Thoughts used to build S(0)
        messages: Messages used to build S(0)
        embedding: Computed state vector

    Returns:
        The created S(0) properties dict, with its Weaviate "id" added.

    Raises:
        RuntimeError: When Weaviate rejects the object creation.
    """
    # Bug fix: the previous timestamp was naive *local* time with a "Z"
    # (UTC) suffix appended, mislabeling the moment on any non-UTC host.
    # Use real UTC and the RFC 3339 "Z" form expected by Weaviate's date type.
    now_utc = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")

    s0_data = {
        "state_id": 0,
        "timestamp": now_utc,
        "previous_state_id": -1,  # no previous state for S(0)
        "trigger_type": "initialization",
        "trigger_content": "Creation de l'etat initial a partir de l'historique",
        "occasion_summary": f"Naissance processuelle d'Ikario - agregation de {len(thoughts)} pensees et {len(messages)} messages",
        "response_summary": "Etat initial S(0) cree avec succes",
        "thoughts_created": 0,
        "source_thoughts_count": len(thoughts),
        "source_messages_count": len(messages),
    }

    # Create the object together with its vector
    response = requests.post(
        f"{WEAVIATE_URL}/v1/objects",
        json={
            "class": "StateVector",
            "properties": s0_data,
            "vector": embedding.tolist()
        },
        headers={"Content-Type": "application/json"}
    )

    if response.status_code in [200, 201]:
        result = response.json()
        s0_data["id"] = result.get("id")
        print(f"[StateVector] S(0) cree avec ID: {s0_data['id']}")
        return s0_data
    else:
        print(f"[StateVector] Erreur creation S(0): {response.status_code}")
        print(response.text)
        raise RuntimeError("Impossible de creer S(0)")
|
||||||
|
|
||||||
|
|
||||||
|
def get_current_state_id() -> int:
    """
    Return the state_id of the most recent state, or -1 when none exists
    (or when Weaviate is unreachable).

    Bug fix: the previous version fetched only the first 100 StateVector
    objects, so the true maximum could be missed once more than 100 states
    existed. We now paginate through all objects.
    """
    max_id = -1
    limit = 100
    offset = 0

    while True:
        url = f"{WEAVIATE_URL}/v1/objects?class=StateVector&limit={limit}&offset={offset}"
        response = requests.get(url)

        if response.status_code != 200:
            break

        objects = response.json().get("objects", [])
        if not objects:
            break

        page_max = max(obj.get("properties", {}).get("state_id", -1) for obj in objects)
        max_id = max(max_id, page_max)
        offset += limit

    return max_id
|
||||||
|
|
||||||
|
|
||||||
|
def get_state_vector(state_id: int) -> dict | None:
    """
    Fetch a state by its state_id.

    Args:
        state_id: Sequential number of the state

    Returns:
        The StateVector object (including _additional id/vector) or None
        when the request fails or no state matches.
    """
    # GraphQL query filtering on state_id.
    # The %d interpolation is injection-safe here because state_id is an int.
    query = {
        "query": """
        {
            Get {
                StateVector(where: {
                    path: ["state_id"],
                    operator: Equal,
                    valueInt: %d
                }) {
                    state_id
                    timestamp
                    previous_state_id
                    trigger_type
                    trigger_content
                    occasion_summary
                    response_summary
                    thoughts_created
                    source_thoughts_count
                    source_messages_count
                    _additional {
                        id
                        vector
                    }
                }
            }
        }
        """ % state_id
    }

    response = requests.post(
        f"{WEAVIATE_URL}/v1/graphql",
        json=query,
        headers={"Content-Type": "application/json"}
    )

    if response.status_code != 200:
        return None

    # NOTE(review): a 200 reply carrying GraphQL errors may have "data": null,
    # which would make the chained .get() calls raise — confirm against the
    # Weaviate GraphQL error format if this path matters.
    data = response.json()
    states = data.get("data", {}).get("Get", {}).get("StateVector", [])

    return states[0] if states else None
|
||||||
1
ikario_processual/tests/__init__.py
Normal file
1
ikario_processual/tests/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
# Tests pour ikario_processual
|
||||||
208
ikario_processual/tests/test_phase0_backup.py
Normal file
208
ikario_processual/tests/test_phase0_backup.py
Normal file
@@ -0,0 +1,208 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Tests pour la Phase 0: Backup et restauration Weaviate.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
pytest tests/test_phase0_backup.py -v
|
||||||
|
pytest tests/test_phase0_backup.py -v -k test_backup
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import tempfile
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
import requests
|
||||||
|
|
||||||
|
# Configuration
|
||||||
|
WEAVIATE_URL = os.getenv("WEAVIATE_URL", "http://localhost:8080")
|
||||||
|
|
||||||
|
|
||||||
|
def weaviate_is_available() -> bool:
    """Return True when the Weaviate readiness probe answers HTTP 200."""
    try:
        probe = requests.get(f"{WEAVIATE_URL}/v1/.well-known/ready", timeout=5)
    except requests.RequestException:
        # Any network failure counts as "unavailable".
        return False
    return probe.status_code == 200
|
||||||
|
|
||||||
|
|
||||||
|
# Skip the whole module when Weaviate is not reachable (e.g. CI without the service).
pytestmark = pytest.mark.skipif(
    not weaviate_is_available(),
    reason=f"Weaviate non disponible sur {WEAVIATE_URL}"
)
|
||||||
|
|
||||||
|
|
||||||
|
class TestWeaviateConnection:
    """Connectivity checks against the running Weaviate instance."""

    def test_weaviate_ready(self):
        """The readiness endpoint must answer 200."""
        reply = requests.get(f"{WEAVIATE_URL}/v1/.well-known/ready")
        assert reply.status_code == 200

    def test_weaviate_schema_accessible(self):
        """The schema endpoint must answer with a classes list."""
        reply = requests.get(f"{WEAVIATE_URL}/v1/schema")
        assert reply.status_code == 200
        assert "classes" in reply.json()

    def test_weaviate_has_collections(self):
        """At least one known collection (Thought, Conversation, ...) must exist."""
        payload = requests.get(f"{WEAVIATE_URL}/v1/schema").json()
        classes = [entry["class"] for entry in payload.get("classes", [])]

        # Any of the expected collections is enough
        expected = ["Thought", "Conversation", "Message", "Chunk", "Work", "Summary"]
        found = [name for name in classes if name in expected]

        assert len(found) > 0, f"Aucune collection trouvée parmi {expected}. Classes existantes: {classes}"
|
||||||
|
|
||||||
|
|
||||||
|
class TestBackupScript:
    """Exercise the backup script."""

    def test_backup_creates_file(self):
        """Running a backup must produce a non-empty JSON file."""
        # Dynamic import so the module resolves from the scripts directory
        import sys
        sys.path.insert(0, str(Path(__file__).parent.parent / "scripts"))

        from weaviate_backup import backup_weaviate

        with tempfile.TemporaryDirectory() as tmpdir:
            target = Path(tmpdir) / "test_backup.json"

            backup_weaviate(
                output_path=target,
                collections=None,  # every collection
                include_vectors=False  # faster for the test
            )

            assert target.exists(), "Le fichier de backup n'a pas été créé"
            assert target.stat().st_size > 0, "Le fichier de backup est vide"

    def test_backup_structure(self):
        """A backup file must contain metadata, schema and collections."""
        import sys
        sys.path.insert(0, str(Path(__file__).parent.parent / "scripts"))

        from weaviate_backup import backup_weaviate

        with tempfile.TemporaryDirectory() as tmpdir:
            target = Path(tmpdir) / "test_backup.json"

            backup_weaviate(
                output_path=target,
                collections=["Thought"],  # one collection keeps the test fast
                include_vectors=False
            )

            with open(target, "r", encoding="utf-8") as f:
                payload = json.load(f)

            # Top-level layout
            for section in ("metadata", "schema", "collections"):
                assert section in payload

            # Metadata content
            for key in ("timestamp", "weaviate_url", "version"):
                assert key in payload["metadata"]

    def test_backup_with_vectors(self):
        """With include_vectors=True the dump must carry embeddings."""
        import sys
        sys.path.insert(0, str(Path(__file__).parent.parent / "scripts"))

        from weaviate_backup import backup_weaviate

        with tempfile.TemporaryDirectory() as tmpdir:
            target = Path(tmpdir) / "test_backup_vectors.json"

            backup_weaviate(
                output_path=target,
                collections=["Thought"],
                include_vectors=True
            )

            with open(target, "r", encoding="utf-8") as f:
                payload = json.load(f)

            # When there are thoughts, at least one must carry its vector
            thoughts = payload.get("collections", {}).get("Thought", [])
            if thoughts:
                assert any("vector" in obj for obj in thoughts), \
                    "Aucun objet n'a de vecteur alors que include_vectors=True"
|
||||||
|
|
||||||
|
|
||||||
|
class TestRestoreScript:
    """Exercise the restore script."""

    def test_restore_dry_run(self):
        """A dry-run restore must leave the data untouched."""
        import sys
        sys.path.insert(0, str(Path(__file__).parent.parent / "scripts"))

        from weaviate_backup import backup_weaviate
        from weaviate_restore import restore_weaviate, get_existing_classes

        with tempfile.TemporaryDirectory() as tmpdir:
            # Take a reference backup first
            backup_file = Path(tmpdir) / "test_backup.json"
            backup_weaviate(
                output_path=backup_file,
                collections=["Thought"],
                include_vectors=False
            )

            def thought_count():
                reply = requests.get(f"{WEAVIATE_URL}/v1/objects?class=Thought&limit=1")
                return len(reply.json().get("objects", []))

            count_before = thought_count()

            # Restore in dry-run mode
            restore_weaviate(
                backup_path=backup_file,
                collections=["Thought"],
                clear_existing=False,
                dry_run=True
            )

            # Nothing may have changed
            assert count_before == thought_count(), "Le dry-run a modifié les données!"
|
||||||
|
|
||||||
|
|
||||||
|
class TestBackupRestoreCycle:
    """Full backup → restore round-trip tests."""

    def test_backup_restore_roundtrip(self):
        """
        Full flow: backup → restore → verification.

        A proper round-trip would need a throwaway collection so real data
        stays untouched; for now this is a placeholder while the scripts
        are validated individually by the tests above.
        """
        pass
|
||||||
|
|
||||||
|
|
||||||
|
def test_exports_directory_exists():
    """The exports directory must exist (it is created when missing)."""
    exports = Path(__file__).parent.parent.parent / "exports"
    exports.mkdir(parents=True, exist_ok=True)
    assert exports.exists()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Allow running this test module directly, outside of a pytest invocation.
    pytest.main([__file__, "-v"])
|
||||||
202
ikario_processual/tests/test_phase1_state_vector.py
Normal file
202
ikario_processual/tests/test_phase1_state_vector.py
Normal file
@@ -0,0 +1,202 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Tests pour la Phase 1: StateVector et S(0).
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
pytest tests/test_phase1_state_vector.py -v
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
import requests
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
# Ajouter le parent au path
|
||||||
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||||
|
|
||||||
|
from state_vector import (
|
||||||
|
check_weaviate_ready,
|
||||||
|
get_existing_classes,
|
||||||
|
filter_thoughts,
|
||||||
|
filter_assistant_messages,
|
||||||
|
get_state_vector,
|
||||||
|
get_current_state_id,
|
||||||
|
WEAVIATE_URL,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def weaviate_is_available() -> bool:
    """Return True when the Weaviate instance answers its readiness check."""
    ready = check_weaviate_ready()
    return ready
|
||||||
|
|
||||||
|
|
||||||
|
# Skip every test in this module when Weaviate is not reachable.
# NOTE: the skipif condition is evaluated once, at import time.
pytestmark = pytest.mark.skipif(
    not weaviate_is_available(),
    reason=f"Weaviate non disponible sur {WEAVIATE_URL}"
)
|
||||||
|
|
||||||
|
|
||||||
|
class TestStateVectorCollection:
    """Schema-level checks for the StateVector collection."""

    def test_state_vector_collection_exists(self):
        """The StateVector collection must exist."""
        classes = get_existing_classes()
        assert "StateVector" in classes, \
            f"StateVector non trouve. Classes: {classes}"

    def test_state_vector_schema_correct(self):
        """The StateVector schema must declare the required properties."""
        schema = requests.get(f"{WEAVIATE_URL}/v1/schema").json()

        # Locate the StateVector class definition inside the schema dump.
        state_vector_class = next(
            (c for c in schema.get("classes", []) if c["class"] == "StateVector"),
            None,
        )
        assert state_vector_class is not None

        # Every required property must be present on the class.
        prop_names = [p["name"] for p in state_vector_class.get("properties", [])]
        required = ["state_id", "timestamp", "trigger_type", "occasion_summary"]
        for req in required:
            assert req in prop_names, f"Propriete manquante: {req}"
|
||||||
|
|
||||||
|
|
||||||
|
class TestInitialState:
    """Checks on the initial state S(0)."""

    def test_s0_exists(self):
        """S(0) must exist and carry state_id == 0."""
        s0 = get_state_vector(0)
        assert s0 is not None, "S(0) non trouve"
        assert s0.get("state_id") == 0

    def test_s0_has_vector(self):
        """S(0) must carry an embedding vector."""
        s0 = get_state_vector(0)
        assert s0 is not None
        embedding = s0.get("_additional", {}).get("vector")
        assert embedding is not None, "S(0) n'a pas de vecteur"

    def test_s0_vector_is_1024_dim(self):
        """The S(0) vector must be 1024-dimensional (BGE-M3 embedding size)."""
        s0 = get_state_vector(0)
        assert s0 is not None
        embedding = s0.get("_additional", {}).get("vector")
        assert embedding is not None
        assert len(embedding) == 1024, f"Dimension: {len(embedding)} (attendu: 1024)"

    def test_s0_vector_is_normalized(self):
        """The S(0) vector must have (approximately) unit norm."""
        s0 = get_state_vector(0)
        assert s0 is not None
        norm = np.linalg.norm(np.array(s0.get("_additional", {}).get("vector", [])))
        assert abs(norm - 1.0) < 0.01, f"Norme: {norm} (attendu: ~1.0)"

    def test_s0_has_source_counts(self):
        """S(0) must record how many thoughts/messages fed its construction."""
        s0 = get_state_vector(0)
        assert s0 is not None

        thoughts_count = s0.get("source_thoughts_count")
        messages_count = s0.get("source_messages_count")

        assert thoughts_count is not None, "source_thoughts_count manquant"
        assert messages_count is not None, "source_messages_count manquant"
        # At least one source population must be non-empty.
        assert thoughts_count > 0 or messages_count > 0, \
            "S(0) doit etre construit a partir de donnees"

    def test_s0_trigger_type_is_initialization(self):
        """The trigger_type of S(0) must be 'initialization'."""
        s0 = get_state_vector(0)
        assert s0 is not None
        assert s0.get("trigger_type") == "initialization"
|
||||||
|
|
||||||
|
|
||||||
|
class TestFiltering:
    """Checks for the corpus-filtering helpers."""

    @staticmethod
    def _thought(content, thought_type):
        """Build a thought record in the Weaviate object shape."""
        return {"properties": {"content": content, "thought_type": thought_type}}

    @staticmethod
    def _message(role, content):
        """Build a message record in the Weaviate object shape."""
        return {"properties": {"role": role, "content": content}}

    def test_filter_thoughts_excludes_test(self):
        """Test/debug thoughts must be filtered out."""
        thoughts = [
            self._thought("Ceci est une vraie pensee philosophique", "reflection"),
            self._thought("test test test", "test"),
            self._thought("debug: checking values", "debug"),
            self._thought("Une autre pensee valide sur Whitehead", "reflection"),
        ]

        filtered = filter_thoughts(thoughts)

        assert len(filtered) == 2
        assert all(
            "test" not in t["properties"]["content"].lower() for t in filtered
        )

    def test_filter_thoughts_excludes_short(self):
        """Thoughts that are too short must be filtered out."""
        thoughts = [
            self._thought("OK", "reflection"),
            self._thought("Une pensee suffisamment longue pour etre valide", "reflection"),
        ]

        filtered = filter_thoughts(thoughts)

        assert len(filtered) == 1
        assert len(filtered[0]["properties"]["content"]) >= 20

    def test_filter_messages_keeps_only_assistant(self):
        """Only assistant messages must survive the filter."""
        messages = [
            self._message("user", "Question de l'utilisateur"),
            self._message("assistant", "Reponse d'Ikario avec suffisamment de contenu pour etre valide"),
            self._message("system", "Message systeme"),
        ]

        filtered = filter_assistant_messages(messages)

        assert len(filtered) == 1
        assert filtered[0]["properties"]["role"] == "assistant"

    def test_filter_messages_excludes_short(self):
        """Messages that are too short must be filtered out."""
        messages = [
            self._message("assistant", "OK"),
            self._message("assistant", "Une reponse complete avec suffisamment de contenu pour representer une vraie interaction"),
        ]

        filtered = filter_assistant_messages(messages)

        assert len(filtered) == 1
        assert len(filtered[0]["properties"]["content"]) >= 50
|
||||||
|
|
||||||
|
|
||||||
|
class TestStateVectorOperations:
    """Checks for StateVector lookup operations."""

    def test_get_current_state_id(self):
        """get_current_state_id must return at least 0."""
        assert get_current_state_id() >= 0, "Aucun etat trouve"

    def test_get_state_vector_returns_none_for_invalid_id(self):
        """get_state_vector must return None for an unknown state ID."""
        assert get_state_vector(99999) is None
|
||||||
|
|
||||||
|
|
||||||
|
# Allow running this test module directly (outside a pytest invocation).
if __name__ == "__main__":
    pytest.main([__file__, "-v"])
|
||||||
210
ikario_processual/tests/test_phase2_directions.py
Normal file
210
ikario_processual/tests/test_phase2_directions.py
Normal file
@@ -0,0 +1,210 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Tests pour la Phase 2: Directions de Projection.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
pytest tests/test_phase2_directions.py -v
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
import requests
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
# Ajouter le parent au path
|
||||||
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||||
|
|
||||||
|
from state_vector import (
|
||||||
|
check_weaviate_ready,
|
||||||
|
get_state_vector,
|
||||||
|
WEAVIATE_URL,
|
||||||
|
)
|
||||||
|
from projection_directions import (
|
||||||
|
get_existing_classes,
|
||||||
|
get_direction,
|
||||||
|
get_all_directions,
|
||||||
|
get_state_profile,
|
||||||
|
project_state_on_direction,
|
||||||
|
DIRECTIONS_CONFIG,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def weaviate_is_available() -> bool:
    """Return True when the Weaviate instance answers its readiness check."""
    ready = check_weaviate_ready()
    return ready
|
||||||
|
|
||||||
|
|
||||||
|
# Skip every test in this module when Weaviate is not reachable.
# NOTE: the skipif condition is evaluated once, at import time.
pytestmark = pytest.mark.skipif(
    not weaviate_is_available(),
    reason=f"Weaviate non disponible sur {WEAVIATE_URL}"
)
|
||||||
|
|
||||||
|
|
||||||
|
class TestProjectionDirectionCollection:
    """Checks on the ProjectionDirection collection."""

    def test_collection_exists(self):
        """The ProjectionDirection collection must exist."""
        assert "ProjectionDirection" in get_existing_classes()

    def test_all_directions_created(self):
        """Every configured direction must be present in the collection."""
        stored_names = {d["name"] for d in get_all_directions()}
        for name in DIRECTIONS_CONFIG:
            assert name in stored_names, f"Direction manquante: {name}"

    def test_directions_count(self):
        """The stored direction count must match the configuration."""
        assert len(get_all_directions()) == len(DIRECTIONS_CONFIG)
|
||||||
|
|
||||||
|
|
||||||
|
class TestDirectionVectors:
    """Checks on the stored direction vectors."""

    @staticmethod
    def _curiosity():
        """Fetch the 'curiosity' direction, failing the test if it is absent."""
        direction = get_direction("curiosity")
        assert direction is not None
        return direction

    def test_curiosity_direction_exists(self):
        """The 'curiosity' direction must exist in the 'epistemic' category."""
        direction = self._curiosity()
        assert direction["name"] == "curiosity"
        assert direction["category"] == "epistemic"

    def test_direction_has_vector(self):
        """Each direction must carry a non-empty vector."""
        vec = self._curiosity().get("_additional", {}).get("vector")
        assert vec is not None
        assert len(vec) > 0

    def test_direction_vector_is_1024_dim(self):
        """Direction vectors must be 1024-dimensional."""
        vec = self._curiosity().get("_additional", {}).get("vector")
        assert len(vec) == 1024

    def test_direction_vector_is_normalized(self):
        """Direction vectors must have (approximately) unit norm."""
        norm = np.linalg.norm(
            np.array(self._curiosity().get("_additional", {}).get("vector"))
        )
        assert abs(norm - 1.0) < 0.01, f"Norme: {norm}"

    def test_all_categories_present(self):
        """All five categories must be represented among the directions."""
        expected_categories = {"epistemic", "affective", "relational", "vital", "philosophical"}
        categories = {d["category"] for d in get_all_directions()}
        assert categories == expected_categories
|
||||||
|
|
||||||
|
|
||||||
|
class TestProjection:
    """Checks for the projection helpers."""

    @staticmethod
    def _s0_profile():
        """Fetch S(0), fail if it is missing, and return its projected profile."""
        s0 = get_state_vector(0)
        assert s0 is not None
        return get_state_profile(np.array(s0.get("_additional", {}).get("vector")))

    def test_projection_in_range(self):
        """Every projection value must lie within [-1, 1]."""
        for category, components in self._s0_profile().items():
            for name, value in components.items():
                assert -1 <= value <= 1, f"{name} = {value} hors limites [-1, 1]"

    def test_get_state_profile_structure(self):
        """The profile must be a dict of category -> {component: float}."""
        profile = self._s0_profile()
        assert isinstance(profile, dict)
        for components in profile.values():
            assert isinstance(components, dict)
            for value in components.values():
                assert isinstance(value, float)

    def test_projection_orthogonal_vectors(self):
        """Two orthogonal vectors must project to ~0."""
        # Two distinct standard-basis vectors are exactly orthogonal.
        e0 = np.zeros(1024)
        e0[0] = 1.0
        e1 = np.zeros(1024)
        e1[1] = 1.0
        assert abs(project_state_on_direction(e0, e1)) < 0.001

    def test_projection_parallel_vectors(self):
        """A unit vector projected onto itself must give ~1."""
        v = np.random.randn(1024)
        v = v / np.linalg.norm(v)
        assert abs(project_state_on_direction(v, v) - 1.0) < 0.001

    def test_projection_antiparallel_vectors(self):
        """A unit vector projected onto its negation must give ~-1."""
        v = np.random.randn(1024)
        v = v / np.linalg.norm(v)
        assert abs(project_state_on_direction(v, -v) + 1.0) < 0.001
|
||||||
|
|
||||||
|
|
||||||
|
class TestS0Profile:
    """Checks on the projected profile of S(0)."""

    @staticmethod
    def _profile_of_s0():
        """Fetch S(0), fail if it is missing, and return its projected profile."""
        s0 = get_state_vector(0)
        assert s0 is not None
        return get_state_profile(np.array(s0.get("_additional", {}).get("vector")))

    def test_s0_has_profile(self):
        """S(0) must yield a non-empty profile."""
        assert len(self._profile_of_s0()) > 0

    def test_s0_profile_has_all_categories(self):
        """The S(0) profile must cover every category."""
        expected = {"epistemic", "affective", "relational", "vital", "philosophical"}
        assert set(self._profile_of_s0().keys()) == expected

    def test_s0_has_curiosity_component(self):
        """The epistemic category of S(0) must include a curiosity component."""
        assert "curiosity" in self._profile_of_s0().get("epistemic", {})
|
||||||
|
|
||||||
|
|
||||||
|
# Allow running this test module directly (outside a pytest invocation).
if __name__ == "__main__":
    pytest.main([__file__, "-v"])
|
||||||
Reference in New Issue
Block a user