Add ikario_processual with David profile and embedding script
- david_profile_declared.json: David's declared profile values from the questionnaire
- scripts/embed_david.py: Python script to generate embeddings using the BGE-M3 model
- questionnaire_david.md: Questionnaire template for profile values

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
160
ikario_processual/scripts/create_all_directions.py
Normal file
160
ikario_processual/scripts/create_all_directions.py
Normal file
@@ -0,0 +1,160 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Script that creates all projection directions in Weaviate.

Usage:
    python scripts/create_all_directions.py [--reset]

Options:
    --reset    Delete and recreate the collection (warning: data loss!)
|
||||
"""
|
||||
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
# Ajouter le parent au path
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
|
||||
from projection_directions import (
|
||||
DIRECTIONS_CONFIG,
|
||||
create_projection_direction_collection,
|
||||
delete_projection_direction_collection,
|
||||
create_direction_by_contrast,
|
||||
save_direction,
|
||||
get_all_directions,
|
||||
get_existing_classes,
|
||||
)
|
||||
|
||||
|
||||
def _count_labels(labels):
    """Return a dict mapping each label in *labels* to its occurrence count."""
    counts = {}
    for label in labels:
        counts[label] = counts.get(label, 0) + 1
    return counts


def _print_counts(counts):
    """Print one ``- label: count`` line per entry, ordered by label."""
    for label, count in sorted(counts.items()):
        print(f" - {label}: {count}")


def main():
    """Create every configured projection direction missing from Weaviate.

    When ``--reset`` is present in ``sys.argv`` the ProjectionDirection
    collection is deleted first; the collection is then created if needed,
    the BGE-M3 embedding model is loaded, and each direction listed in
    ``DIRECTIONS_CONFIG`` that is not already stored is built by contrast
    and saved.

    Returns:
        int: 0 when nothing had to be created or every creation succeeded;
        1 when Weaviate is unreachable, the model cannot be loaded, or at
        least one direction failed.
    """
    reset = "--reset" in sys.argv

    print("=" * 70)
    print("CREATION DES DIRECTIONS DE PROJECTION")
    print("=" * 70)
    print(f"Total directions configurees: {len(DIRECTIONS_CONFIG)}")
    print()

    # Check that Weaviate answers before doing anything else.
    try:
        classes = get_existing_classes()
        print(f"[OK] Weaviate accessible, {len(classes)} classes existantes")
    except Exception as e:
        print(f"[ERREUR] Weaviate non accessible: {e}")
        print("Assurez-vous que Weaviate est en cours d'execution sur localhost:8080")
        return 1

    # Optional reset requested on the command line.
    if reset:
        print("\n[RESET] Suppression de la collection ProjectionDirection...")
        if delete_projection_direction_collection():
            print("[OK] Collection supprimee")
        else:
            print("[INFO] Collection n'existait pas")

    # Create the collection when it does not exist yet.
    print("\n[INFO] Creation de la collection ProjectionDirection...")
    if create_projection_direction_collection():
        print("[OK] Collection creee")
    else:
        print("[INFO] Collection existe deja")

    # Load the embedding model; imported here so that a missing package is
    # reported through the same error path as a failed model load.
    print("\n[INFO] Chargement du modele BGE-M3...")
    try:
        from sentence_transformers import SentenceTransformer
        model = SentenceTransformer('BAAI/bge-m3')
        print("[OK] Modele charge")
    except Exception as e:
        print(f"[ERREUR] Impossible de charger le modele: {e}")
        return 1

    # Names of the directions already stored.
    existing_directions = get_all_directions()
    existing_names = {d["name"] for d in existing_directions}
    print(f"\n[INFO] {len(existing_names)} directions existantes")

    # Configured directions per category (only the config values are needed).
    categories = _count_labels(
        config["category"] for config in DIRECTIONS_CONFIG.values()
    )
    print("\nDirections par categorie:")
    _print_counts(categories)

    # Directions that still have to be created.
    new_directions = [name for name in DIRECTIONS_CONFIG if name not in existing_names]
    print(f"\n[INFO] {len(new_directions)} nouvelles directions a creer")

    if not new_directions:
        print("[OK] Toutes les directions existent deja!")
        return 0

    print("\n" + "-" * 70)
    print("CREATION DES DIRECTIONS")
    print("-" * 70)

    created = 0
    errors = 0
    attempted = len(new_directions)  # >= 1 here, thanks to the early return
    start_time = time.time()

    for i, name in enumerate(new_directions, 1):
        config = DIRECTIONS_CONFIG[name]

        print(f"\n[{i}/{attempted}] {name} ({config['category']})")

        # One failing direction must not abort the whole batch, so every
        # exception is reported and counted, then the loop moves on.
        try:
            # Build the direction vector by contrasting the example sets.
            direction_vector = create_direction_by_contrast(
                config["positive_examples"],
                config["negative_examples"],
                model,
            )

            # Persist it in Weaviate.
            obj_id = save_direction(name, config, direction_vector)

            if obj_id:
                # str() keeps the preview working when the id is not a plain
                # string (e.g. a uuid.UUID), which would otherwise raise on
                # slicing and be miscounted as a failed creation.
                print(f" [OK] Cree: {str(obj_id)[:8]}...")
                created += 1
            else:
                print(" [ERREUR] Echec de sauvegarde")
                errors += 1

        except Exception as e:
            print(f" [ERREUR] {e}")
            errors += 1

    elapsed = time.time() - start_time

    # Summary. The average is taken over every attempted direction: the
    # elapsed time also covers the failed attempts, so dividing by the
    # successes alone would overstate the per-direction cost.
    print("\n" + "=" * 70)
    print("RESUME")
    print("=" * 70)
    print(f"Directions creees: {created}")
    print(f"Erreurs: {errors}")
    print(f"Temps: {elapsed:.1f}s ({elapsed/attempted:.1f}s par direction)")

    # Final check against what is actually stored.
    final_directions = get_all_directions()
    print(f"\nTotal directions dans Weaviate: {len(final_directions)}")

    # Stored directions per category.
    final_categories = _count_labels(
        d.get("category", "unknown") for d in final_directions
    )
    print("\nDirections par categorie (final):")
    _print_counts(final_categories)

    return 0 if errors == 0 else 1
|
||||
|
||||
|
||||
# Script entry point: hand main()'s return value to the interpreter as the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|
||||
Reference in New Issue
Block a user