- david_profile_declared.json: David's declared profile values from questionnaire - scripts/embed_david.py: Python script to generate embeddings using BGE-M3 model - questionnaire_david.md: Questionnaire template for profile values Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
161 lines
4.7 KiB
Python
161 lines
4.7 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Create all the projection directions in Weaviate.

Usage:
    python scripts/create_all_directions.py [--reset]

Options:
    --reset    Delete and recreate the collection (warning: data loss!)
|
|
"""
|
|
|
|
import sys
|
|
import time
|
|
from pathlib import Path
|
|
|
|
# Ajouter le parent au path
|
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
|
|
|
from projection_directions import (
|
|
DIRECTIONS_CONFIG,
|
|
create_projection_direction_collection,
|
|
delete_projection_direction_collection,
|
|
create_direction_by_contrast,
|
|
save_direction,
|
|
get_all_directions,
|
|
get_existing_classes,
|
|
)
|
|
|
|
|
|
def _print_category_counts(title, categories):
    """Print *title*, then one `` - <category>: <count>`` line per category.

    Args:
        title: Heading printed before the counts.
        categories: Iterable of category labels, one per direction.
    """
    # Local import so the file's top-level import block stays untouched.
    from collections import Counter

    print(title)
    for cat, count in sorted(Counter(categories).items()):
        print(f" - {cat}: {count}")


def _create_directions(names, model):
    """Build and save a direction vector for each name in *names*.

    Each direction is computed with ``create_direction_by_contrast`` from the
    positive/negative examples of its ``DIRECTIONS_CONFIG`` entry and stored
    with ``save_direction``. A failure on one direction is reported and
    counted, then processing continues with the next one.

    Args:
        names: Names of the directions to create (keys of DIRECTIONS_CONFIG).
        model: Embedding model passed through to create_direction_by_contrast.

    Returns:
        A ``(created, errors)`` tuple of counts.
    """
    created = 0
    errors = 0
    total = len(names)

    for i, name in enumerate(names, 1):
        config = DIRECTIONS_CONFIG[name]
        print(f"\n[{i}/{total}] {name} ({config['category']})")

        try:
            # Build the direction vector by contrasting the example sets.
            direction_vector = create_direction_by_contrast(
                config["positive_examples"],
                config["negative_examples"],
                model,
            )

            # Persist it in Weaviate; a falsy id means the save failed.
            obj_id = save_direction(name, config, direction_vector)

            if obj_id:
                print(f" [OK] Cree: {obj_id[:8]}...")
                created += 1
            else:
                print(" [ERREUR] Echec de sauvegarde")
                errors += 1
        except Exception as e:
            # Best-effort batch: report the failure and keep going.
            print(f" [ERREUR] {e}")
            errors += 1

    return created, errors


def main():
    """Create every direction of DIRECTIONS_CONFIG that is not stored yet.

    Steps:
      1. Check that Weaviate answers (``get_existing_classes``).
      2. If ``--reset`` is present in ``sys.argv``, delete the
         ProjectionDirection collection.
      3. Create the collection (reported as already existing when the
         helper returns a falsy value).
      4. Compare the configured direction names with the stored ones.
      5. Only when something is missing, load the ``BAAI/bge-m3``
         SentenceTransformer model and create the missing directions.
      6. Print a summary and the final per-category counts.

    The embedding model is loaded only after step 4 so that a run with
    nothing to create does not pay for loading it.

    Returns:
        0 when nothing had to be created or every creation succeeded;
        1 when Weaviate is unreachable, the model cannot be loaded, or at
        least one direction failed.
    """
    reset = "--reset" in sys.argv

    print("=" * 70)
    print("CREATION DES DIRECTIONS DE PROJECTION")
    print("=" * 70)
    print(f"Total directions configurees: {len(DIRECTIONS_CONFIG)}")
    print()

    # Check that Weaviate is reachable before doing anything else.
    try:
        classes = get_existing_classes()
        print(f"[OK] Weaviate accessible, {len(classes)} classes existantes")
    except Exception as e:
        print(f"[ERREUR] Weaviate non accessible: {e}")
        print("Assurez-vous que Weaviate est en cours d'execution sur localhost:8080")
        return 1

    # Optional reset: drop the collection first.
    if reset:
        print("\n[RESET] Suppression de la collection ProjectionDirection...")
        if delete_projection_direction_collection():
            print("[OK] Collection supprimee")
        else:
            print("[INFO] Collection n'existait pas")

    # Create the collection if needed.
    print("\n[INFO] Creation de la collection ProjectionDirection...")
    if create_projection_direction_collection():
        print("[OK] Collection creee")
    else:
        print("[INFO] Collection existe deja")

    # Names of the directions already stored.
    existing_directions = get_all_directions()
    existing_names = {d["name"] for d in existing_directions}
    print(f"\n[INFO] {len(existing_names)} directions existantes")

    # Configured directions, counted per category.
    _print_category_counts(
        "\nDirections par categorie:",
        (config["category"] for config in DIRECTIONS_CONFIG.values()),
    )

    # Work out which configured directions are missing.
    new_directions = [name for name in DIRECTIONS_CONFIG if name not in existing_names]
    print(f"\n[INFO] {len(new_directions)} nouvelles directions a creer")

    if not new_directions:
        print("[OK] Toutes les directions existent deja!")
        return 0

    # Load the embedding model only now that there is work to do.
    print("\n[INFO] Chargement du modele BGE-M3...")
    try:
        from sentence_transformers import SentenceTransformer
        model = SentenceTransformer('BAAI/bge-m3')
        print("[OK] Modele charge")
    except Exception as e:
        print(f"[ERREUR] Impossible de charger le modele: {e}")
        return 1

    # Creation
    print("\n" + "-" * 70)
    print("CREATION DES DIRECTIONS")
    print("-" * 70)

    # The timer covers the creation loop only, not the model loading.
    start_time = time.time()
    created, errors = _create_directions(new_directions, model)
    elapsed = time.time() - start_time

    # Summary
    print("\n" + "=" * 70)
    print("RESUME")
    print("=" * 70)
    print(f"Directions creees: {created}")
    print(f"Erreurs: {errors}")
    # max(1, created) avoids a division by zero when every creation failed.
    print(f"Temps: {elapsed:.1f}s ({elapsed/max(1,created):.1f}s par direction)")

    # Final check against what is actually stored.
    final_directions = get_all_directions()
    print(f"\nTotal directions dans Weaviate: {len(final_directions)}")

    _print_category_counts(
        "\nDirections par categorie (final):",
        (d.get("category", "unknown") for d in final_directions),
    )

    return 0 if errors == 0 else 1
|
|
|
|
|
|
# Script entry point: use main()'s return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|