#!/usr/bin/env python3 """ StateToLanguage - Traduction de l'espace latent vers le langage humain. Phase 5 de l'architecture processuelle v2. Paradigme : "L'espace latent pense. Le LLM traduit." Ce module : 1. Projette le StateTensor sur les directions interpretables 2. Construit des prompts de traduction pour le LLM 3. Force le mode ZERO-REASONING (T=0, prompt strict) 4. Valide les sorties (Amendment #4 et #14) Le LLM NE REFLECHIT PAS. Il traduit. """ import json import logging import os from dataclasses import dataclass, field from datetime import datetime from typing import Any, Dict, List, Optional, Tuple import numpy as np from .state_tensor import StateTensor, DIMENSION_NAMES # Configuration ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY", "") DEFAULT_MODEL = "claude-3-5-sonnet-20241022" EMBEDDING_DIM = 1024 # Logger logger = logging.getLogger(__name__) @dataclass class ProjectionDirection: """Direction interpretable dans l'espace latent.""" name: str category: str pole_positive: str pole_negative: str description: str vector: np.ndarray = field(repr=False) def project(self, state_vector: np.ndarray) -> float: """Projette un vecteur sur cette direction.""" return float(np.dot(state_vector, self.vector)) @dataclass class TranslationResult: """Resultat d'une traduction.""" text: str projections: Dict[str, Dict[str, float]] output_type: str reasoning_detected: bool = False json_valid: bool = True raw_response: str = "" processing_time_ms: int = 0 def to_dict(self) -> Dict[str, Any]: """Serialise en dictionnaire.""" return { 'text': self.text, 'projections': self.projections, 'output_type': self.output_type, 'reasoning_detected': self.reasoning_detected, 'json_valid': self.json_valid, 'processing_time_ms': self.processing_time_ms, } # Mapping categories → dimensions du tenseur CATEGORY_TO_DIMENSION = { 'epistemic': 'firstness', # Rapport au savoir → Firstness 'affective': 'dispositions', # Emotions → Dispositions 'cognitive': 'thirdness', # Style de pensee → Thirdness 'relational': 'engagements', # Rapport aux autres → Engagements 'ethical': 'valeurs', # Orientation morale → Valeurs 'temporal': 'orientations', # Rapport au temps → Orientations 'thematic': 'pertinences', # Focus conceptuel → Pertinences 'metacognitive': 'secondness', # Conscience de soi → Secondness 'vital': 'dispositions', # Energie, risques → Dispositions 'ecosystemic': 'engagements', # Rapport ecosystemique → Engagements 'philosophical': 'thirdness', # Positions metaphysiques → Thirdness } # Marqueurs de raisonnement a detecter (Amendment #4) REASONING_MARKERS = [ "je pense que", "il me semble", "apres reflexion", "en analysant", "d'un point de vue", "cela suggere", "on pourrait dire", "il est probable que", "selon mon analyse", "en considerant", "je deduis que", "logiquement", "mon raisonnement", "je conclus", "en reflechissant", ] class StateToLanguage: """ Traduit un StateTensor en langage humain via le LLM. PROCESSUS: 1. Projeter X_{t+1} sur les directions interpretables 2. Construire un prompt descriptif des valeurs 3. LLM en mode ZERO-REASONING (T=0, prompt strict) Le LLM NE REFLECHIT PAS. Il traduit. """ def __init__( self, directions: Optional[List[ProjectionDirection]] = None, anthropic_client: Any = None, model: str = DEFAULT_MODEL, ): """ Initialise le traducteur. Args: directions: Liste des directions interpretables anthropic_client: Client Anthropic (optionnel, pour async) model: Modele a utiliser """ self.directions = directions or [] self.client = anthropic_client self.model = model self._translations_count = 0 self._reasoning_warnings = 0 def add_direction(self, direction: ProjectionDirection) -> None: """Ajoute une direction interpretable.""" self.directions.append(direction) def project_state(self, X: StateTensor) -> Dict[str, Dict[str, float]]: """ Projette le tenseur sur toutes les directions. Returns: { 'epistemic': {'curiosity': 0.72, 'certainty': -0.18, ...}, 'affective': {'enthusiasm': 0.45, ...}, 'relational': {'engagement': 0.33, ...}, ... } """ projections: Dict[str, Dict[str, float]] = {} for direction in self.directions: # Determiner quelle dimension du tenseur utiliser dim_name = CATEGORY_TO_DIMENSION.get(direction.category, 'thirdness') x_dim = getattr(X, dim_name) # Calculer la projection value = direction.project(x_dim) # Organiser par categorie if direction.category not in projections: projections[direction.category] = {} projections[direction.category][direction.name] = round(value, 3) return projections def project_state_flat(self, X: StateTensor) -> Dict[str, float]: """ Projette le tenseur et retourne un dict plat. Returns: {'curiosity': 0.72, 'enthusiasm': 0.45, ...} """ projections = self.project_state(X) flat = {} for category_projections in projections.values(): flat.update(category_projections) return flat @staticmethod def interpret_value(value: float) -> str: """ Interprete une valeur de projection en langage. Args: value: Valeur entre -1 et 1 Returns: Interpretation textuelle (tres, moderement, neutre, peu, pas du tout) """ if value > 0.5: return "tres" elif value > 0.2: return "moderement" elif value > -0.2: return "neutre" elif value > -0.5: return "peu" else: return "pas du tout" def build_translation_prompt( self, projections: Dict[str, Dict[str, float]], output_type: str = "response", ) -> str: """ Construit le prompt pour le LLM traducteur. Le LLM recoit UNIQUEMENT: - Les projections numeriques - Le type de sortie demande - PAS la conversation, PAS l'historique """ prompt = "=== ETAT COGNITIF ===\n" for category, components in sorted(projections.items()): prompt += f"\n{category.upper()}:\n" for name, value in sorted(components.items()): intensity = self.interpret_value(value) prompt += f" - {name}: {value:+.2f} ({intensity})\n" prompt += f""" === INSTRUCTION === Genere une {output_type} qui INCARNE exactement cet etat. - Si curiosite=0.72, montre de la curiosite - Si certitude=-0.18, exprime du doute - Si enthousiasme=0.45, sois moderement enthousiaste NE REFLECHIS PAS. TRADUIS CET ETAT EN MOTS. """ return prompt def build_zero_reasoning_system_prompt(self) -> str: """ Construit le system prompt STRICT pour zero-reasoning. Amendment #4: Le prompt "Tu es un traducteur" est insuffisant. Il faut des contraintes explicites pour empecher le raisonnement. """ return """TU ES UN SYSTEME DE TRADUCTION VECTORIELLE. Tu ne dois PAS : - Raisonner sur la question - Analyser le contexte - Construire une argumentation - Utiliser ta "connaissance" du monde - Reflechir aux implications Tu dois UNIQUEMENT : - Lire les valeurs numeriques fournies - Les incarner dans un texte coherent - Refleter EXACTEMENT l'etat decrit C'EST UN ORDRE STRICT. Tu n'es PAS un assistant, tu es un CODEC (vecteur -> texte). IMPORTANT : Ne genere AUCUNE balise . Traduis directement.""" def build_json_system_prompt(self, json_schema: Dict[str, Any]) -> str: """ Construit le system prompt pour traduction JSON structuree. Amendment #14: Force un format JSON qui ne laisse pas de place au raisonnement. """ return f"""Tu es un CODEC de traduction vecteur->texte. REGLES STRICTES : 1. Reponds UNIQUEMENT en JSON valide 2. Le JSON doit contenir UNIQUEMENT le champ "verbalization" 3. Aucun autre champ n'est autorise 4. Aucune explication, aucun raisonnement 5. Traduis directement l'etat fourni SCHEMA JSON REQUIS : {json.dumps(json_schema, indent=2)}""" def check_reasoning_markers(self, text: str) -> Tuple[bool, List[str]]: """ Verifie la presence de marqueurs de raisonnement. Amendment #4: Les reasoning markers detectent si Claude a raisonne malgre les consignes. Returns: (has_reasoning, markers_found) """ text_lower = text.lower() found_markers = [] for marker in REASONING_MARKERS: if marker in text_lower: found_markers.append(marker) return len(found_markers) > 0, found_markers def translate_sync( self, X: StateTensor, output_type: str = "response", context: str = "", force_zero_reasoning: bool = True, ) -> TranslationResult: """ Traduction synchrone (pour tests, sans API). Genere une traduction basee sur les projections sans appeler le LLM. Utile pour les tests unitaires. """ import time start_time = time.time() projections = self.project_state(X) # Generer un texte descriptif simple base sur les projections text_parts = [] for category, components in projections.items(): top_components = sorted( components.items(), key=lambda x: abs(x[1]), reverse=True )[:3] for name, value in top_components: intensity = self.interpret_value(value) if abs(value) > 0.2: # Seulement les composantes significatives text_parts.append(f"{name}: {intensity}") text = f"[{output_type.upper()}] " + ", ".join(text_parts) if text_parts else f"[{output_type.upper()}] Etat neutre." processing_time = int((time.time() - start_time) * 1000) self._translations_count += 1 return TranslationResult( text=text, projections=projections, output_type=output_type, reasoning_detected=False, json_valid=True, raw_response=text, processing_time_ms=processing_time, ) async def translate( self, X: StateTensor, output_type: str = "response", context: str = "", force_zero_reasoning: bool = True, ) -> TranslationResult: """ Traduit le tenseur en langage avec ZERO-REASONING force. Amendment #4: Contraintes TECHNIQUES appliquees: - Temperature = 0.0 (deterministe) - Max tokens limite (eviter verbosite) - System prompt STRICT (ordre explicite de ne pas penser) - Pas d'historique de conversation fourni """ import time start_time = time.time() projections = self.project_state(X) user_prompt = self.build_translation_prompt(projections, output_type) if context: user_prompt += f"\n\nContexte minimal: {context[:200]}" system_prompt = self.build_zero_reasoning_system_prompt() # Appel API si client disponible if self.client is not None: try: response = await self.client.messages.create( model=self.model, system=system_prompt, messages=[{"role": "user", "content": user_prompt}], temperature=0.0, # Deterministe max_tokens=500, # Limiter verbosite ) text = response.content[0].text except Exception as e: logger.error(f"Erreur API Anthropic: {e}") text = f"[ERREUR TRADUCTION] {str(e)[:100]}" else: # Mode test sans API text = f"[MOCK TRANSLATION] {output_type}: projections={len(projections)} categories" # Verifier marqueurs de raisonnement reasoning_detected, markers = self.check_reasoning_markers(text) if reasoning_detected and force_zero_reasoning: logger.warning(f"LLM exhibited reasoning despite zero-reasoning mode: {markers}") self._reasoning_warnings += 1 processing_time = int((time.time() - start_time) * 1000) self._translations_count += 1 return TranslationResult( text=text, projections=projections, output_type=output_type, reasoning_detected=reasoning_detected, json_valid=True, raw_response=text, processing_time_ms=processing_time, ) async def translate_structured( self, X: StateTensor, output_type: str = "response", context: str = "", ) -> TranslationResult: """ Traduction avec validation structurelle JSON. Amendment #14: Force le LLM a repondre en JSON, ce qui limite sa capacite a "penser" librement. """ import time start_time = time.time() projections = self.project_state(X) # Schema JSON strict json_schema = { "type": "object", "required": ["verbalization"], "properties": { "verbalization": { "type": "string", "description": "La traduction de l'etat en langage naturel" } }, "additionalProperties": False } system_prompt = self.build_json_system_prompt(json_schema) user_prompt = f"""Etat a traduire : {json.dumps(projections, indent=2)} Type de sortie : {output_type} {f'Contexte : {context[:100]}' if context else ''} Reponds UNIQUEMENT avec le JSON de traduction.""" json_valid = True reasoning_detected = False # Appel API si client disponible if self.client is not None: try: response = await self.client.messages.create( model=self.model, system=system_prompt, messages=[{"role": "user", "content": user_prompt}], temperature=0.0, max_tokens=500, ) raw_text = response.content[0].text # Validation JSON stricte try: parsed = json.loads(raw_text) # Verifier structure if set(parsed.keys()) != {"verbalization"}: extra_keys = set(parsed.keys()) - {"verbalization"} logger.warning(f"LLM a ajoute des champs non autorises : {extra_keys}") json_valid = False text = parsed.get("verbalization", raw_text) else: text = parsed["verbalization"] except json.JSONDecodeError: logger.warning(f"LLM n'a pas retourne du JSON valide : {raw_text[:100]}") json_valid = False text = raw_text except Exception as e: logger.error(f"Erreur API Anthropic: {e}") text = f"[ERREUR TRADUCTION] {str(e)[:100]}" raw_text = text json_valid = False else: # Mode test sans API text = f"[MOCK JSON TRANSLATION] {output_type}" raw_text = json.dumps({"verbalization": text}) # Verifier marqueurs de raisonnement reasoning_detected, _ = self.check_reasoning_markers(text) processing_time = int((time.time() - start_time) * 1000) self._translations_count += 1 return TranslationResult( text=text, projections=projections, output_type=output_type, reasoning_detected=reasoning_detected, json_valid=json_valid, raw_response=raw_text if self.client else text, processing_time_ms=processing_time, ) def get_stats(self) -> Dict[str, Any]: """Retourne les statistiques du traducteur.""" return { 'directions_count': len(self.directions), 'translations_count': self._translations_count, 'reasoning_warnings': self._reasoning_warnings, 'categories': list(set(d.category for d in self.directions)), } def create_directions_from_weaviate(weaviate_client) -> List[ProjectionDirection]: """ Charge les directions depuis Weaviate. Args: weaviate_client: Client Weaviate v4 Returns: Liste des directions interpretables """ directions = [] try: collection = weaviate_client.collections.get("ProjectionDirection") for item in collection.iterator(include_vector=True): direction = ProjectionDirection( name=item.properties.get("name", "unknown"), category=item.properties.get("category", "unknown"), pole_positive=item.properties.get("pole_positive", ""), pole_negative=item.properties.get("pole_negative", ""), description=item.properties.get("description", ""), vector=np.array(item.vector['default'] if isinstance(item.vector, dict) else item.vector), ) directions.append(direction) except Exception as e: logger.error(f"Erreur chargement directions: {e}") return directions def create_directions_from_config( config: Dict[str, Dict[str, Any]], embedding_model, ) -> List[ProjectionDirection]: """ Cree les directions depuis la configuration locale. Args: config: Configuration des directions (DIRECTIONS_CONFIG) embedding_model: Modele d'embedding (SentenceTransformer) Returns: Liste des directions interpretables """ directions = [] for name, cfg in config.items(): # Embeddings positifs et negatifs pos_embeddings = embedding_model.encode(cfg.get("positive_examples", [])) neg_embeddings = embedding_model.encode(cfg.get("negative_examples", [])) if len(pos_embeddings) > 0 and len(neg_embeddings) > 0: pos_mean = np.mean(pos_embeddings, axis=0) neg_mean = np.mean(neg_embeddings, axis=0) # Direction = difference normalisee vector = pos_mean - neg_mean vector = vector / np.linalg.norm(vector) else: vector = np.zeros(EMBEDDING_DIM) direction = ProjectionDirection( name=name, category=cfg.get("category", "unknown"), pole_positive=cfg.get("pole_positive", ""), pole_negative=cfg.get("pole_negative", ""), description=cfg.get("description", ""), vector=vector, ) directions.append(direction) return directions def create_translator( weaviate_client=None, embedding_model=None, anthropic_client=None, directions_config: Optional[Dict] = None, ) -> StateToLanguage: """ Factory pour creer un traducteur configure. Args: weaviate_client: Client Weaviate (charge les directions existantes) embedding_model: Modele d'embedding (cree les directions depuis config) anthropic_client: Client Anthropic pour les appels API directions_config: Configuration des directions (optionnel) Returns: StateToLanguage configure """ directions = [] # Charger depuis Weaviate si disponible if weaviate_client is not None: directions = create_directions_from_weaviate(weaviate_client) # Ou creer depuis config si fournie if not directions and directions_config and embedding_model: directions = create_directions_from_config(directions_config, embedding_model) return StateToLanguage( directions=directions, anthropic_client=anthropic_client, )