Add Ikario Architecture v2 - Phases 1-8 complete

Implements the processual architecture based on Whitehead's Process Philosophy and Peirce's Semiotics. Core paradigm: "L'espace latent pense. Le LLM traduit." (The latent space thinks. The LLM translates.) Phase 1-4: Core semiotic cycle - StateTensor 8x1024 (8 Peircean dimensions) - Dissonance computation with hard negatives - Fixation via 4 Peircean methods (Tenacity, Authority, A Priori, Science) - LatentEngine orchestrating the full cycle Phase 5: StateToLanguage - LLM as pure translator (zero-reasoning, T=0) - Projection on interpretable directions - Reasoning markers detection (Amendment #4) Phase 6: Vigilance - x_ref (David) as guard-rail, NOT attractor - Drift detection per dimension and globally - Alerts: ok, warning, critical Phase 7: Autonomous Daemon - Two modes: CONVERSATION (always verbalize), AUTONOMOUS (~1000 cycles/day) - Amendment #5: 50% probability on unresolved impacts - TriggerGenerator with weighted random selection Phase 8: Integration & Metrics - ProcessMetrics for daily/weekly reports - Health status monitoring - Integration tests validating all modules 297 tests passing, version 0.7.0 Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-01 21:18:40 +01:00
parent 9c2145bcf2
commit f6fe71e2f7
19 changed files with 9887 additions and 9 deletions
--- a/ikario_processual/contradiction_detector.py
+++ b/ikario_processual/contradiction_detector.py
@@ -0,0 +1,350 @@
+#!/usr/bin/env python3
+"""
+Détecteur de Contradictions par NLI (Natural Language Inference).
+
+AMENDEMENT #8 : Détection fiable des hard negatives.
+
+Le problème avec la détection par seuil de similarité :
+- "L'IA a une conscience" vs "L'IA n'a pas de conscience"
+- Similarité cosine ~0.7 (haute !)
+- Mais ce sont des contradictions sémantiques
+
+Solution : Utiliser un modèle NLI pré-entraîné.
+- Modèle : facebook/bart-large-mnli (ou cross-encoder/nli-deberta-v3-base)
+- Classes : entailment, neutral, contradiction
+"""
+
+from dataclasses import dataclass
+from typing import List, Optional, Tuple, Any, Dict
+import numpy as np
+
+# Lazy import pour éviter de charger le modèle si non utilisé
+_classifier = None
+_model_name = None
+
+
+def get_nli_classifier(model_name: str = "facebook/bart-large-mnli"):
+    """
+    Lazy loader pour le classifieur NLI.
+
+    Utilise transformers pipeline (zero-shot classification).
+    """
+    global _classifier, _model_name
+
+    if _classifier is not None and _model_name == model_name:
+        return _classifier
+
+    try:
+        from transformers import pipeline
+        _classifier = pipeline(
+            "zero-shot-classification",
+            model=model_name,
+            device=-1  # CPU, utiliser 0 pour GPU
+        )
+        _model_name = model_name
+        return _classifier
+    except ImportError:
+        raise ImportError(
+            "transformers non installé. "
+            "Installez avec: pip install transformers torch"
+        )
+    except Exception as e:
+        raise RuntimeError(f"Erreur chargement modèle NLI: {e}")
+
+
+@dataclass
+class ContradictionResult:
+    """Résultat de la détection de contradiction."""
+    is_contradiction: bool
+    confidence: float
+    entailment_score: float
+    neutral_score: float
+    contradiction_score: float
+    text1: str
+    text2: str
+
+
+class ContradictionDetector:
+    """
+    Détecteur de contradictions sémantiques via NLI.
+
+    Usage:
+        detector = ContradictionDetector()
+        result = detector.detect("L'IA a une conscience", "L'IA n'a pas de conscience")
+        print(result.is_contradiction)  # True
+    """
+
+    def __init__(
+        self,
+        model_name: str = "facebook/bart-large-mnli",
+        contradiction_threshold: float = 0.5,
+        lazy_load: bool = True
+    ):
+        """
+        Args:
+            model_name: Nom du modèle HuggingFace NLI
+            contradiction_threshold: Seuil pour déclarer contradiction
+            lazy_load: Si True, charge le modèle à la première utilisation
+        """
+        self.model_name = model_name
+        self.contradiction_threshold = contradiction_threshold
+        self._classifier = None
+
+        if not lazy_load:
+            self._load_model()
+
+    def _load_model(self):
+        """Charge le modèle NLI."""
+        if self._classifier is None:
+            self._classifier = get_nli_classifier(self.model_name)
+
+    def detect_contradiction(
+        self,
+        premise: str,
+        hypothesis: str
+    ) -> Tuple[bool, float]:
+        """
+        Vérifie si deux textes sont en contradiction.
+
+        Args:
+            premise: Premier texte (la "vérité" de référence)
+            hypothesis: Second texte (ce qui est testé)
+
+        Returns:
+            (is_contradiction, confidence_score)
+        """
+        self._load_model()
+
+        # Construire l'entrée pour NLI
+        # Format: "premise" + " " + "hypothesis"
+        # Le classifieur évalue si hypothesis est impliqué/neutre/contredit par premise
+
+        result = self._classifier(
+            premise,
+            candidate_labels=["entailment", "neutral", "contradiction"],
+            hypothesis_template="{}",  # hypothesis brut
+            multi_label=False
+        )
+
+        # Extraire les scores
+        labels = result['labels']
+        scores = result['scores']
+
+        score_dict = dict(zip(labels, scores))
+        contradiction_score = score_dict.get('contradiction', 0.0)
+
+        is_contradiction = contradiction_score > self.contradiction_threshold
+
+        return (is_contradiction, contradiction_score)
+
+    def detect(self, text1: str, text2: str) -> ContradictionResult:
+        """
+        Détection complète avec tous les scores.
+
+        Args:
+            text1: Premier texte
+            text2: Second texte
+
+        Returns:
+            ContradictionResult avec tous les détails
+        """
+        self._load_model()
+
+        result = self._classifier(
+            text1,
+            candidate_labels=["entailment", "neutral", "contradiction"],
+            hypothesis_template="{}",
+        )
+
+        labels = result['labels']
+        scores = result['scores']
+        score_dict = dict(zip(labels, scores))
+
+        contradiction_score = score_dict.get('contradiction', 0.0)
+
+        return ContradictionResult(
+            is_contradiction=contradiction_score > self.contradiction_threshold,
+            confidence=contradiction_score,
+            entailment_score=score_dict.get('entailment', 0.0),
+            neutral_score=score_dict.get('neutral', 0.0),
+            contradiction_score=contradiction_score,
+            text1=text1[:200],
+            text2=text2[:200],
+        )
+
+    def detect_batch(
+        self,
+        premise: str,
+        hypotheses: List[str]
+    ) -> List[ContradictionResult]:
+        """
+        Détecte les contradictions pour plusieurs hypothèses.
+
+        Args:
+            premise: Texte de référence
+            hypotheses: Liste de textes à tester
+
+        Returns:
+            Liste de ContradictionResult
+        """
+        return [self.detect(premise, h) for h in hypotheses]
+
+
+class HybridContradictionDetector:
+    """
+    Détecteur hybride : cosine + NLI.
+
+    Combine la similarité cosine (rapide) et NLI (précis).
+
+    Logique:
+    1. Si similarité < 0.1 → hard negative certain
+    2. Si similarité > 0.7 → probablement OK (sauf si NLI dit contradiction)
+    3. Si 0.1 <= similarité <= 0.7 → utiliser NLI pour trancher
+    """
+
+    def __init__(
+        self,
+        nli_detector: Optional[ContradictionDetector] = None,
+        low_sim_threshold: float = 0.1,
+        high_sim_threshold: float = 0.7,
+        nli_threshold: float = 0.5
+    ):
+        """
+        Args:
+            nli_detector: Détecteur NLI (créé si None)
+            low_sim_threshold: En dessous = contradiction certaine
+            high_sim_threshold: Au dessus = vérifier avec NLI seulement si score > 0.8
+            nli_threshold: Seuil NLI pour contradiction
+        """
+        self.nli_detector = nli_detector
+        self.low_sim_threshold = low_sim_threshold
+        self.high_sim_threshold = high_sim_threshold
+        self.nli_threshold = nli_threshold
+
+    def _get_nli_detector(self) -> ContradictionDetector:
+        """Lazy load du détecteur NLI."""
+        if self.nli_detector is None:
+            self.nli_detector = ContradictionDetector(
+                contradiction_threshold=self.nli_threshold
+            )
+        return self.nli_detector
+
+    def detect(
+        self,
+        input_text: str,
+        input_vector: np.ndarray,
+        candidate_text: str,
+        candidate_vector: np.ndarray
+    ) -> Dict[str, Any]:
+        """
+        Détecte si input contredit candidate.
+
+        Args:
+            input_text: Texte de l'entrée
+            input_vector: Vecteur de l'entrée (1024-dim)
+            candidate_text: Texte du candidat (corpus)
+            candidate_vector: Vecteur du candidat
+
+        Returns:
+            Dict avec is_hard_negative, similarity, nli_score, method
+        """
+        # Étape 1 : Similarité cosine
+        norm1 = np.linalg.norm(input_vector)
+        norm2 = np.linalg.norm(candidate_vector)
+
+        if norm1 == 0 or norm2 == 0:
+            similarity = 0.0
+        else:
+            similarity = float(np.dot(input_vector, candidate_vector) / (norm1 * norm2))
+
+        result = {
+            'similarity': similarity,
+            'is_hard_negative': False,
+            'nli_score': None,
+            'method': 'cosine_only',
+        }
+
+        # Étape 2 : Décision basée sur similarité
+        if similarity < self.low_sim_threshold:
+            # Très différent → hard negative certain
+            result['is_hard_negative'] = True
+            result['method'] = 'low_similarity'
+            return result
+
+        if similarity > self.high_sim_threshold:
+            # Très similaire → probablement pas contradiction
+            # Mais on peut quand même vérifier avec NLI si les textes sont fournis
+            if input_text and candidate_text:
+                nli = self._get_nli_detector()
+                is_contradiction, score = nli.detect_contradiction(
+                    input_text, candidate_text
+                )
+                result['nli_score'] = score
+                if is_contradiction and score > 0.8:  # Seuil élevé car très similaire
+                    result['is_hard_negative'] = True
+                    result['method'] = 'nli_high_confidence'
+            return result
+
+        # Étape 3 : Zone grise (0.1-0.7) → utiliser NLI
+        if input_text and candidate_text:
+            nli = self._get_nli_detector()
+            is_contradiction, score = nli.detect_contradiction(
+                input_text, candidate_text
+            )
+            result['nli_score'] = score
+            result['is_hard_negative'] = is_contradiction
+            result['method'] = 'nli_zone_grise'
+        else:
+            # Pas de texte disponible → fallback sur similarité
+            result['is_hard_negative'] = similarity < 0.3
+            result['method'] = 'cosine_fallback'
+
+        return result
+
+
+# ============================================================================
+# CONVENIENCE FUNCTIONS
+# ============================================================================
+
+def is_contradiction(text1: str, text2: str, threshold: float = 0.5) -> bool:
+    """
+    Fonction utilitaire simple pour vérifier une contradiction.
+
+    Args:
+        text1: Premier texte
+        text2: Second texte
+        threshold: Seuil de confiance
+
+    Returns:
+        True si contradiction détectée
+    """
+    detector = ContradictionDetector(contradiction_threshold=threshold)
+    is_contra, _ = detector.detect_contradiction(text1, text2)
+    return is_contra
+
+
+def find_contradictions(
+    reference: str,
+    candidates: List[str],
+    threshold: float = 0.5
+) -> List[Tuple[str, float]]:
+    """
+    Trouve les contradictions dans une liste de candidats.
+
+    Args:
+        reference: Texte de référence
+        candidates: Liste de textes à vérifier
+        threshold: Seuil de confiance
+
+    Returns:
+        Liste de (texte, score) pour les contradictions détectées
+    """
+    detector = ContradictionDetector(contradiction_threshold=threshold)
+    results = []
+
+    for candidate in candidates:
+        is_contra, score = detector.detect_contradiction(reference, candidate)
+        if is_contra:
+            results.append((candidate, score))
+
+    return sorted(results, key=lambda x: x[1], reverse=True)