From 4823fd1b10a28c1a6acb5006cabbfb6ce1111292 Mon Sep 17 00:00:00 2001
From: David Blanc Brioir <davidblancbrioir@gmail.com>
Date: Tue, 30 Dec 2025 22:26:29 +0100
Subject: [PATCH] Fix: Gestion robuste des valeurs None dans .lower()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Problème:
- AttributeError: 'NoneType' object has no attribute 'lower'
- Se produisait quand section.get("title") retournait None au lieu de ""

Corrections:
- llm_classifier.py:
  * is_excluded_section(): (section.get("title") or "").lower(), idem pour "chapterTitle"
  * filter_indexable_sections(): (s.get("chapterTitle") or "").lower()
  * validate_classified_sections(): Idem pour chapter_title et section_title

- llm_validator.py:
  * apply_corrections(): Ajout d'une vérification "if title and ..." avant l'appel à title.lower()

- llm_chat.py:
  * call_llm(): Ajout d'une exception si provider est None/vide

Pattern de correction:
  AVANT: section.get("title", "").lower()  # Échoue si None
  APRÈS: (section.get("title") or "").lower()  # Sûr avec None

Raison:
.get(key, default) retourne le default SEULEMENT si la clé n'existe pas.
Si la clé existe avec la valeur None, .get() retourne None, pas le default !

Donc: {"title": None}.get("title", "") -> None (pas "")

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
---
 generations/library_rag/utils/llm_chat.py       |  3 +++
 generations/library_rag/utils/llm_classifier.py | 10 +++++-----
 generations/library_rag/utils/llm_validator.py  |  2 +-
 3 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/generations/library_rag/utils/llm_chat.py b/generations/library_rag/utils/llm_chat.py
index 7741fce..c9eabf9 100644
--- a/generations/library_rag/utils/llm_chat.py
+++ b/generations/library_rag/utils/llm_chat.py
@@ -56,6 +56,9 @@ def call_llm(
         >>> for token in call_llm("Test", "ollama", "qwen2.5:7b"):
         ...     print(token, end="")
     """
+    if not provider:
+        raise LLMError("Provider cannot be None or empty")
+
     provider = provider.lower()
 
     logger.info(f"[LLM Call] Provider: {provider}, Model: {model}, Stream: {stream}")
diff --git a/generations/library_rag/utils/llm_classifier.py b/generations/library_rag/utils/llm_classifier.py
index b4e448b..720720b 100644
--- a/generations/library_rag/utils/llm_classifier.py
+++ b/generations/library_rag/utils/llm_classifier.py
@@ -351,8 +351,8 @@ def is_excluded_section(section: dict[str, Any]) -> bool:
         >>> is_excluded_section({"title": "Introduction", "content": "..."})
         False
     """
-    title: str = section.get("title", "").lower().strip()
-    chapter_title: str = section.get("chapterTitle", "").lower().strip()
+    title: str = (section.get("title") or "").lower().strip()
+    chapter_title: str = (section.get("chapterTitle") or "").lower().strip()
 
     # Vérifier le titre de la section
     for excluded in EXCLUDED_SECTION_TITLES:
@@ -454,7 +454,7 @@ def filter_indexable_sections(sections: list[dict[str, Any]]) -> list[dict[str,
             continue
 
         # Vérifier si le chapitre parent est une TOC
-        chapter_title: str = s.get("chapterTitle", "").lower().strip()
+        chapter_title: str = (s.get("chapterTitle") or "").lower().strip()
         if any(excluded in chapter_title for excluded in EXCLUDED_SECTION_TITLES):
             logger.info(f"Section exclue (chapitre TOC): '{s.get('title', 'Sans titre')}' dans '{chapter_title}'")
             excluded_count += 1
@@ -497,8 +497,8 @@ def validate_classified_sections(sections: list[dict[str, Any]]) -> list[dict[st
 
     for section in sections:
         # Vérifier d'abord si le titre du chapitre parent est une TOC
-        chapter_title: str = section.get("chapter_title", "").lower().strip()
-        section_title: str = section.get("title", "").lower().strip()
+        chapter_title: str = (section.get("chapter_title") or "").lower().strip()
+        section_title: str = (section.get("title") or "").lower().strip()
 
         # Exclure si le chapitre parent est une TOC
         is_toc_chapter: bool = False
diff --git a/generations/library_rag/utils/llm_validator.py b/generations/library_rag/utils/llm_validator.py
index fff0991..a1b290b 100644
--- a/generations/library_rag/utils/llm_validator.py
+++ b/generations/library_rag/utils/llm_validator.py
@@ -495,7 +495,7 @@ def apply_corrections(
         title: str = metadata["title"]
         # Si le titre contient des phrases de validation, utiliser le champ "work" à la place
         validation_phrases: List[str] = ["à confirmer", "confirmer avec", "vérifier"]
-        if any(phrase in title.lower() for phrase in validation_phrases):
+        if title and any(phrase in title.lower() for phrase in validation_phrases):
             if "work" in metadata and metadata["work"]:
                 logger.info(f"Titre remplacé par 'work': '{title}' -> '{metadata['work']}'")
                 metadata["original_title"] = title