diff --git a/generations/library_rag/utils/llm_chat.py b/generations/library_rag/utils/llm_chat.py index 7741fce..c9eabf9 100644 --- a/generations/library_rag/utils/llm_chat.py +++ b/generations/library_rag/utils/llm_chat.py @@ -56,6 +56,9 @@ def call_llm( >>> for token in call_llm("Test", "ollama", "qwen2.5:7b"): ... print(token, end="") """ + if not provider: + raise LLMError("Provider cannot be None or empty") + provider = provider.lower() logger.info(f"[LLM Call] Provider: {provider}, Model: {model}, Stream: {stream}") diff --git a/generations/library_rag/utils/llm_classifier.py b/generations/library_rag/utils/llm_classifier.py index b4e448b..720720b 100644 --- a/generations/library_rag/utils/llm_classifier.py +++ b/generations/library_rag/utils/llm_classifier.py @@ -351,8 +351,8 @@ def is_excluded_section(section: dict[str, Any]) -> bool: >>> is_excluded_section({"title": "Introduction", "content": "..."}) False """ - title: str = section.get("title", "").lower().strip() - chapter_title: str = section.get("chapterTitle", "").lower().strip() + title: str = (section.get("title") or "").lower().strip() + chapter_title: str = (section.get("chapterTitle") or "").lower().strip() # Vérifier le titre de la section for excluded in EXCLUDED_SECTION_TITLES: @@ -454,7 +454,7 @@ def filter_indexable_sections(sections: list[dict[str, Any]]) -> list[dict[str, continue # Vérifier si le chapitre parent est une TOC - chapter_title: str = s.get("chapterTitle", "").lower().strip() + chapter_title: str = (s.get("chapterTitle") or "").lower().strip() if any(excluded in chapter_title for excluded in EXCLUDED_SECTION_TITLES): logger.info(f"Section exclue (chapitre TOC): '{s.get('title', 'Sans titre')}' dans '{chapter_title}'") excluded_count += 1 @@ -497,8 +497,8 @@ def validate_classified_sections(sections: list[dict[str, Any]]) -> list[dict[st for section in sections: # Vérifier d'abord si le titre du chapitre parent est une TOC - chapter_title: str = section.get("chapter_title", "").lower().strip() - section_title: str = section.get("title", "").lower().strip() + chapter_title: str = (section.get("chapter_title") or "").lower().strip() + section_title: str = (section.get("title") or "").lower().strip() # Exclure si le chapitre parent est une TOC is_toc_chapter: bool = False diff --git a/generations/library_rag/utils/llm_validator.py b/generations/library_rag/utils/llm_validator.py index fff0991..a1b290b 100644 --- a/generations/library_rag/utils/llm_validator.py +++ b/generations/library_rag/utils/llm_validator.py @@ -495,7 +495,7 @@ def apply_corrections( title: str = metadata["title"] # Si le titre contient des phrases de validation, utiliser le champ "work" à la place validation_phrases: List[str] = ["à confirmer", "confirmer avec", "vérifier"] - if any(phrase in title.lower() for phrase in validation_phrases): + if title and any(phrase in title.lower() for phrase in validation_phrases): if "work" in metadata and metadata["work"]: logger.info(f"Titre remplacé par 'work': '{title}' -> '{metadata['work']}'") metadata["original_title"] = title