Add Library RAG project and cleanup root directory
- Add complete Library RAG application (Flask + MCP server) - PDF processing pipeline with OCR and LLM extraction - Weaviate vector database integration (BGE-M3 embeddings) - Flask web interface with search and document management - MCP server for Claude Desktop integration - Comprehensive test suite (134 tests) - Clean up root directory - Remove obsolete documentation files - Remove backup and temporary files - Update autonomous agent configuration - Update prompts - Enhance initializer bis prompt with better instructions 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
319
generations/library_rag/utils/llm_chat.py
Normal file
319
generations/library_rag/utils/llm_chat.py
Normal file
@@ -0,0 +1,319 @@
|
||||
"""Multi-LLM Integration Module for Chat Conversation.
|
||||
|
||||
Provides a unified interface for calling different LLM providers with streaming support:
|
||||
- Ollama (local, free)
|
||||
- Mistral API
|
||||
- Anthropic API (Claude)
|
||||
- OpenAI API
|
||||
|
||||
Example:
|
||||
>>> for token in call_llm("Hello world", "ollama", "qwen2.5:7b"):
|
||||
... print(token, end="", flush=True)
|
||||
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
import time
|
||||
import logging
|
||||
from typing import Iterator, Optional
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class LLMError(Exception):
    """Raised when an LLM provider call fails or the provider is unknown."""
|
||||
|
||||
|
||||
def call_llm(
    prompt: str,
    provider: str,
    model: str,
    stream: bool = True,
    temperature: float = 0.7,
    max_tokens: int = 16384,
) -> Iterator[str]:
    """Call an LLM provider through a unified streaming interface.

    Args:
        prompt: The prompt to send to the LLM.
        provider: Provider name ("ollama", "mistral", "anthropic", "openai");
            matched case-insensitively.
        model: Model name (e.g., "qwen2.5:7b", "mistral-small-latest",
            "claude-sonnet-4-5").
        stream: Whether to stream tokens (default: True).
        temperature: Temperature for generation (0-1).
        max_tokens: Maximum tokens to generate (default 16384 for
            philosophical discussions). Note: not forwarded to Ollama here.

    Yields:
        Tokens as strings when streaming; a single full string otherwise.

    Raises:
        LLMError: If provider is invalid or API call fails.

    Example:
        >>> for token in call_llm("Test", "ollama", "qwen2.5:7b"):
        ...     print(token, end="")
    """
    provider = provider.lower()

    # Lazy %-style logging args: formatting is skipped when the level is off.
    logger.info("[LLM Call] Provider: %s, Model: %s, Stream: %s", provider, model, stream)
    start_time = time.time()

    try:
        if provider == "ollama":
            yield from _call_ollama(prompt, model, temperature, stream)
        elif provider == "mistral":
            yield from _call_mistral(prompt, model, temperature, max_tokens, stream)
        elif provider == "anthropic":
            yield from _call_anthropic(prompt, model, temperature, max_tokens, stream)
        elif provider == "openai":
            yield from _call_openai(prompt, model, temperature, max_tokens, stream)
        else:
            raise LLMError(f"Provider '{provider}' non supporté. Utilisez: ollama, mistral, anthropic, openai")

    except Exception as e:
        elapsed = time.time() - start_time
        logger.error("[LLM Call] Error after %.2fs: %s", elapsed, e)
        raise

    # Reached after the delegated generator is exhausted normally.
    elapsed = time.time() - start_time
    logger.info("[LLM Call] Completed in %.2fs", elapsed)
|
||||
|
||||
|
||||
def _call_ollama(
    prompt: str,
    model: str,
    temperature: float,
    stream: bool,
    max_tokens: Optional[int] = None,
) -> Iterator[str]:
    """Call the Ollama generate API with streaming support.

    Args:
        prompt: The prompt text.
        model: Ollama model name.
        temperature: Temperature (0-1).
        stream: Whether to stream.
        max_tokens: Optional cap on generated tokens, forwarded as Ollama's
            ``num_predict`` option. None (the default, matching previous
            behavior) leaves the model's own default in place.

    Yields:
        Tokens from the model (a single full string when not streaming).

    Raises:
        LLMError: On any HTTP or connection error from the Ollama server.
    """
    import requests

    base_url = os.getenv("OLLAMA_BASE_URL", "http://localhost:11434")
    url = f"{base_url}/api/generate"

    options = {"temperature": temperature}
    if max_tokens is not None:
        # Ollama names the generation cap "num_predict", not "max_tokens".
        options["num_predict"] = max_tokens

    payload = {
        "model": model,
        "prompt": prompt,
        "stream": stream,
        "options": options,
    }

    try:
        response = requests.post(url, json=payload, stream=stream, timeout=120)
        response.raise_for_status()

        if stream:
            # Stream mode: each line is a JSON object with a "response" field.
            for line in response.iter_lines():
                if not line:
                    continue
                try:
                    data = json.loads(line)
                except json.JSONDecodeError:
                    # Skip malformed keep-alive/partial lines.
                    continue
                token = data.get("response", "")
                if token:
                    yield token
                # The final object carries "done": true.
                if data.get("done", False):
                    break
        else:
            # Non-stream mode: one JSON body with the full response.
            yield response.json().get("response", "")

    except requests.exceptions.RequestException as e:
        raise LLMError(f"Ollama API error: {e}") from e
|
||||
|
||||
|
||||
def _call_mistral(prompt: str, model: str, temperature: float, max_tokens: int, stream: bool) -> Iterator[str]:
    """Send a single-turn prompt to the Mistral chat API and yield the reply.

    Args:
        prompt: The prompt text.
        model: Mistral model name.
        temperature: Temperature (0-1).
        max_tokens: Max tokens to generate.
        stream: If True, yield tokens as they arrive; otherwise yield the
            complete reply once.

    Yields:
        Tokens from the model.

    Raises:
        LLMError: If the API key or SDK is missing, or the API call fails.
    """
    api_key = os.getenv("MISTRAL_API_KEY")
    if not api_key:
        raise LLMError("MISTRAL_API_KEY not set in environment")

    try:
        from mistralai import Mistral
    except ImportError:
        raise LLMError("mistralai package not installed. Run: pip install mistralai")

    client = Mistral(api_key=api_key)

    # Both call shapes share the same request parameters.
    request_kwargs = dict(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=temperature,
        max_tokens=max_tokens,
    )

    try:
        if stream:
            # Streaming mode: surface each delta's content as it arrives.
            for chunk in client.chat.stream(**request_kwargs):
                if not chunk.data.choices:
                    continue
                delta = chunk.data.choices[0].delta
                piece = getattr(delta, 'content', None)
                if piece:
                    yield piece
        else:
            # Non-streaming mode: emit the whole message at once.
            response = client.chat.complete(**request_kwargs)
            if response.choices:
                yield response.choices[0].message.content or ""

    except Exception as e:
        raise LLMError(f"Mistral API error: {e}")
|
||||
|
||||
|
||||
def _call_anthropic(prompt: str, model: str, temperature: float, max_tokens: int, stream: bool) -> Iterator[str]:
    """Send a single-turn prompt to the Anthropic (Claude) API and yield the reply.

    Args:
        prompt: The prompt text.
        model: Claude model name.
        temperature: Temperature (0-1).
        max_tokens: Max tokens to generate.
        stream: If True, yield text chunks as they arrive; otherwise yield
            the complete reply once.

    Yields:
        Tokens from the model.

    Raises:
        LLMError: If the API key or SDK is missing, or the API call fails.
    """
    api_key = os.getenv("ANTHROPIC_API_KEY")
    if not api_key:
        raise LLMError("ANTHROPIC_API_KEY not set in environment")

    try:
        from anthropic import Anthropic
    except ImportError:
        raise LLMError("anthropic package not installed. Run: pip install anthropic")

    client = Anthropic(api_key=api_key)
    conversation = [{"role": "user", "content": prompt}]

    try:
        if stream:
            # Streaming mode; the context-manager name deliberately differs
            # from the `stream` flag to avoid shadowing it.
            with client.messages.stream(
                model=model,
                max_tokens=max_tokens,
                temperature=temperature,
                messages=conversation,
            ) as message_stream:
                yield from message_stream.text_stream
        else:
            # Non-streaming mode: one request, one full reply.
            response = client.messages.create(
                model=model,
                max_tokens=max_tokens,
                temperature=temperature,
                messages=conversation,
            )
            if response.content:
                yield response.content[0].text

    except Exception as e:
        raise LLMError(f"Anthropic API error: {e}")
|
||||
|
||||
|
||||
def _call_openai(prompt: str, model: str, temperature: float, max_tokens: int, stream: bool) -> Iterator[str]:
    """Send a single-turn prompt to the OpenAI chat API and yield the reply.

    Args:
        prompt: The prompt text.
        model: OpenAI model name.
        temperature: Temperature (0-1).
        max_tokens: Max tokens to generate.
        stream: If True, yield tokens as they arrive; otherwise yield the
            complete reply once.

    Yields:
        Tokens from the model.

    Raises:
        LLMError: If the API key or SDK is missing, or the API call fails.
    """
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        raise LLMError("OPENAI_API_KEY not set in environment")

    try:
        from openai import OpenAI
    except ImportError:
        raise LLMError("openai package not installed. Run: pip install openai")

    client = OpenAI(api_key=api_key)

    # o1 / gpt-5.x models take max_completion_tokens instead of max_tokens
    # (and temperature is not sent to them), so assemble kwargs up front
    # rather than duplicating four near-identical create() calls.
    request_kwargs = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "stream": stream,
    }
    if model.startswith(("o1", "gpt-5")):
        request_kwargs["max_completion_tokens"] = max_tokens
    else:
        request_kwargs["temperature"] = temperature
        request_kwargs["max_tokens"] = max_tokens

    try:
        if stream:
            # Streaming mode: surface each delta's content as it arrives.
            for chunk in client.chat.completions.create(**request_kwargs):
                if not chunk.choices:
                    continue
                delta = chunk.choices[0].delta
                piece = getattr(delta, 'content', None)
                if piece:
                    yield piece
        else:
            # Non-streaming mode: emit the whole message at once.
            response = client.chat.completions.create(**request_kwargs)
            if response.choices:
                yield response.choices[0].message.content or ""

    except Exception as e:
        raise LLMError(f"OpenAI API error: {e}")
|
||||
Reference in New Issue
Block a user