feat: Add multi-file batch upload with sequential processing

Implements comprehensive batch upload system with real-time progress tracking:

Backend Infrastructure:
- Add batch_jobs global dict for batch orchestration
- Add BatchFileInfo and BatchJob TypedDicts to utils/types.py
- Create run_batch_sequential() worker function with thread.join() synchronization
- Modify /upload POST route to detect single vs multi-file uploads
- Add 3 batch API routes: /upload/batch/progress, /status, /result
- Add timestamp_to_date Jinja2 template filter

Frontend:
- Update upload.html with 'multiple' attribute and file counter
- Create upload_batch_progress.html: Real-time dashboard with SSE per file
- Create upload_batch_result.html: Final summary with statistics

Architecture:
- Backward compatible: single-file upload unchanged
- Sequential processing: one file after another (respects API limits)
- N parallel SSE connections: one per file for real-time progress
- Polling mechanism to discover job IDs as files start processing
- 1-hour timeout per file with error handling and continuation

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-08 22:41:52 +01:00
parent 7a7a2b8e19
commit b70b796ef8
5 changed files with 819 additions and 37 deletions

View File

@@ -74,7 +74,7 @@ import threading
import queue
import time
from pathlib import Path
from typing import Any, Dict, Generator, Iterator, List, Optional, Union
from typing import Any, Dict, Generator, Iterator, List, Optional, Tuple, Union
from flask import Flask, render_template, request, jsonify, redirect, url_for, send_from_directory, Response, flash
from contextlib import contextmanager
@@ -123,6 +123,32 @@ chat_sessions: Dict[str, Dict[str, Any]] = {} # {session_id: {"status": str, "q
# Registry of TTS jobs currently in progress
tts_jobs: Dict[str, Dict[str, Any]] = {} # {job_id: {"status": str, "filepath": Path, "error": str}}
# Registry of batch jobs (multi-file upload)
batch_jobs: Dict[str, Dict[str, Any]] = {} # {batch_id: BatchJob dict}
# ═══════════════════════════════════════════════════════════════════════════════
# Template Filters
# ═══════════════════════════════════════════════════════════════════════════════
@app.template_filter("timestamp_to_date")
def timestamp_to_date(timestamp: float) -> str:
    """Convert a Unix timestamp to a French-formatted date string.

    The month name comes from a fixed French table instead of
    ``strftime("%B")``, so the output no longer depends on the locale the
    process happens to run under.

    Args:
        timestamp: Unix timestamp (seconds since epoch). Falsy values
            (``None``, ``0``) produce an empty string.

    Returns:
        Local-time date string (e.g., "15 janvier 2026 à 14:30"), or ""
        when the timestamp is missing or cannot be converted.
    """
    from datetime import datetime

    if not timestamp:
        return ""

    try:
        dt = datetime.fromtimestamp(timestamp)
    except (TypeError, ValueError, OverflowError, OSError):
        # Non-numeric or out-of-range input must not break template rendering.
        return ""

    french_months = (
        "janvier", "février", "mars", "avril", "mai", "juin",
        "juillet", "août", "septembre", "octobre", "novembre", "décembre",
    )
    month_name = french_months[dt.month - 1]
    return f"{dt:%d} {month_name} {dt:%Y} à {dt:%H:%M}"
# ═══════════════════════════════════════════════════════════════════════════════
# Weaviate Connection
# ═══════════════════════════════════════════════════════════════════════════════
@@ -2613,6 +2639,88 @@ def run_processing_job(
q.put(exception_event)
def run_batch_sequential(
    batch_id: str,
    files: List[Tuple[bytes, str, int]],
    options: ProcessingOptions,
) -> None:
    """Process every file of a batch one after another.

    For each file a fresh entry is registered in ``processing_jobs`` and
    ``run_processing_job`` is started in a daemon thread, which this function
    joins (for at most one hour) before moving on to the next file. Progress
    is written into ``batch_jobs[batch_id]`` so the status routes can read it.

    When all files have been handled the batch status becomes:
    ``"complete"`` (no failure), ``"error"`` (no success) or ``"partial"``.

    Args:
        batch_id: Key of the batch in ``batch_jobs``.
        files: List of tuples ``(file_bytes, filename, size_bytes)``, in the
            same order as ``batch["files"]``.
        options: Processing options forwarded unchanged to every job.
    """
    import time
    import threading

    batch: Dict[str, Any] = batch_jobs[batch_id]
    last_idx = len(files) - 1

    for idx, (file_bytes, filename, _size_bytes) in enumerate(files):
        # 1. Create individual job (reuses existing infrastructure)
        job_id = str(uuid.uuid4())
        processing_jobs[job_id] = {
            "status": "processing",
            "queue": queue.Queue(),
            "result": None,
            "filename": filename,
            "batch_id": batch_id,  # Links the job back to its batch
        }

        # 2. Update batch state
        file_entry = batch["files"][idx]
        file_entry["job_id"] = job_id
        file_entry["status"] = "processing"
        batch.setdefault("job_ids", []).append(job_id)
        batch["current_job_id"] = job_id

        # 3. Launch processing thread (uses existing function)
        thread = threading.Thread(
            target=run_processing_job,
            args=(job_id, file_bytes, filename, options),
            daemon=True,
        )
        thread.start()

        # 4. Wait for completion with a 1-hour timeout
        thread.join(timeout=3600)

        # 5. Record the outcome for this file
        job = processing_jobs[job_id]
        if thread.is_alive():
            # The worker cannot be cancelled from here: it keeps running as a
            # daemon thread and may overlap with the next file.
            batch["failed_files"] += 1
            file_entry["status"] = "error"
            file_entry["error"] = "Timeout (> 1 heure)"
        elif job["status"] == "complete":
            batch["completed_files"] += 1
            file_entry["status"] = "complete"
        else:
            batch["failed_files"] += 1
            file_entry["status"] = "error"
            result = job.get("result")
            file_entry["error"] = (
                result.get("error", "Erreur inconnue") if result else "Erreur inconnue"
            )

        # Always clear the current job, including after a timeout, so the
        # batch never advertises a job it has stopped waiting for.
        batch["current_job_id"] = None

        # Small pause between files; pointless after the last one.
        if idx < last_idx:
            time.sleep(1)

    # Mark batch as finished
    if batch["failed_files"] == 0:
        batch["status"] = "complete"
    elif batch["completed_files"] == 0:
        batch["status"] = "error"
    else:
        batch["status"] = "partial"
def run_word_processing_job(
job_id: str,
file_bytes: bytes,
@@ -2728,17 +2836,22 @@ def upload() -> str:
if request.method == "GET":
return render_template("upload.html")
# POST: traiter le fichier
# POST: traiter le(s) fichier(s)
if "file" not in request.files:
return render_template("upload.html", error="Aucun fichier sélectionné")
file = request.files["file"]
# Récupérer tous les fichiers (support single + multiple)
files = request.files.getlist("file")
if not file.filename or file.filename == "":
if not files or len(files) == 0:
return render_template("upload.html", error="Aucun fichier sélectionné")
# Valider tous les fichiers
for file in files:
if not file.filename or file.filename == "":
return render_template("upload.html", error="Un des fichiers est vide")
if not allowed_file(file.filename):
return render_template("upload.html", error="Format non supporté. Utilisez un fichier PDF (.pdf) ou Word (.docx).")
return render_template("upload.html", error=f"Format non supporté pour {file.filename}. Utilisez PDF (.pdf) ou Word (.docx).")
# Options de traitement
llm_provider: str = request.form.get("llm_provider", "mistral")
@@ -2754,6 +2867,12 @@ def upload() -> str:
"max_toc_pages": int(request.form.get("max_toc_pages", "8")),
}
# ═════════════════════════════════════════════════════════════════════════
# SINGLE FILE UPLOAD (existing behavior, backward compatible)
# ═════════════════════════════════════════════════════════════════════════
if len(files) == 1:
file = files[0]
# Lire le fichier
filename: str = secure_filename(file.filename)
file_bytes: bytes = file.read()
@@ -2787,9 +2906,161 @@ def upload() -> str:
thread.start()
# Afficher la page de progression
file_type_label: str = "Word" if is_word_document else "PDF"
return render_template("upload_progress.html", job_id=job_id, filename=filename)
# ═════════════════════════════════════════════════════════════════════════
# MULTI-FILE BATCH UPLOAD (new feature)
# ═════════════════════════════════════════════════════════════════════════
else:
import time
from utils.types import BatchFileInfo
# Créer un batch ID
batch_id: str = str(uuid.uuid4())
# Lire tous les fichiers et créer les structures
files_data: List[Tuple[bytes, str, int]] = []
batch_files: List[BatchFileInfo] = []
for file in files:
filename_secure: str = secure_filename(file.filename)
file_bytes_data: bytes = file.read()
size_bytes: int = len(file_bytes_data)
files_data.append((file_bytes_data, filename_secure, size_bytes))
batch_files.append({
"filename": filename_secure,
"job_id": None, # Will be assigned during processing
"status": "pending",
"error": None,
"size_bytes": size_bytes,
})
# Créer le batch job
batch_jobs[batch_id] = {
"job_ids": [],
"files": batch_files,
"total_files": len(files),
"completed_files": 0,
"failed_files": 0,
"status": "processing",
"current_job_id": None,
"options": options,
"created_at": time.time(),
}
# Lancer le thread de traitement séquentiel
batch_thread: threading.Thread = threading.Thread(
target=run_batch_sequential,
args=(batch_id, files_data, options),
daemon=True
)
batch_thread.start()
# Rediriger vers la page de progression batch
return redirect(url_for("upload_batch_progress", batch_id=batch_id))
@app.route("/upload/batch/progress/<batch_id>")
def upload_batch_progress(batch_id: str) -> str:
    """Render the live dashboard for one batch upload.

    The page lists every file of the batch; its script polls the batch
    status route and opens one SSE stream per file once a job id is known.

    Args:
        batch_id: Key of the batch in ``batch_jobs``.

    Returns:
        The rendered progress template, or the upload form with an error
        message when the batch id is unknown.
    """
    batch: Optional[Dict[str, Any]] = batch_jobs.get(batch_id)
    if batch is None:
        return render_template("upload.html", error="Batch non trouvé")

    template_context: Dict[str, Any] = {
        "batch_id": batch_id,
        "batch": batch,
        "files": batch["files"],
        "total_files": batch["total_files"],
    }
    return render_template("upload_batch_progress.html", **template_context)
@app.route("/upload/batch/status/<batch_id>")
def upload_batch_status(batch_id: str) -> Union[Response, Tuple[Response, int]]:
    """Return the current state of a batch as JSON (polled by the frontend).

    Besides the batch counters, each file entry carries the fields stored in
    the batch plus ``document_name``: the name found in the job result once
    the file is complete, ``None`` otherwise. The result page builds document
    links from that name, so exposing it lets the progress page do the same.

    Args:
        batch_id: Key of the batch in ``batch_jobs``.

    Returns:
        JSON response with the batch status, or a ``(response, 404)`` pair
        when the batch id is unknown.
    """
    batch: Optional[Dict[str, Any]] = batch_jobs.get(batch_id)
    if batch is None:
        return jsonify({"error": "Batch non trouvé"}), 404

    files_payload: List[Dict[str, Any]] = []
    for file_info in batch["files"]:
        # Work on a copy: a read-only endpoint must not mutate batch state.
        entry: Dict[str, Any] = dict(file_info)
        document_name: Optional[str] = None
        job_id = file_info.get("job_id")
        if file_info.get("status") == "complete" and job_id:
            job_result = processing_jobs.get(job_id, {}).get("result")
            if job_result:
                document_name = job_result.get("document_name") or None
        entry["document_name"] = document_name
        files_payload.append(entry)

    return jsonify({
        "batch_id": batch_id,
        "status": batch["status"],
        "total_files": batch["total_files"],
        "completed_files": batch["completed_files"],
        "failed_files": batch["failed_files"],
        "current_job_id": batch["current_job_id"],
        "files": files_payload,
    })
@app.route("/upload/batch/result/<batch_id>")
def upload_batch_result(batch_id: str) -> str:
    """Render the final summary page of a batch upload.

    One result row is built per file. For files marked complete, the
    document name is looked up in the result of the matching entry of
    ``processing_jobs`` so the template can link to the document.

    Args:
        batch_id: Key of the batch in ``batch_jobs``.

    Returns:
        The rendered result template, or the upload form with an error
        message when the batch id is unknown.
    """
    batch: Optional[Dict[str, Any]] = batch_jobs.get(batch_id)
    if batch is None:
        return render_template("upload.html", error="Batch non trouvé")

    results: List[Dict[str, Any]] = []
    for entry in batch["files"]:
        document_name = None
        linked_job_id = entry.get("job_id") if entry["status"] == "complete" else None

        # Only a finished file whose job is still registered can expose a name.
        if linked_job_id and linked_job_id in processing_jobs:
            job_result = processing_jobs[linked_job_id].get("result")
            if job_result and job_result.get("document_name"):
                document_name = job_result["document_name"]

        results.append({
            "filename": entry["filename"],
            "status": entry["status"],
            "error": entry.get("error"),
            "document_name": document_name,
        })

    return render_template(
        "upload_batch_result.html",
        batch_id=batch_id,
        batch=batch,
        results=results,
    )
@app.route("/upload/progress/<job_id>")
def upload_progress(job_id: str) -> Response:

View File

@@ -17,7 +17,7 @@
<form method="post" enctype="multipart/form-data">
<!-- Sélection du fichier -->
<div class="form-group">
<label class="form-label" for="file">📎 Sélectionnez votre fichier</label>
<label class="form-label" for="file">📎 Sélectionnez votre fichier (ou plusieurs)</label>
<input
type="file"
name="file"
@@ -25,9 +25,13 @@
class="form-control"
accept=".pdf,.docx,.md"
required
multiple
onchange="updateOptionsForFileType()"
>
<div class="caption mt-1">Formats acceptés : PDF (.pdf), Word (.docx) ou Markdown (.md) • Max 50 MB</div>
<div class="caption mt-1">
Formats acceptés : PDF (.pdf), Word (.docx) ou Markdown (.md) • Max 50 MB par fichier<br>
<span id="file-count-info" style="font-weight: 600; color: #2196F3;"></span>
</div>
</div>
<!-- Configuration recommandée (par défaut) -->
@@ -217,6 +221,18 @@ function updateModelOptions() {
function updateOptionsForFileType() {
const fileInput = document.getElementById('file');
const fileCountInfo = document.getElementById('file-count-info');
const fileCount = fileInput.files.length;
// Update file count display
if (fileCount === 0) {
fileCountInfo.textContent = '';
} else if (fileCount === 1) {
fileCountInfo.textContent = '';
} else {
fileCountInfo.textContent = `📦 ${fileCount} fichiers sélectionnés (traitement séquentiel : un fichier après l'autre)`;
}
const fileName = fileInput.files[0]?.name || '';
const isWord = fileName.toLowerCase().endsWith('.docx');
const isPDF = fileName.toLowerCase().endsWith('.pdf');
@@ -238,7 +254,7 @@ function updateOptionsForFileType() {
wordPipelineInfo.style.display = 'none';
markdownPipelineInfo.style.display = 'none';
// Afficher selon le type
// Afficher selon le type (basé sur le premier fichier)
if (isWord) {
wordInfo.style.display = 'block';
wordPipelineInfo.style.display = 'block';

View File

@@ -0,0 +1,284 @@
{% extends "base.html" %}
{% block title %}Traitement Batch en cours{% endblock %}
{% block content %}
<section class="section">
<h1>📦 Traitement de {{ total_files }} document(s)</h1>
<p class="lead">Suivi en temps réel du traitement séquentiel</p>
<!-- Progression globale -->
<div class="card" style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; margin-bottom: 2rem;">
<h3 style="margin-bottom: 1rem; color: white;">📊 Progression Globale</h3>
<div style="display: flex; gap: 2rem; align-items: center; flex-wrap: wrap;">
<div style="flex: 1; min-width: 200px;">
<div style="font-size: 2.5rem; font-weight: bold;" id="batch-completed-count">0</div>
<div style="opacity: 0.9;">Fichiers complétés</div>
</div>
<div style="flex: 1; min-width: 200px;">
<div style="font-size: 2.5rem; font-weight: bold;" id="batch-failed-count">0</div>
<div style="opacity: 0.9;">Fichiers en erreur</div>
</div>
<div style="flex: 2; min-width: 300px;">
<div style="background: rgba(255,255,255,0.2); border-radius: 10px; height: 30px; overflow: hidden; position: relative;">
<div
id="batch-progress-bar"
style="background: linear-gradient(90deg, #4caf50, #8bc34a); height: 100%; width: 0%; transition: width 0.3s ease; display: flex; align-items: center; justify-content: center; font-weight: 600; font-size: 0.9rem;"
>
<span id="batch-progress-text">0%</span>
</div>
</div>
<div style="margin-top: 0.5rem; opacity: 0.9; font-size: 0.9rem;" id="batch-status-text">
Initialisation...
</div>
</div>
</div>
</div>
<!-- Tableau des fichiers -->
<div class="card">
<h3>📄 Détails par fichier</h3>
<div style="overflow-x: auto;">
<table style="width: 100%; border-collapse: collapse; margin-top: 1rem;">
<thead>
<tr style="background: #f5f5f5; border-bottom: 2px solid #ddd;">
<th style="padding: 0.75rem; text-align: left; font-weight: 600;">#</th>
<th style="padding: 0.75rem; text-align: left; font-weight: 600;">Fichier</th>
<th style="padding: 0.75rem; text-align: center; font-weight: 600;">Statut</th>
<th style="padding: 0.75rem; text-align: left; font-weight: 600;">Étape actuelle</th>
<th style="padding: 0.75rem; text-align: left; font-weight: 600;">Progression</th>
<th style="padding: 0.75rem; text-align: center; font-weight: 600;">Actions</th>
</tr>
</thead>
<tbody>
{% for file in files %}
<tr data-file-index="{{ loop.index0 }}" style="border-bottom: 1px solid #eee;">
<td style="padding: 0.75rem; color: #666;">{{ loop.index }}</td>
<td style="padding: 0.75rem; font-family: monospace; font-size: 0.9rem;">
{{ file.filename }}
</td>
<td style="padding: 0.75rem; text-align: center;">
<span class="badge badge-status" id="status-{{ loop.index0 }}"
style="padding: 0.4rem 0.8rem; border-radius: 12px; font-size: 0.85rem; font-weight: 600;">
En attente
</span>
</td>
<td style="padding: 0.75rem;">
<span id="step-{{ loop.index0 }}" style="color: #666; font-size: 0.9rem;"></span>
</td>
<td style="padding: 0.75rem;">
<div style="background: #f0f0f0; border-radius: 10px; height: 20px; overflow: hidden; position: relative;">
<div
id="progress-{{ loop.index0 }}"
style="background: linear-gradient(90deg, #2196F3, #21cbf3); height: 100%; width: 0%; transition: width 0.3s ease;"
></div>
</div>
</td>
<td style="padding: 0.75rem; text-align: center;">
<span id="actions-{{ loop.index0 }}" style="font-size: 0.85rem; color: #999;"></span>
</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
</div>
<!-- Bouton de résultats (caché initialement) -->
<div id="results-button-container" style="display: none; text-align: center; margin-top: 2rem;">
<a href="/upload/batch/result/{{ batch_id }}" class="btn btn-primary" style="font-size: 1.1rem; padding: 0.75rem 2rem;">
✅ Voir les résultats du batch
</a>
</div>
</section>
<style>
.badge-status {
display: inline-block;
white-space: nowrap;
}
.badge-pending {
background: #e0e0e0;
color: #666;
}
.badge-processing {
background: #2196F3;
color: white;
}
.badge-complete {
background: #4caf50;
color: white;
}
.badge-error {
background: #f44336;
color: white;
}
</style>
<script>
// Batch identifier rendered into the page by the Jinja template
const batchId = "{{ batch_id }}";
// NOTE(review): totalFiles is not referenced anywhere else in this script — confirm before removing
const totalFiles = {{ total_files }};
const eventSources = {}; // Store EventSource objects by file index
// Poll batch status every 2 seconds (handle kept so the poller can stop itself)
const pollInterval = setInterval(pollBatchStatus, 2000);
// Initial poll, so the page does not wait 2 seconds for its first update
pollBatchStatus();
/**
 * Fetch the batch status once and refresh the whole dashboard:
 * global counters and progress bar, status banner, every file row,
 * and one SSE stream per file that is currently being processed.
 * Polling stops by itself once the batch reaches a final state or
 * the server no longer knows the batch.
 */
function pollBatchStatus() {
    // Final batch states reported by the server, with the banner shown for each
    const terminalMessages = {
        complete: '✅ Traitement terminé !',
        partial: '⚠️ Traitement partiel (certains fichiers en erreur)',
        error: '❌ Tous les fichiers ont échoué'
    };
    const statusText = document.getElementById('batch-status-text');

    fetch(`/upload/batch/status/${batchId}`)
        .then(response => {
            if (!response.ok) {
                // An unknown batch will never come back: stop polling instead of
                // failing every 2 seconds on an error payload.
                if (response.status === 404) {
                    clearInterval(pollInterval);
                    statusText.textContent = '❌ Batch non trouvé';
                }
                throw new Error(`HTTP ${response.status}`);
            }
            return response.json();
        })
        .then(data => {
            // Update global progress
            const completed = data.completed_files;
            const failed = data.failed_files;
            const total = data.total_files;
            // Guard against total === 0, which would otherwise display "NaN%"
            const progress = total > 0 ? Math.round((completed + failed) / total * 100) : 0;
            const files = Array.isArray(data.files) ? data.files : [];

            document.getElementById('batch-completed-count').textContent = completed;
            document.getElementById('batch-failed-count').textContent = failed;
            document.getElementById('batch-progress-bar').style.width = progress + '%';
            document.getElementById('batch-progress-text').textContent = progress + '%';

            // Update status text
            if (Object.prototype.hasOwnProperty.call(terminalMessages, data.status)) {
                statusText.textContent = terminalMessages[data.status];
                document.getElementById('results-button-container').style.display = 'block';
                clearInterval(pollInterval); // Stop polling
            } else {
                const currentFile = files.findIndex(f => f.status === 'processing');
                statusText.textContent = currentFile >= 0
                    ? `🔄 Traitement du fichier ${currentFile + 1}/${total}...`
                    : '🔄 Traitement en cours...';
            }

            // Update individual file rows
            files.forEach((file, index) => {
                updateFileRow(index, file);

                // Open an SSE stream only while the file is actually being processed.
                // Finished files are removed from eventSources, so testing job_id alone
                // would reconnect to every finished job on each poll.
                if (file.status === 'processing' && file.job_id && !eventSources[index]) {
                    connectSSE(index, file.job_id);
                }
            });
        })
        .catch(error => {
            console.error('Erreur polling batch status:', error);
        });
}
/**
 * Refresh one table row (badge, step text, progress bar, action cell)
 * from a file entry of the batch status payload.
 *
 * @param {number} fileIndex - Zero-based row index (matches the element id suffixes).
 * @param {object} fileData - File entry: status, job_id, error and, when the
 *     server provides it, document_name.
 */
function updateFileRow(fileIndex, fileData) {
    const statusBadge = document.getElementById(`status-${fileIndex}`);
    const stepSpan = document.getElementById(`step-${fileIndex}`);
    const progressBar = document.getElementById(`progress-${fileIndex}`);
    const actionsSpan = document.getElementById(`actions-${fileIndex}`);

    // Update status badge
    statusBadge.className = 'badge badge-status badge-' + fileData.status;

    if (fileData.status === 'pending') {
        statusBadge.textContent = '⏳ En attente';
    } else if (fileData.status === 'processing') {
        statusBadge.textContent = '🔄 En cours';
    } else if (fileData.status === 'complete') {
        statusBadge.textContent = '✅ Terminé';
        progressBar.style.width = '100%';
        progressBar.style.background = 'linear-gradient(90deg, #4caf50, #8bc34a)';
        stepSpan.textContent = 'Terminé avec succès';

        // Document pages are addressed by document name, not by job id (a random
        // uuid). Without a name in the payload, fall back to the batch result
        // page, which resolves the names server-side.
        const link = document.createElement('a');
        link.href = fileData.document_name
            ? `/documents/${encodeURIComponent(fileData.document_name)}/view`
            : `/upload/batch/result/${batchId}`;
        link.style.color = '#2196F3';
        link.textContent = '📄 Voir';
        // Build the node instead of assigning innerHTML so server data is never parsed as markup
        actionsSpan.textContent = '';
        actionsSpan.appendChild(link);
    } else if (fileData.status === 'error') {
        statusBadge.textContent = '❌ Erreur';
        progressBar.style.width = '100%';
        progressBar.style.background = '#f44336';
        stepSpan.textContent = fileData.error || 'Erreur inconnue';
        stepSpan.style.color = '#f44336';
        actionsSpan.textContent = '—';
    }
}
/**
 * Open the SSE progress stream of one job and mirror its events in the
 * matching table row (step text and progress bar).
 *
 * @param {number} fileIndex - Zero-based row index (matches the element id suffixes).
 * @param {string} jobId - Processing job whose progress stream is opened.
 */
function connectSSE(fileIndex, jobId) {
    console.log(`Connecting SSE for file ${fileIndex}, job ${jobId}`);

    // Rough percentage shown for each pipeline step (simplified estimation)
    const stepProgress = {
        'ocr': 10,
        'markdown': 20,
        'metadata': 30,
        'toc': 40,
        'classification': 50,
        'chunking': 60,
        'cleaning': 70,
        'validation': 80,
        'ingestion': 90,
        'complete': 100
    };

    const eventSource = new EventSource(`/upload/progress/${jobId}`);
    eventSources[fileIndex] = eventSource;

    const stepSpan = document.getElementById(`step-${fileIndex}`);
    const progressBar = document.getElementById(`progress-${fileIndex}`);

    // Set once a terminal event was received. The closed source then stays
    // registered in eventSources so the status poller does not open a new
    // stream for a job that has already finished.
    let finished = false;

    function finish() {
        finished = true;
        eventSource.close();
    }

    eventSource.onmessage = function(event) {
        try {
            const data = JSON.parse(event.data);

            if (data.type === 'step') {
                // Update step text
                let stepText = data.step || '';
                if (data.detail) {
                    stepText += ` - ${data.detail}`;
                }
                stepSpan.textContent = stepText;

                // Unknown steps fall back to the middle of the bar
                const progress = stepProgress[data.step] || 50;
                progressBar.style.width = progress + '%';
            } else if (data.type === 'complete') {
                stepSpan.textContent = 'Terminé avec succès';
                progressBar.style.width = '100%';
                progressBar.style.background = 'linear-gradient(90deg, #4caf50, #8bc34a)';
                finish();
            } else if (data.type === 'error') {
                stepSpan.textContent = data.message || 'Erreur';
                stepSpan.style.color = '#f44336';
                progressBar.style.width = '100%';
                progressBar.style.background = '#f44336';
                finish();
            }
        } catch (error) {
            console.error('Error parsing SSE data:', error);
        }
    };

    eventSource.onerror = function(error) {
        console.error(`SSE error for file ${fileIndex}:`, error);
        eventSource.close();
        // Only a stream that broke before its terminal event is unregistered,
        // which lets the poller retry while the file is still being processed.
        if (!finished) {
            delete eventSources[fileIndex];
        }
    };
}
// Release every registered SSE stream when the user leaves the page
window.addEventListener('beforeunload', () => {
    for (const source of Object.values(eventSources)) {
        source.close();
    }
});
</script>
{% endblock %}

View File

@@ -0,0 +1,167 @@
{% extends "base.html" %}
{% block title %}Résultats Batch{% endblock %}
{% block content %}
<section class="section">
<h1>📊 Résumé du Traitement Batch</h1>
<p class="lead">Résultats finaux du traitement de {{ batch.total_files }} fichier(s)</p>
<!-- Statistiques globales -->
<div class="card" style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; margin-bottom: 2rem;">
<h3 style="margin-bottom: 1.5rem; color: white;">📈 Statistiques Globales</h3>
<div style="display: flex; gap: 2rem; flex-wrap: wrap; justify-content: space-around;">
<div style="text-align: center; min-width: 150px;">
<div style="font-size: 3rem; font-weight: bold;">{{ batch.total_files }}</div>
<div style="opacity: 0.9; margin-top: 0.5rem;">Total Fichiers</div>
</div>
<div style="text-align: center; min-width: 150px;">
<div style="font-size: 3rem; font-weight: bold; color: #4caf50;">{{ batch.completed_files }}</div>
<div style="opacity: 0.9; margin-top: 0.5rem;">✅ Réussis</div>
</div>
<div style="text-align: center; min-width: 150px;">
<div style="font-size: 3rem; font-weight: bold; color: #f44336;">{{ batch.failed_files }}</div>
<div style="opacity: 0.9; margin-top: 0.5rem;">❌ Erreurs</div>
</div>
<div style="text-align: center; min-width: 150px;">
<div style="font-size: 3rem; font-weight: bold;">
{% set success_rate = (batch.completed_files / batch.total_files * 100) | round(1) %}
{{ success_rate }}%
</div>
<div style="opacity: 0.9; margin-top: 0.5rem;">Taux de Réussite</div>
</div>
</div>
</div>
<!-- Message de statut global -->
{% if batch.failed_files == 0 %}
<div class="alert alert-success" style="background: #e8f5e9; border-left: 4px solid #4caf50; padding: 1rem; margin-bottom: 2rem;">
<strong>✅ Tous les fichiers ont été traités avec succès !</strong>
<p style="margin-top: 0.5rem;">Vous pouvez maintenant consulter les documents via les liens ci-dessous ou dans la section "Documents".</p>
</div>
{% elif batch.completed_files == 0 %}
<div class="alert alert-danger" style="background: #ffebee; border-left: 4px solid #f44336; padding: 1rem; margin-bottom: 2rem;">
<strong>❌ Aucun fichier n'a pu être traité avec succès.</strong>
<p style="margin-top: 0.5rem;">Vérifiez les erreurs ci-dessous pour plus de détails.</p>
</div>
{% else %}
<div class="alert alert-warning" style="background: #fff3e0; border-left: 4px solid #ff9800; padding: 1rem; margin-bottom: 2rem;">
<strong>⚠️ Traitement partiel : {{ batch.completed_files }} réussi(s), {{ batch.failed_files }} erreur(s).</strong>
<p style="margin-top: 0.5rem;">Certains fichiers ont été traités avec succès, d'autres ont rencontré des erreurs.</p>
</div>
{% endif %}
<!-- Liste détaillée des fichiers -->
<div class="card">
<h3>📄 Détails par Fichier</h3>
<div style="margin-top: 1.5rem;">
{% for result in results %}
<div class="file-result-card"
style="border: 1px solid #ddd; border-radius: 8px; padding: 1.5rem; margin-bottom: 1rem; {% if result.status == 'complete' %}border-left: 4px solid #4caf50;{% elif result.status == 'error' %}border-left: 4px solid #f44336;{% else %}border-left: 4px solid #e0e0e0;{% endif %}">
<div style="display: flex; align-items: center; justify-content: space-between; flex-wrap: wrap; gap: 1rem;">
<!-- Nom du fichier -->
<div style="flex: 1; min-width: 250px;">
<div style="font-weight: 600; font-size: 1.1rem; margin-bottom: 0.5rem;">
{{ loop.index }}. {{ result.filename }}
</div>
<div style="display: flex; align-items: center; gap: 0.5rem;">
{% if result.status == 'complete' %}
<span style="background: #4caf50; color: white; padding: 0.3rem 0.8rem; border-radius: 12px; font-size: 0.85rem; font-weight: 600;">
✅ Réussi
</span>
{% elif result.status == 'error' %}
<span style="background: #f44336; color: white; padding: 0.3rem 0.8rem; border-radius: 12px; font-size: 0.85rem; font-weight: 600;">
❌ Erreur
</span>
{% else %}
<span style="background: #e0e0e0; color: #666; padding: 0.3rem 0.8rem; border-radius: 12px; font-size: 0.85rem; font-weight: 600;">
⏳ En attente
</span>
{% endif %}
</div>
</div>
<!-- Actions -->
<div style="text-align: right;">
{% if result.status == 'complete' and result.document_name %}
<a href="/documents/{{ result.document_name }}/view"
class="btn btn-primary"
style="display: inline-block; padding: 0.5rem 1.5rem; text-decoration: none;">
📄 Voir le document
</a>
{% elif result.status == 'error' %}
<div style="color: #f44336; font-size: 0.9rem; max-width: 400px;">
<strong>Erreur :</strong><br>
{{ result.error or 'Erreur inconnue' }}
</div>
{% endif %}
</div>
</div>
</div>
{% endfor %}
</div>
</div>
<!-- Actions finales -->
<div style="text-align: center; margin-top: 2rem; display: flex; gap: 1rem; justify-content: center; flex-wrap: wrap;">
<a href="/upload" class="btn btn-primary" style="padding: 0.75rem 2rem;">
📤 Nouveau Upload
</a>
<a href="/documents" class="btn" style="padding: 0.75rem 2rem;">
📚 Voir tous les documents
</a>
{% if batch.completed_files > 0 %}
<a href="/search" class="btn" style="padding: 0.75rem 2rem;">
🔍 Rechercher dans les documents
</a>
{% endif %}
</div>
<!-- Informations de traitement -->
<div class="card" style="margin-top: 2rem; background: #f9f9f9;">
<h4> Informations de Traitement</h4>
<div style="margin-top: 1rem; font-size: 0.9rem; color: #666;">
<p><strong>Batch ID :</strong> <code style="background: #e0e0e0; padding: 0.2rem 0.5rem; border-radius: 3px;">{{ batch_id }}</code></p>
<p><strong>Date de traitement :</strong> {{ batch.created_at | default(0) | int | timestamp_to_date }}</p>
<p><strong>Options utilisées :</strong></p>
<ul style="margin-left: 1.5rem;">
<li>Provider LLM : {{ batch.options.llm_provider }}</li>
<li>Modèle : {{ batch.options.llm_model }}</li>
<li>Skip OCR : {{ "Oui" if batch.options.skip_ocr else "Non" }}</li>
<li>Ingestion Weaviate : {{ "Oui" if batch.options.ingest_weaviate else "Non" }}</li>
</ul>
</div>
</div>
</section>
<style>
.file-result-card {
transition: box-shadow 0.2s ease;
}
.file-result-card:hover {
box-shadow: 0 4px 12px rgba(0,0,0,0.1);
}
</style>
<script>
// Client-side date formatting: every element carrying a data-timestamp
// attribute (Unix seconds) gets its text replaced by a French-formatted date.
// NOTE(review): no element of this template sets data-timestamp (the date is
// rendered server-side by the timestamp_to_date filter), so this handler is
// currently a no-op — confirm whether it is still needed.
document.addEventListener('DOMContentLoaded', function() {
    const timestampElements = document.querySelectorAll('[data-timestamp]');
    timestampElements.forEach(function(el) {
        // Explicit radix: the attribute is a decimal number of seconds
        const timestamp = Number.parseInt(el.dataset.timestamp, 10);
        if (!timestamp) {
            return; // missing, non-numeric or zero: keep the existing text
        }
        const date = new Date(timestamp * 1000);
        el.textContent = date.toLocaleString('fr-FR', {
            year: 'numeric',
            month: 'long',
            day: 'numeric',
            hour: '2-digit',
            minute: '2-digit'
        });
    });
});
</script>
{% endblock %}

View File

@@ -1216,3 +1216,47 @@ class DeleteDocumentResult(TypedDict, total=False):
deleted_sections: int
deleted_document: bool
error: Optional[str]
class BatchFileInfo(TypedDict, total=False):
    """Information about a single file in a batch upload.

    Declared with ``total=False``: every key is optional for type checkers.

    Attributes:
        filename: Sanitized upload filename (the upload route stores the
            ``secure_filename`` result, not the raw client-supplied name)
        job_id: Processing job ID (None until processing of the file starts)
        status: Current status (pending, processing, complete, error)
        error: Error message if processing failed, None otherwise
        size_bytes: File size in bytes
    """
    filename: str
    job_id: Optional[str]
    status: str # Literal["pending", "processing", "complete", "error"]
    error: Optional[str]
    size_bytes: int
class BatchJob(TypedDict, total=False):
    """Batch processing job tracking multiple file uploads.

    Declared with ``total=False``: every key is optional for type checkers.

    Attributes:
        job_ids: List of individual processing job IDs
        files: List of file information dictionaries, one per uploaded file
        total_files: Total number of files in batch
        completed_files: Number of files successfully processed
        failed_files: Number of files that failed processing
        status: Overall batch status: "processing" while running, then
            "complete" (no failure), "partial" (some failures) or
            "error" (no file succeeded)
        current_job_id: Currently processing job ID (None if between files)
        options: Processing options applied to all files
        created_at: Unix timestamp (seconds) of when the batch was created
    """
    job_ids: List[str]
    files: List[BatchFileInfo]
    total_files: int
    completed_files: int
    failed_files: int
    status: str # Literal["processing", "complete", "partial", "error"]
    current_job_id: Optional[str]
    options: ProcessingOptions
    created_at: float