# Library RAG - Weaviate + Python GPU Embedder
# ==============================================
#
# This docker-compose runs Weaviate with manual vectorization via Python GPU embedder.
#
# BGE-M3 GPU Embedder (Python):
# - 1024 dimensions - Rich semantic representation
# - 8192 token context - Long document support
# - Superior multilingual support (Greek, Latin, French, English)
# - GPU acceleration (NVIDIA RTX 4070) - 30-70x faster than Docker text2vec
# - PyTorch CUDA + FP16 precision
#
# Architecture (Jan 2026):
# - Ingestion: Python GPU embedder (manual vectorization)
# - Queries: Python GPU embedder (manual vectorization)
# - Weaviate: Vector storage only (no auto-vectorization)
#
# Migration Notes:
# - Dec 2024: Migrated from MiniLM-L6 (384-dim) to BGE-M3 (1024-dim)
# - Jan 2026: Migrated from Docker text2vec-transformers to Python GPU embedder
# - See MIGRATION_GPU_EMBEDDER_SUCCESS.md for details

services:
  weaviate:
    image: cr.weaviate.io/semitechnologies/weaviate:1.34.4
    restart: "on-failure:0"
    ports:
      # HTTP API (the healthcheck below probes this port inside the container)
      - "8080:8080"
      # NOTE(review): presumably the Weaviate gRPC port - confirm against clients
      - "50051:50051"
    environment:
      QUERY_DEFAULTS_LIMIT: "25"
      # Anonymous access: acceptable for dev/local use only
      AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: "true"
      PERSISTENCE_DATA_PATH: "/var/lib/weaviate"
      CLUSTER_HOSTNAME: "node1"
      CLUSTER_GOSSIP_BIND_PORT: "7946"
      CLUSTER_DATA_BIND_PORT: "7947"
      # Fix for "No private IP address found" error
      CLUSTER_JOIN: ""
      # NOTE: Manual vectorization via Python GPU embedder - no modules needed
      # DEFAULT_VECTORIZER_MODULE and ENABLE_MODULES removed (Jan 2026)
      # Limits to prevent OOM crashes.
      # GOMEMLIMIT is set below the container mem_limit (8g) defined further down.
      GOMEMLIMIT: "6GiB"
      GOGC: "100"
    volumes:
      # Named volume mounted at PERSISTENCE_DATA_PATH
      - weaviate_data:/var/lib/weaviate
    mem_limit: 8g
    memswap_limit: 10g
    cpus: 4
    healthcheck:
      # Uses wget rather than curl: the official Weaviate image is Alpine-based
      # and is not expected to ship curl, so a curl-based probe would always
      # fail and mark the container unhealthy.
      test:
        - "CMD"
        - "wget"
        - "-q"
        - "--spider"
        - "http://localhost:8080/v1/.well-known/ready"
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s

  # NOTE: text2vec-transformers service REMOVED (Jan 2026)
  # Vectorization now handled by Python GPU embedder (memory/core/embedding_service.py)
  # Benefits: 30-70x faster ingestion, -10 GB RAM, unified architecture
  # See MIGRATION_GPU_EMBEDDER_SUCCESS.md for details

volumes:
  weaviate_data: {}