---
# Library RAG - Weaviate + BGE-M3 Embeddings
# ===========================================
#
# This docker-compose runs Weaviate with the BAAI/bge-m3 embedding model.
#
# BGE-M3 Advantages:
# - 1024 dimensions (vs 384 for MiniLM-L6) - 2.7x richer representation
# - 8192 token context (vs 512) - 16x longer sequences
# - Superior multilingual support (Greek, Latin, French, English)
# - Better trained on academic/philosophical texts
#
# GPU Configuration:
# - ENABLE_CUDA="1" - Uses NVIDIA GPU for faster vectorization
# - ENABLE_CUDA="0" - Uses CPU only (slower but functional)
# - This file currently runs CPU-only: ENABLE_CUDA is "0" and no GPU device
#   mapping is defined (see the GPU LIMITATION note on the
#   text2vec-transformers service below).
#
# Migration Note (2024-12):
#   Migrated from sentence-transformers-multi-qa-MiniLM-L6-cos-v1 (384-dim)
#   to BAAI/bge-m3 (1024-dim). All collections were deleted and recreated.
#   See MIGRATION_BGE_M3.md for details.

services:
  # Vector database. Delegates vectorization to the text2vec-transformers
  # service defined below.
  weaviate:
    image: cr.weaviate.io/semitechnologies/weaviate:1.34.4
    restart: on-failure:0
    ports:
      - "8080:8080"    # HTTP/REST API
      - "50051:50051"  # gRPC API
    environment:
      QUERY_DEFAULTS_LIMIT: "25"
      # Anonymous access is acceptable for dev/local use only.
      AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: "true"
      PERSISTENCE_DATA_PATH: "/var/lib/weaviate"
      CLUSTER_HOSTNAME: "node1"
      CLUSTER_GOSSIP_BIND_PORT: "7946"
      CLUSTER_DATA_BIND_PORT: "7947"
      # Fix for "No private IP address found" error
      CLUSTER_JOIN: ""
      DEFAULT_VECTORIZER_MODULE: "text2vec-transformers"
      ENABLE_MODULES: "text2vec-transformers"
      # In-network address of the vectorizer (container port 8080, not the
      # host-published 8090).
      TRANSFORMERS_INFERENCE_API: "http://text2vec-transformers:8080"
      # Limits to prevent OOM crashes. GOMEMLIMIT is kept below mem_limit.
      GOMEMLIMIT: "6GiB"
      GOGC: "100"
    volumes:
      - weaviate_data:/var/lib/weaviate
    mem_limit: 8g
    memswap_limit: 10g
    cpus: 4
    # Ensure Weaviate waits for text2vec-transformers to be healthy before
    # starting
    depends_on:
      text2vec-transformers:
        condition: service_healthy
    # NOTE(review): this check requires `curl` to exist inside the Weaviate
    # image. TODO confirm it is present; if it is not, the container will be
    # reported as unhealthy even though Weaviate is serving requests.
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8080/v1/.well-known/ready"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s

  text2vec-transformers:
    # BAAI/bge-m3: Multilingual embedding model (1024 dimensions)
    # Superior for philosophical texts (Greek, Latin, French, English)
    # 8192 token context window (16x longer than MiniLM-L6)
    # Using ONNX version (only available format in Weaviate registry)
    #
    # GPU LIMITATION (Dec 2024):
    # - Weaviate only provides ONNX version of BGE-M3 (no PyTorch)
    # - ONNX runtime is CPU-optimized (no native CUDA support)
    # - GPU acceleration would require NVIDIA NIM (different architecture)
    # - Current setup: CPU-only with AVX2 optimization (functional but slower)
    image: cr.weaviate.io/semitechnologies/transformers-inference:baai-bge-m3-onnx-latest
    restart: on-failure:0
    ports:
      - "8090:8080"  # Expose vectorizer API for manual vectorization
    environment:
      # ONNX runtime - CPU only (CUDA not supported in ONNX version)
      ENABLE_CUDA: "0"
      # Increased timeouts for very long chunks
      # (e.g., Peirce CP 3.403, CP 8.388, Menon chunk 10).
      # Default is 60s, increased to 600s (10 minutes) for exceptionally
      # large texts (e.g., CP 8.388: 218k chars).
      WORKER_TIMEOUT: "600"
    mem_limit: 10g
    memswap_limit: 12g
    cpus: 3
    # Healthcheck ensures service is fully loaded before Weaviate starts
    # BGE-M3 model takes ~60-120s to load into memory
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8080/.well-known/ready"]
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 120s  # BGE-M3 model loading can take up to 2 minutes

volumes:
  # Named volume backing PERSISTENCE_DATA_PATH for the weaviate service.
  weaviate_data: