linear-coding-agent/generations/library_rag/docker-compose.yml
David Blanc Brioir a3d5e8935f refactor: Remove Docker text2vec-transformers service (GPU embedder only)
BREAKING CHANGE: Docker text2vec-transformers service removed

Changes:
- Removed text2vec-transformers service from docker-compose.yml
- Removed ENABLE_MODULES and DEFAULT_VECTORIZER_MODULE from Weaviate config
- Updated architecture comments to reflect Python GPU embedder only
- Simplified docker-compose to single Weaviate service

Architecture:
Before: Weaviate + text2vec-transformers (2 services)
After:  Weaviate only (1 service)

Vectorization:
- Ingestion: Python GPU embedder (manual vectorization)
- Queries: Python GPU embedder (manual vectorization)
- No auto-vectorization modules needed
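With auto-vectorization disabled, the client must supply each object's vector itself. A minimal sketch of that ingestion pattern (the names `embed`, `build_object`, and the `text` property are illustrative, not the repo's actual API; `embed` stands in for the BGE-M3 GPU embedder in `memory/core/embedding_service.py`, which produces 1024-dimensional vectors):

```python
def embed(text: str) -> list[float]:
    # Placeholder for the real GPU embedder (BGE-M3, 1024 dimensions);
    # here we return a fixed-size dummy vector so the sketch runs anywhere.
    return [0.0] * 1024

def build_object(text: str) -> dict:
    """Build the payload shape Weaviate expects when vectors are supplied manually."""
    return {
        "properties": {"text": text},
        "vector": embed(text),  # client-side vector -> no vectorizer module needed
    }

obj = build_object("Arma virumque cano")
print(len(obj["vector"]))  # 1024
```

Because the vector travels with the object, Weaviate never calls a vectorizer module, which is why `ENABLE_MODULES` and `DEFAULT_VECTORIZER_MODULE` could be dropped from the config.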

Benefits:
- RAM: -10 GB freed (no text2vec-transformers container)
- CPU: -3 cores freed
- Architecture: Simplified (one service instead of two)
- Maintenance: Easier (no Docker service dependencies)

Validation:
- Weaviate starts correctly without text2vec-transformers
- Existing data accessible (5355 chunks preserved)
- API endpoints respond correctly
- No errors in startup logs
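The first validation step can be reproduced with a small readiness probe mirroring the compose healthcheck (assuming the defaults from this file: Weaviate on localhost:8080; the helper names are illustrative):

```python
from urllib.request import urlopen

def ready_url(host: str = "localhost", port: int = 8080) -> str:
    # Same endpoint the compose healthcheck curls.
    return f"http://{host}:{port}/v1/.well-known/ready"

def is_ready(url: str, timeout: float = 10.0) -> bool:
    try:
        with urlopen(url, timeout=timeout) as resp:
            return resp.status == 200  # Weaviate returns 200 when ready
    except OSError:  # connection refused, DNS failure, timeout, ...
        return False

print(ready_url())  # http://localhost:8080/v1/.well-known/ready
```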

Migration: GPU embedder already tested and validated
See: TESTS_COMPLETS_GPU_EMBEDDER.md
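Queries follow the same manual pattern: the question is embedded by the GPU embedder and the resulting vector is passed to Weaviate's GraphQL `nearVector` operator. A sketch of the query construction (the class name `LibraryChunk` and the `text` field are assumptions, not the project's actual schema; a dummy vector stands in for real embedder output):

```python
import json

def near_vector_query(class_name: str, vector: list[float], limit: int = 5) -> dict:
    """Build a GraphQL nearVector search body for Weaviate's /v1/graphql endpoint."""
    gql = (
        "{ Get { %s(nearVector: {vector: %s}, limit: %d) "
        "{ text _additional { distance } } } }"
        % (class_name, json.dumps(vector), limit)
    )
    return {"query": gql}

q = near_vector_query("LibraryChunk", [0.0] * 1024)
```

Since the query vector comes from the same BGE-M3 model used at ingestion, it lives in the same 1024-dimensional space as the stored chunks, which is what makes distance comparisons meaningful.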

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-01-09 12:07:09 +01:00


# Library RAG - Weaviate + Python GPU Embedder
# ==============================================
#
# This docker-compose runs Weaviate with manual vectorization via Python GPU embedder.
#
# BGE-M3 GPU Embedder (Python):
# - 1024 dimensions - Rich semantic representation
# - 8192 token context - Long document support
# - Superior multilingual support (Greek, Latin, French, English)
# - GPU acceleration (NVIDIA RTX 4070) - 30-70x faster than Docker text2vec
# - PyTorch CUDA + FP16 precision
#
# Architecture (Jan 2026):
# - Ingestion: Python GPU embedder (manual vectorization)
# - Queries: Python GPU embedder (manual vectorization)
# - Weaviate: Vector storage only (no auto-vectorization)
#
# Migration Notes:
# - Dec 2024: Migrated from MiniLM-L6 (384-dim) to BGE-M3 (1024-dim)
# - Jan 2026: Migrated from Docker text2vec-transformers to Python GPU embedder
# - See MIGRATION_GPU_EMBEDDER_SUCCESS.md for details
services:
  weaviate:
    image: cr.weaviate.io/semitechnologies/weaviate:1.34.4
    restart: on-failure:0
    ports:
      - "8080:8080"
      - "50051:50051"
    environment:
      QUERY_DEFAULTS_LIMIT: "25"
      AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: "true" # OK for dev/local only
      PERSISTENCE_DATA_PATH: "/var/lib/weaviate"
      CLUSTER_HOSTNAME: "node1"
      CLUSTER_GOSSIP_BIND_PORT: "7946"
      CLUSTER_DATA_BIND_PORT: "7947"
      # Fix for "No private IP address found" error
      CLUSTER_JOIN: ""
      # NOTE: Manual vectorization via Python GPU embedder - no modules needed
      # DEFAULT_VECTORIZER_MODULE and ENABLE_MODULES removed (Jan 2026)
      # Limits to prevent OOM crashes
      GOMEMLIMIT: "6GiB"
      GOGC: "100"
    volumes:
      - weaviate_data:/var/lib/weaviate
    mem_limit: 8g
    memswap_limit: 10g
    cpus: 4
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8080/v1/.well-known/ready"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s

  # NOTE: text2vec-transformers service REMOVED (Jan 2026)
  # Vectorization now handled by Python GPU embedder (memory/core/embedding_service.py)
  # Benefits: 30-70x faster ingestion, -10 GB RAM, unified architecture
  # See MIGRATION_GPU_EMBEDDER_SUCCESS.md for details

volumes:
  weaviate_data: