# linear-coding-agent/memory/schemas/memory_schemas.py

#!/usr/bin/env python3
"""
Memory Collections Schemas for Weaviate.

This module defines the schema for 3 Memory collections:
    - Thought: Individual thoughts/reflections
    - Conversation: Complete conversations
    - Message: Individual messages in conversations

All collections use manual vectorization (GPU embeddings).
"""

import weaviate
import weaviate.classes.config as wvc
from datetime import datetime
from typing import Optional


def create_thought_collection(client: weaviate.WeaviateClient) -> None:
    """
    Create the Thought collection unless it already exists.

    The collection is configured with ``Vectorizer.none()``, i.e. no
    server-side vectorizer: embeddings have to be supplied by the caller
    when objects are inserted. Fields tagged "(vectorized)" below are the
    ones this schema intends to be embedded externally.

    Schema:
        - content: TEXT (vectorized) - The thought content
        - thought_type: TEXT - Type (reflexion, question, intuition, observation, etc.)
        - timestamp: DATE - When created
        - trigger: TEXT (optional) - What triggered the thought
        - emotional_state: TEXT (optional) - Emotional state
        - concepts: TEXT_ARRAY (vectorized) - Related concepts/tags
        - privacy_level: TEXT - private, shared, public
        - context: TEXT (optional) - Additional context

    Args:
        client: Connected Weaviate client.
    """
    # Ask the server about this single collection rather than downloading
    # the configuration of every collection just to test membership.
    if client.collections.exists("Thought"):
        print("[WARN]  Thought collection already exists, skipping")
        return

    client.collections.create(
        name="Thought",

        # Manual vectorization (GPU) - single default vector
        vectorizer_config=wvc.Configure.Vectorizer.none(),

        properties=[
            # Fields meant to feed the externally computed embedding
            wvc.Property(
                name="content",
                data_type=wvc.DataType.TEXT,
                description="The thought content",
            ),
            wvc.Property(
                name="concepts",
                data_type=wvc.DataType.TEXT_ARRAY,
                description="Related concepts/tags",
            ),

            # Metadata fields (not vectorized)
            # NOTE(review): with no server-side vectorizer, skip_vectorization
            # is presumably informational only - confirm against the client docs.
            wvc.Property(
                name="thought_type",
                data_type=wvc.DataType.TEXT,
                skip_vectorization=True,
                description="Type: reflexion, question, intuition, observation, etc.",
            ),
            wvc.Property(
                name="timestamp",
                data_type=wvc.DataType.DATE,
                description="When the thought was created",
            ),
            wvc.Property(
                name="trigger",
                data_type=wvc.DataType.TEXT,
                skip_vectorization=True,
                description="What triggered the thought (optional)",
            ),
            wvc.Property(
                name="emotional_state",
                data_type=wvc.DataType.TEXT,
                skip_vectorization=True,
                description="Emotional state (optional)",
            ),
            wvc.Property(
                name="privacy_level",
                data_type=wvc.DataType.TEXT,
                skip_vectorization=True,
                description="Privacy level: private, shared, public",
            ),
            wvc.Property(
                name="context",
                data_type=wvc.DataType.TEXT,
                skip_vectorization=True,
                description="Additional context (optional)",
            ),
        ],
    )

    print("[OK] Thought collection created")


def create_conversation_collection(client: weaviate.WeaviateClient) -> None:
    """
    Create the Conversation collection unless it already exists.

    The collection is configured with ``Vectorizer.none()``, i.e. no
    server-side vectorizer: embeddings have to be supplied by the caller
    when objects are inserted. The field tagged "(vectorized)" below is
    the one this schema intends to be embedded externally.

    Schema:
        - conversation_id: TEXT - Unique conversation ID
        - category: TEXT - philosophy, technical, personal, etc.
        - timestamp_start: DATE - Conversation start
        - timestamp_end: DATE (optional) - Conversation end
        - summary: TEXT (vectorized) - Conversation summary
        - participants: TEXT_ARRAY - List of participants
        - tags: TEXT_ARRAY - Semantic tags
        - message_count: INT - Number of messages
        - context: TEXT (optional) - Global context

    Args:
        client: Connected Weaviate client.
    """
    # Ask the server about this single collection rather than downloading
    # the configuration of every collection just to test membership.
    if client.collections.exists("Conversation"):
        print("[WARN]  Conversation collection already exists, skipping")
        return

    client.collections.create(
        name="Conversation",

        # Manual vectorization (GPU)
        vectorizer_config=wvc.Configure.Vectorizer.none(),

        properties=[
            # Field meant to feed the externally computed embedding
            wvc.Property(
                name="summary",
                data_type=wvc.DataType.TEXT,
                description="Conversation summary",
            ),

            # Metadata fields (not vectorized)
            # NOTE(review): with no server-side vectorizer, skip_vectorization
            # is presumably informational only - confirm against the client docs.
            wvc.Property(
                name="conversation_id",
                data_type=wvc.DataType.TEXT,
                skip_vectorization=True,
                description="Unique conversation identifier",
            ),
            wvc.Property(
                name="category",
                data_type=wvc.DataType.TEXT,
                skip_vectorization=True,
                description="Category: philosophy, technical, personal, etc.",
            ),
            wvc.Property(
                name="timestamp_start",
                data_type=wvc.DataType.DATE,
                description="Conversation start time",
            ),
            wvc.Property(
                name="timestamp_end",
                data_type=wvc.DataType.DATE,
                description="Conversation end time (optional)",
            ),
            wvc.Property(
                name="participants",
                data_type=wvc.DataType.TEXT_ARRAY,
                skip_vectorization=True,
                description="List of participants",
            ),
            wvc.Property(
                name="tags",
                data_type=wvc.DataType.TEXT_ARRAY,
                skip_vectorization=True,
                description="Semantic tags",
            ),
            wvc.Property(
                name="message_count",
                data_type=wvc.DataType.INT,
                description="Number of messages in conversation",
            ),
            wvc.Property(
                name="context",
                data_type=wvc.DataType.TEXT,
                skip_vectorization=True,
                description="Global context (optional)",
            ),
        ],
    )

    print("[OK] Conversation collection created")


def create_message_collection(client: weaviate.WeaviateClient) -> None:
    """
    Create the Message collection unless it already exists.

    The collection is configured with ``Vectorizer.none()``, i.e. no
    server-side vectorizer: embeddings have to be supplied by the caller
    when objects are inserted. The field tagged "(vectorized)" below is
    the one this schema intends to be embedded externally.

    Schema:
        - content: TEXT (vectorized) - Message content
        - role: TEXT - user, assistant, system
        - timestamp: DATE - When sent
        - conversation_id: TEXT - Link to parent Conversation
        - order_index: INT - Position in conversation
        - conversation: OBJECT (nested) - Denormalized conversation data
            - conversation_id: TEXT
            - category: TEXT

    Args:
        client: Connected Weaviate client.
    """
    # Ask the server about this single collection rather than downloading
    # the configuration of every collection just to test membership.
    if client.collections.exists("Message"):
        print("[WARN]  Message collection already exists, skipping")
        return

    client.collections.create(
        name="Message",

        # Manual vectorization (GPU)
        vectorizer_config=wvc.Configure.Vectorizer.none(),

        properties=[
            # Field meant to feed the externally computed embedding
            wvc.Property(
                name="content",
                data_type=wvc.DataType.TEXT,
                description="Message content",
            ),

            # Metadata fields (not vectorized)
            # NOTE(review): with no server-side vectorizer, skip_vectorization
            # is presumably informational only - confirm against the client docs.
            wvc.Property(
                name="role",
                data_type=wvc.DataType.TEXT,
                skip_vectorization=True,
                description="Role: user, assistant, system",
            ),
            wvc.Property(
                name="timestamp",
                data_type=wvc.DataType.DATE,
                description="When the message was sent",
            ),
            wvc.Property(
                name="conversation_id",
                data_type=wvc.DataType.TEXT,
                skip_vectorization=True,
                description="Link to parent Conversation",
            ),
            wvc.Property(
                name="order_index",
                data_type=wvc.DataType.INT,
                description="Position in conversation",
            ),

            # Nested object (denormalized for performance): repeats the
            # parent's id and category on the message itself.
            wvc.Property(
                name="conversation",
                data_type=wvc.DataType.OBJECT,
                skip_vectorization=True,
                description="Denormalized conversation data",
                nested_properties=[
                    wvc.Property(
                        name="conversation_id",
                        data_type=wvc.DataType.TEXT,
                    ),
                    wvc.Property(
                        name="category",
                        data_type=wvc.DataType.TEXT,
                    ),
                ],
            ),
        ],
    )

    print("[OK] Message collection created")


def create_all_memory_schemas(client: weaviate.WeaviateClient) -> None:
    """
    Run the three Memory collection creators and print a summary.

    The Thought, Conversation and Message creators are invoked in that
    order. Afterwards the collections present on the server are listed
    and the Memory and Library ones are reported by name.

    Args:
        client: Connected Weaviate client.
    """
    banner = "=" * 60

    print(banner)
    print("Creating Memory Schemas")
    print(banner)

    creators = (
        create_thought_collection,
        create_conversation_collection,
        create_message_collection,
    )
    for build in creators:
        build(client)

    print("\n" + banner)
    print("Memory Schemas Created Successfully")
    print(banner)

    # Snapshot of everything currently defined on the server
    existing = client.collections.list_all()
    print(f"\nTotal collections: {len(existing)}")

    memory_names = ("Thought", "Conversation", "Message")
    library_names = ("Work", "Document", "Chunk", "Summary")

    # Keep only the names that are actually present, in sorted order
    found_memory = sorted(name for name in existing if name in memory_names)
    found_library = sorted(name for name in existing if name in library_names)

    print(f"\nMemory collections ({len(found_memory)}): {', '.join(found_memory)}")
    print(f"Library collections ({len(found_library)}): {', '.join(found_library)}")


def delete_memory_schemas(client: weaviate.WeaviateClient) -> None:
    """
    Drop the Thought, Conversation and Message collections.

    Intended for testing/cleanup. Each deletion is attempted on its own:
    a failure is printed and the remaining collections are still
    processed.

    WARNING: This deletes all data in Memory collections!

    Args:
        client: Connected Weaviate client.
    """
    print("[WARN]  WARNING: Deleting all Memory collections...")

    targets = ("Thought", "Conversation", "Message")
    for target in targets:
        try:
            client.collections.delete(target)
            print(f"Deleted {target}")
        except Exception as err:
            # Best-effort cleanup: report the problem and carry on.
            print(f"Could not delete {target}: {err}")