feat: Add Weaviate memory export scripts and documentation
Added two export scripts to backup memory collections: 1. export_conversations.py: - Exports all Conversation + Message objects to markdown - Includes conversation metadata (category, timestamps, participants) - Formats messages chronologically with role indicators - Generated: docs/conversations.md (12 conversations, 377 messages) 2. export_thoughts.py: - Exports all Thought objects to markdown - Groups by thought_type with summary statistics - Includes metadata (trigger, emotional_state, concepts, privacy) - Generated: docs/thoughts.md (104 thoughts across 8 types) Both scripts use UTF-8 encoding for markdown output with emoji formatting for better readability. Exports stored in docs/ for versioned backup of memory collections. Stats: - Conversations: 12 (5 testing, 7 general) - Messages: 377 total - Thoughts: 104 (28 reflection, 36 synthesis, 27 test) - Privacy: 100% private Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
43324
docs/conversations.md
Normal file
43324
docs/conversations.md
Normal file
File diff suppressed because it is too large
Load Diff
3027
docs/thoughts.md
Normal file
3027
docs/thoughts.md
Normal file
File diff suppressed because it is too large
Load Diff
148
export_conversations.py
Normal file
148
export_conversations.py
Normal file
@@ -0,0 +1,148 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Export conversations from Weaviate to Markdown file.
|
||||
|
||||
Exports all conversations with their messages to docs/conversations.md
|
||||
"""
|
||||
|
||||
import weaviate
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def export_conversations_to_md(output_file: str = "docs/conversations.md",
                               limit: int = 1000) -> None:
    """Export all Conversation objects (with their Messages) to markdown.

    Connects to a local Weaviate instance, fetches every Conversation, and
    for each one fetches its Message objects (matched via the
    ``conversation_id`` property), then writes a human-readable markdown
    report with per-conversation metadata and chronologically ordered
    messages.

    Args:
        output_file: Path of the markdown file to write; parent directories
            are created if needed.
        limit: Maximum number of objects fetched per query (applies to the
            conversation fetch and to each per-conversation message fetch).
    """
    # Connect to Weaviate
    client = weaviate.connect_to_local()

    try:
        # Get collections
        conversation_collection = client.collections.get("Conversation")
        message_collection = client.collections.get("Message")

        # Fetch all conversations
        conversations_response = conversation_collection.query.fetch_objects(
            limit=limit
        )

        conversations = conversations_response.objects
        print(f"Found {len(conversations)} conversations")

        def _start_key(conv):
            # Most-recent-first sort key. The boolean first element sends
            # conversations without a start timestamp to one end and avoids
            # ever comparing the naive datetime.min fallback against the
            # timezone-aware datetimes Weaviate returns (that comparison
            # raises TypeError).
            ts = conv.properties.get("timestamp_start")
            return (ts is not None, ts or datetime.min)

        conversations = sorted(conversations, key=_start_key, reverse=True)

        # Build the whole document in memory, then write once at the end.
        lines = []
        lines.append("# Conversations Export")
        lines.append(f"\n**Exported**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        lines.append(f"\n**Total conversations**: {len(conversations)}")
        lines.append("\n---\n")

        # Process each conversation
        for idx, conv in enumerate(conversations, 1):
            props = conv.properties

            conv_id = props.get("conversation_id", "unknown")
            category = props.get("category", "N/A")
            summary = props.get("summary", "No summary")
            timestamp_start = props.get("timestamp_start")
            timestamp_end = props.get("timestamp_end")
            participants = props.get("participants", [])
            tags = props.get("tags", [])
            message_count = props.get("message_count", 0)
            context = props.get("context", "")

            # Format timestamps; a missing end timestamp means the
            # conversation is still open.
            start_str = timestamp_start.strftime('%Y-%m-%d %H:%M:%S') if timestamp_start else "N/A"
            end_str = timestamp_end.strftime('%Y-%m-%d %H:%M:%S') if timestamp_end else "Ongoing"

            # Write conversation header
            lines.append(f"## Conversation {idx}: {conv_id}")
            lines.append(f"\n**Category**: {category}")
            lines.append(f"**Start**: {start_str}")
            lines.append(f"**End**: {end_str}")

            if participants:
                lines.append(f"**Participants**: {', '.join(participants)}")

            if tags:
                lines.append(f"**Tags**: {', '.join(tags)}")

            lines.append(f"**Message count**: {message_count}")

            lines.append(f"\n**Summary**:\n{summary}")

            if context:
                lines.append(f"\n**Context**:\n{context}")

            # Fetch messages for this conversation (N+1 query pattern; fine
            # for the small collections this backup script targets).
            messages_response = message_collection.query.fetch_objects(
                filters=weaviate.classes.query.Filter.by_property("conversation_id").equal(conv_id),
                limit=limit
            )

            # Sort messages chronologically by order_index
            messages = sorted(
                messages_response.objects,
                key=lambda m: m.properties.get("order_index", 0)
            )

            if messages:
                lines.append(f"\n### Messages ({len(messages)})\n")

                for msg in messages:
                    msg_props = msg.properties
                    role = msg_props.get("role", "unknown")
                    content = msg_props.get("content", "")
                    timestamp = msg_props.get("timestamp")
                    order_idx = msg_props.get("order_index", 0)

                    timestamp_str = timestamp.strftime('%H:%M:%S') if timestamp else "N/A"

                    # Role indicator emoji; unknown roles fall back to ❓.
                    role_emoji = {
                        "user": "👤",
                        "assistant": "🤖",
                        "system": "⚙️"
                    }.get(role, "❓")

                    lines.append(f"**[{order_idx}] {role_emoji} {role.upper()}** ({timestamp_str})")
                    lines.append(f"\n{content}\n")
            else:
                lines.append("\n*No messages found*\n")

            lines.append("\n---\n")

        # Write to file
        output_path = Path(output_file)
        output_path.parent.mkdir(parents=True, exist_ok=True)

        # UTF-8 so the emoji markers survive on platforms whose default
        # encoding is not UTF-8 (e.g. Windows).
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write('\n'.join(lines))

        print(f"\n[OK] Exported {len(conversations)} conversations to {output_file}")

        # Stats (message total uses the stored message_count property, not
        # the number of Message objects actually fetched above).
        total_messages = sum(c.properties.get("message_count", 0) for c in conversations)
        print(f" Total messages: {total_messages}")

        categories = {}
        for c in conversations:
            cat = c.properties.get("category", "unknown")
            categories[cat] = categories.get(cat, 0) + 1

        print(f" Categories: {dict(categories)}")

    finally:
        # Always release the Weaviate connection, even on failure.
        client.close()
|
||||
|
||||
|
||||
# Script entry point: export to the default docs/conversations.md path when
# run directly (no effect when imported as a module).
if __name__ == "__main__":
    export_conversations_to_md()
|
||||
140
export_thoughts.py
Normal file
140
export_thoughts.py
Normal file
@@ -0,0 +1,140 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Export thoughts from Weaviate to Markdown file.
|
||||
|
||||
Exports all thoughts to docs/thoughts.md
|
||||
"""
|
||||
|
||||
import weaviate
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def export_thoughts_to_md(output_file: str = "docs/thoughts.md",
                          limit: int = 1000) -> None:
    """Export all Thought objects from Weaviate to a markdown file.

    Connects to a local Weaviate instance, fetches every Thought, and writes
    a markdown report: a per-type summary table followed by one section per
    thought (newest first) with its metadata and content.

    Args:
        output_file: Path of the markdown file to write; parent directories
            are created if needed.
        limit: Maximum number of Thought objects fetched.
    """
    # Connect to Weaviate
    client = weaviate.connect_to_local()

    try:
        # Get collection
        thought_collection = client.collections.get("Thought")

        # Fetch all thoughts
        thoughts_response = thought_collection.query.fetch_objects(
            limit=limit
        )

        thoughts = thoughts_response.objects
        print(f"Found {len(thoughts)} thoughts")

        def _ts_key(thought):
            # Most-recent-first sort key. The boolean first element sends
            # thoughts without a timestamp to one end and avoids comparing
            # the naive datetime.min fallback against the timezone-aware
            # datetimes Weaviate returns (that comparison raises TypeError).
            ts = thought.properties.get("timestamp")
            return (ts is not None, ts or datetime.min)

        thoughts = sorted(thoughts, key=_ts_key, reverse=True)

        # Build the whole document in memory, then write once at the end.
        lines = []
        lines.append("# Thoughts Export")
        lines.append(f"\n**Exported**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        lines.append(f"\n**Total thoughts**: {len(thoughts)}")
        lines.append("\n---\n")

        # Group thoughts by type for the summary section.
        thoughts_by_type = {}
        for thought in thoughts:
            thought_type = thought.properties.get("thought_type", "unknown")
            thoughts_by_type.setdefault(thought_type, []).append(thought)

        # Write summary by type
        lines.append("## Summary by Type\n")
        for thought_type in sorted(thoughts_by_type.keys()):
            count = len(thoughts_by_type[thought_type])
            lines.append(f"- **{thought_type}**: {count}")

        lines.append("\n---\n")

        # Process each thought
        for idx, thought in enumerate(thoughts, 1):
            props = thought.properties

            content = props.get("content", "No content")
            thought_type = props.get("thought_type", "unknown")
            timestamp = props.get("timestamp")
            trigger = props.get("trigger", "")
            emotional_state = props.get("emotional_state", "")
            concepts = props.get("concepts", [])
            privacy_level = props.get("privacy_level", "private")
            context = props.get("context", "")

            # Format timestamp
            timestamp_str = timestamp.strftime('%Y-%m-%d %H:%M:%S') if timestamp else "N/A"

            # Type indicator emoji; unmatched types fall back to 📝.
            # NOTE(review): the commit message mentions "reflection" and
            # "synthesis" thought types, neither of which is a key here
            # (only "reflexion") — confirm the intended type names.
            type_emoji = {
                "reflexion": "💭",
                "question": "❓",
                "intuition": "💡",
                "observation": "👁️",
                "conclusion": "✅",
                "hypothesis": "🤔",
                "discovery": "🔍"
            }.get(thought_type, "📝")

            # Privacy indicator emoji; unknown levels fall back to ❓.
            privacy_emoji = {
                "private": "🔒",
                "shared": "👥",
                "public": "🌐"
            }.get(privacy_level, "❓")

            # Write thought entry
            lines.append(f"## {type_emoji} Thought {idx}: {thought_type.upper()}")
            lines.append(f"\n**Timestamp**: {timestamp_str}")
            lines.append(f"**Privacy**: {privacy_emoji} {privacy_level}")

            if trigger:
                lines.append(f"**Trigger**: {trigger}")

            if emotional_state:
                lines.append(f"**Emotional state**: {emotional_state}")

            if concepts:
                lines.append(f"**Concepts**: {', '.join(concepts)}")

            lines.append(f"\n### Content\n\n{content}\n")

            if context:
                lines.append(f"**Context**: {context}\n")

            lines.append("\n---\n")

        # Write to file
        output_path = Path(output_file)
        output_path.parent.mkdir(parents=True, exist_ok=True)

        # UTF-8 so the emoji markers survive on platforms whose default
        # encoding is not UTF-8 (e.g. Windows).
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write('\n'.join(lines))

        print(f"\n[OK] Exported {len(thoughts)} thoughts to {output_file}")

        # Stats
        print(f" Types: {({k: len(v) for k, v in thoughts_by_type.items()})}")

        privacy_stats = {}
        for t in thoughts:
            privacy = t.properties.get("privacy_level", "unknown")
            privacy_stats[privacy] = privacy_stats.get(privacy, 0) + 1

        print(f" Privacy: {dict(privacy_stats)}")

    finally:
        # Always release the Weaviate connection, even on failure.
        client.close()
|
||||
|
||||
|
||||
# Script entry point: export to the default docs/thoughts.md path when run
# directly (no effect when imported as a module).
if __name__ == "__main__":
    export_thoughts_to_md()
|
||||
Reference in New Issue
Block a user