# .agents/memory/scripts/export_embeddings.py

#!/usr/bin/env python3
"""Export embeddings for visualization."""

import json
import sqlite3
import sys
from pathlib import Path

try:
    import zvec
    ZVEC_AVAILABLE = True
except ImportError:
    ZVEC_AVAILABLE = False

AGENTS_DIR = Path.home() / ".agents"
DB_PATH = AGENTS_DIR / "memory" / "memories.db"
ZVEC_PATH = AGENTS_DIR / "memory" / "vectors.zvec"


def export_embeddings():
    """Export metadata for every stored memory (no vector data).

    The function verifies that ``zvec`` is importable, that the SQLite
    database and the vector store both exist, and that the vector store can
    be opened. It then returns one record per row of the ``memories`` table,
    newest first. Vectors are intentionally not included here; see the
    ``--with-vectors`` code path for that.

    Returns:
        dict: On success ``{"embeddings": [...], "count": <int>}`` where each
        entry has the keys ``id``, ``text``, ``who``, ``importance``,
        ``tags`` and ``createdAt``. On a failed precondition
        ``{"error": <message>, "embeddings": []}``.

    Raises:
        sqlite3.Error: If the query against the ``memories`` table fails.
            The connection is closed before the error propagates.
    """
    if not ZVEC_AVAILABLE:
        return {"error": "zvec not installed (requires Python 3.10-3.12)", "embeddings": []}

    if not DB_PATH.exists():
        return {"error": "No database found", "embeddings": []}

    if not ZVEC_PATH.exists():
        return {"error": "No vector store found", "embeddings": []}

    db = sqlite3.connect(str(DB_PATH))
    # try/finally guarantees the connection is released on every exit path,
    # including a failing query (``with connection:`` only manages
    # transactions, it does not close the connection).
    try:
        db.row_factory = sqlite3.Row

        rows = db.execute("""
            SELECT id, content, who, importance, tags, created_at
            FROM memories
            ORDER BY created_at DESC
        """).fetchall()

        # Opening the collection is only a sanity check that the store is
        # readable; the vectors themselves are not read in this export.
        try:
            zvec.open(path=str(ZVEC_PATH))
        except Exception as e:
            return {"error": f"Failed to open vector store: {e}", "embeddings": []}

        embeddings = [
            {
                "id": str(row["id"]),
                "text": row["content"],
                "who": row["who"] or "unknown",
                # Only substitute the default for a missing value; an
                # explicit importance of 0 must be exported as 0.
                "importance": 0.5 if row["importance"] is None else row["importance"],
                "tags": row["tags"],
                "createdAt": row["created_at"],
            }
            for row in rows
        ]
    finally:
        db.close()

    return {"embeddings": embeddings, "count": len(embeddings)}


def export_with_vectors():
    """Export the 200 most recent memories together with their vectors.

    Each memory's content is re-embedded through the project-local
    ``embeddings.embed`` function (imported from the ``memory/scripts``
    directory under ``AGENTS_DIR``), using the configuration loaded from
    ``config.yaml`` when that file exists. Memories that cannot be embedded
    are skipped rather than aborting the whole export.

    Returns:
        dict: On success ``{"embeddings": [...], "count": <int>}`` where each
        entry has the keys ``id``, ``text`` (truncated to 200 characters),
        ``who``, ``importance``, ``tags``, ``createdAt`` and ``vector``.
        On a failed precondition ``{"error": <message>, "embeddings": []}``.

    Raises:
        ImportError: If ``embeddings`` or ``yaml`` cannot be imported.
        sqlite3.Error: If the query against the ``memories`` table fails.
            The connection is closed before the error propagates.
    """
    if not DB_PATH.exists():
        return {"error": "No database found", "embeddings": []}

    if not ZVEC_PATH.exists():
        return {"error": "No vector store found", "embeddings": []}

    # Make the sibling ``embeddings`` module importable. Keep the directory
    # at the front of sys.path, but do not add a duplicate on every call.
    scripts_dir = str(AGENTS_DIR / "memory" / "scripts")
    if sys.path[:1] != [scripts_dir]:
        sys.path.insert(0, scripts_dir)
    from embeddings import embed
    import yaml

    config_path = AGENTS_DIR / "config.yaml"
    config = {}
    if config_path.exists():
        with open(config_path, encoding="utf-8") as f:
            # safe_load returns None for an empty file; fall back to {} so
            # embed() always receives a mapping.
            config = yaml.safe_load(f) or {}

    db = sqlite3.connect(str(DB_PATH))
    # try/finally guarantees the connection is released on every exit path.
    try:
        db.row_factory = sqlite3.Row

        rows = db.execute("""
            SELECT id, content, who, importance, tags, created_at
            FROM memories
            ORDER BY created_at DESC
            LIMIT 200
        """).fetchall()

        embeddings = []

        for row in rows:
            content = row["content"]
            if content is None:
                # Nothing to embed (and nothing to truncate) for a NULL body.
                continue

            try:
                # embed() returns a pair; only the first element (the vector)
                # is used here.
                vector, _ = embed(content, config)
            except Exception:
                # Best effort: skip memories we can't embed instead of
                # failing the whole export.
                continue

            embeddings.append({
                "id": str(row["id"]),
                "text": content[:200],  # Truncate for JSON size
                "who": row["who"] or "unknown",
                # Only substitute the default for a missing value; an
                # explicit importance of 0 must be exported as 0.
                "importance": 0.5 if row["importance"] is None else row["importance"],
                "tags": row["tags"],
                "createdAt": row["created_at"],
                "vector": vector,
            })
    finally:
        db.close()

    return {"embeddings": embeddings, "count": len(embeddings)}


if __name__ == "__main__":
    # Vector data is included only when the first CLI argument is exactly
    # --with-vectors; any other invocation exports metadata alone.
    wants_vectors = sys.argv[1:2] == ["--with-vectors"]
    exporter = export_with_vectors if wants_vectors else export_embeddings

    # The result is emitted as a single JSON document on stdout.
    print(json.dumps(exporter()))