147 lines
4.2 KiB
Python
147 lines
4.2 KiB
Python
#!/usr/bin/env python3
|
|
"""Export embeddings for visualization."""
|
|
|
|
import json
|
|
import sqlite3
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
try:
|
|
import zvec
|
|
ZVEC_AVAILABLE = True
|
|
except ImportError:
|
|
ZVEC_AVAILABLE = False
|
|
|
|
# Per-user directory that holds all agent state.
AGENTS_DIR = Path.home().joinpath(".agents")

# SQLite database file opened by the exporters below.
DB_PATH = AGENTS_DIR.joinpath("memory", "memories.db")

# Path handed to zvec.open() for the vector store.
ZVEC_PATH = AGENTS_DIR.joinpath("memory", "vectors.zvec")
|
|
|
|
|
|
def export_embeddings():
    """Export the metadata of every stored memory (without vector data).

    Returns:
        dict: On success ``{"embeddings": [...], "count": n}``; each entry has
        the keys ``id``, ``text``, ``who``, ``importance``, ``tags`` and
        ``createdAt``, ordered newest first. When a prerequisite is missing
        or the vector store cannot be opened, ``{"error": message,
        "embeddings": []}`` is returned instead.

    Raises:
        sqlite3.Error: If the database cannot be queried. The connection is
            closed before the error propagates.
    """
    if not ZVEC_AVAILABLE:
        return {"error": "zvec not installed (requires Python 3.10-3.12)", "embeddings": []}

    if not DB_PATH.exists():
        return {"error": "No database found", "embeddings": []}

    if not ZVEC_PATH.exists():
        return {"error": "No vector store found", "embeddings": []}

    db = sqlite3.connect(str(DB_PATH))
    try:
        db.row_factory = sqlite3.Row

        rows = db.execute(
            """
            SELECT id, content, who, importance, tags, created_at
            FROM memories
            ORDER BY created_at DESC
            """
        ).fetchall()

        # The collection is opened only to confirm the vector store is
        # readable; no vectors are read here, so the handle is not kept.
        try:
            zvec.open(path=str(ZVEC_PATH))
        except Exception as e:
            return {"error": f"Failed to open vector store: {e}", "embeddings": []}

        embeddings = [
            {
                "id": str(row["id"]),
                "text": row["content"],
                "who": row["who"] or "unknown",
                # Only a missing importance gets the default; an explicit 0
                # is a valid value and must not be rewritten to 0.5.
                "importance": 0.5 if row["importance"] is None else row["importance"],
                "tags": row["tags"],
                "createdAt": row["created_at"],
            }
            for row in rows
        ]
    finally:
        # Runs on every exit path, including query errors and early returns.
        db.close()

    return {"embeddings": embeddings, "count": len(embeddings)}
|
|
|
|
|
|
def export_with_vectors():
    """Export the 200 most recent memories together with their vectors.

    Each memory's content is re-embedded through the ``embed`` helper that is
    imported from ``<AGENTS_DIR>/memory/scripts``; memories that fail to embed
    are skipped.

    Returns:
        dict: On success ``{"embeddings": [...], "count": n}``; each entry has
        the keys ``id``, ``text`` (first 200 characters), ``who``,
        ``importance``, ``tags``, ``createdAt`` and ``vector``. When a
        prerequisite is missing, ``{"error": message, "embeddings": []}``.

    Raises:
        sqlite3.Error: If the database cannot be queried. The connection is
            closed before the error propagates.
    """
    if not DB_PATH.exists():
        return {"error": "No database found", "embeddings": []}

    if not ZVEC_PATH.exists():
        return {"error": "No vector store found", "embeddings": []}

    # The embedding helper lives next to the memory scripts, outside any
    # installed package, so its directory has to be put on sys.path first.
    sys.path.insert(0, str(AGENTS_DIR / "memory" / "scripts"))
    try:
        from embeddings import embed
        import yaml
    except ImportError as e:
        # Report missing modules the same way as every other failure so the
        # caller still receives a JSON document instead of a traceback.
        return {"error": f"Missing dependency: {e}", "embeddings": []}

    config = {}
    config_path = AGENTS_DIR / "config.yaml"
    if config_path.exists():
        with open(config_path, encoding="utf-8") as f:
            # safe_load returns None for an empty file; keep config a dict.
            config = yaml.safe_load(f) or {}

    db = sqlite3.connect(str(DB_PATH))
    try:
        db.row_factory = sqlite3.Row
        rows = db.execute(
            """
            SELECT id, content, who, importance, tags, created_at
            FROM memories
            ORDER BY created_at DESC
            LIMIT 200
            """
        ).fetchall()
    finally:
        # All rows are already fetched; release the connection before the
        # potentially slow embedding loop and on query errors alike.
        db.close()

    embeddings = []
    for row in rows:
        content = row["content"]

        try:
            # embed() returns a pair; only the vector is needed here.
            vector, _ = embed(content, config)
            text = content[:200]  # Truncate for JSON size
        except Exception:
            # Best effort: skip memories that cannot be embedded.
            continue

        embeddings.append({
            "id": str(row["id"]),
            "text": text,
            "who": row["who"] or "unknown",
            # Only a missing importance gets the default; an explicit 0 is a
            # valid value and must not be rewritten to 0.5.
            "importance": 0.5 if row["importance"] is None else row["importance"],
            "tags": row["tags"],
            "createdAt": row["created_at"],
            "vector": vector,
        })

    return {"embeddings": embeddings, "count": len(embeddings)}
|
|
|
|
|
|
if __name__ == "__main__":
    # Only the first CLI argument is inspected: "--with-vectors" selects the
    # exporter that includes vector data, anything else the metadata-only one.
    wants_vectors = sys.argv[1:2] == ["--with-vectors"]
    result = export_with_vectors() if wants_vectors else export_embeddings()

    # Emit the export as a single JSON document on stdout.
    print(json.dumps(result))
|