clawdbot-workspace/memory-maintenance.py
2026-01-28 23:00:58 -05:00

306 lines
9.6 KiB
Python

#!/usr/bin/env python3
"""
Memory maintenance tasks: consolidation, decay, and pruning.
Run weekly via cron or manually.
"""
import sqlite3
import os
import stat
import sys
from datetime import datetime, timedelta
from typing import List, Dict, Any
# --- Storage location ---------------------------------------------------
DB_PATH = os.path.expanduser("~/.clawdbot/memory/main.sqlite")
DB_DIR = os.path.dirname(DB_PATH)

# --- Maintenance tuning -------------------------------------------------
# Confidence subtracted per 30 days of inactivity (an absolute amount).
DECAY_RATE = 0.1
# Decay never pushes confidence below this floor; rows at or below it are
# prune candidates.
MIN_CONFIDENCE = 0.3
# Prune candidates must have been created more than this many days ago.
PRUNE_AFTER_DAYS = 180
# Maximum number of active memories kept per guild.
MAX_MEMORIES_PER_GUILD = 10_000

# --- Required permissions -----------------------------------------------
SECURE_FILE_MODE = stat.S_IRUSR | stat.S_IWUSR  # 0o600: owner read/write
SECURE_DIR_MODE = stat.S_IRWXU  # 0o700: owner read/write/execute
def ensure_secure_permissions(warn: bool = True) -> list:
    """Check and auto-fix permissions on the database directory and file.

    Each path that exists and whose permission bits differ from the required
    mode (``SECURE_DIR_MODE`` for ``DB_DIR``, ``SECURE_FILE_MODE`` for
    ``DB_PATH``) is chmod-ed to that mode. Paths that cannot be stat-ed
    (typically because they do not exist) are skipped.

    Args:
        warn: When true, each fix message is also printed to stderr.

    Returns:
        A list with one message string per path whose mode was changed;
        empty when nothing needed fixing.

    Raises:
        OSError: If ``os.chmod`` fails on a path that needs fixing.
    """
    fixes = []
    # Directory first, then the database file that lives inside it.
    targets = (
        (DB_DIR, SECURE_DIR_MODE, "directory"),
        (DB_PATH, SECURE_FILE_MODE, "database"),
    )
    for path, required_mode, label in targets:
        try:
            current_mode = stat.S_IMODE(os.stat(path).st_mode)
        except (OSError, ValueError):
            # Same cases in which os.path.exists() reports False. Stat-ing
            # directly (instead of exists() followed by stat()) avoids a
            # crash when the path disappears between the two calls.
            continue
        if current_mode == required_mode:
            continue
        os.chmod(path, required_mode)
        msg = f"[SECURITY] Fixed {label} permissions: {path}"
        fixes.append(msg)
        if warn:
            print(msg, file=sys.stderr)
    return fixes
def get_db():
    """Open the memory database after running the permission check.

    Returns:
        A ``sqlite3.Connection`` to ``DB_PATH`` whose rows are
        ``sqlite3.Row`` objects, so columns can be read by name.
    """
    # Repair directory/file modes first; any fix is reported on stderr.
    ensure_secure_permissions(warn=True)

    connection = sqlite3.connect(DB_PATH)
    connection.row_factory = sqlite3.Row
    return connection
def decay_confidence():
    """Reduce the confidence of memories that have been inactive for 30+ days.

    A memory is selected when it is active (``superseded_by IS NULL``), its
    confidence is above ``MIN_CONFIDENCE``, and its ``last_accessed`` (or
    ``created_at`` when it was never accessed) is older than 30 days. Both
    columns are compared against epoch seconds. Each selected memory loses
    ``DECAY_RATE`` per 30 days of inactivity (at least one period's worth)
    and is clamped so it never drops below ``MIN_CONFIDENCE``.

    NOTE(review): the amount is recomputed from the *total* inactivity on
    every run and subtracted from the already-decayed value, so frequent
    runs compound the decay. Confirm this is intended.

    Returns:
        int: Number of memories whose confidence was updated (0 if none).

    Raises:
        Exception: Any failure is re-raised after the transaction is
            rolled back; the connection is always closed.
    """
    seconds_per_month = 30 * 24 * 3600
    db = get_db()
    try:
        now = datetime.now()
        # Loop-invariant reference time, computed once.
        now_ts = now.timestamp()
        cutoff = int((now - timedelta(days=30)).timestamp())
        to_decay = db.execute("""
            SELECT id, confidence, last_accessed, created_at
            FROM memories
            WHERE superseded_by IS NULL
            AND confidence > ?
            AND (last_accessed < ? OR (last_accessed IS NULL AND created_at < ?))
        """, (MIN_CONFIDENCE, cutoff, cutoff)).fetchall()
        if not to_decay:
            print("No memories to decay.")
            return 0

        updates = []
        for row in to_decay:
            # Fall back to the creation time when there is no access time.
            last_active = row["last_accessed"] or row["created_at"]
            months_inactive = (now_ts - last_active) / seconds_per_month
            decay_amount = DECAY_RATE * max(1, months_inactive)
            new_confidence = max(MIN_CONFIDENCE, row["confidence"] - decay_amount)
            updates.append((new_confidence, row["id"]))

        # One batched statement instead of a separate execute() per row.
        db.executemany("UPDATE memories SET confidence = ? WHERE id = ?", updates)
        db.commit()
        decayed = len(updates)
        print(f"Decayed confidence for {decayed} memories.")
        return decayed
    except Exception:
        db.rollback()
        raise
    finally:
        db.close()
def prune_low_confidence():
    """Soft-delete old memories whose confidence is at or below the floor.

    Selects active rows (``superseded_by IS NULL``) with
    ``confidence <= MIN_CONFIDENCE`` that were created more than
    ``PRUNE_AFTER_DAYS`` days ago, prints a one-line preview of each, and
    marks them pruned by setting ``superseded_by`` to 0 (special value
    meaning 'pruned'). Rows are not physically deleted.

    Returns:
        int: Number of memories marked as pruned (0 if none).

    Raises:
        Exception: Any failure is re-raised after the transaction is
            rolled back; the connection is always closed.
    """
    db = get_db()
    try:
        cutoff = int((datetime.now() - timedelta(days=PRUNE_AFTER_DAYS)).timestamp())
        to_prune = db.execute("""
            SELECT id, content, confidence, created_at
            FROM memories
            WHERE superseded_by IS NULL
            AND confidence <= ?
            AND created_at < ?
        """, (MIN_CONFIDENCE, cutoff)).fetchall()
        if not to_prune:
            print("No memories to prune.")
            return 0

        print(f"Pruning {len(to_prune)} low-confidence memories:")
        for row in to_prune:
            content = row["content"]
            # A NULL content must not abort the whole prune run.
            preview = "" if content is None else content[:60]
            print(f" [{row['id']}] conf={row['confidence']:.2f}: {preview}...")

        # Mark as pruned (superseded_by = 0). One bound parameter per
        # statement, so a large backlog cannot exceed SQLite's limit on
        # bound variables the way a single "IN (?, ?, ...)" list can.
        db.executemany(
            "UPDATE memories SET superseded_by = 0 WHERE id = ?",
            [(row["id"],) for row in to_prune],
        )
        db.commit()
        return len(to_prune)
    except Exception:
        db.rollback()
        raise
    finally:
        db.close()
def enforce_guild_limits():
    """Prune the excess when a guild has more than MAX_MEMORIES_PER_GUILD.

    Active memories (``superseded_by IS NULL``) are counted per ``guild_id``;
    rows with a NULL ``guild_id`` form their own group, reported as
    'global'. For every group over the cap, the lowest-confidence, then
    oldest, memories are marked pruned (``superseded_by = 0``) until the
    group is back at the cap.

    Returns:
        int: Total number of memories marked as pruned across all guilds.

    Raises:
        Exception: Any failure is re-raised after the transaction is
            rolled back; the connection is always closed.
    """
    db = get_db()
    try:
        # Group on the raw guild_id (not a COALESCE'd label) so a guild
        # literally named 'global' is not confused with the NULL group.
        over_limit = db.execute("""
            SELECT guild_id, COUNT(*) AS cnt
            FROM memories
            WHERE superseded_by IS NULL
            GROUP BY guild_id
            HAVING cnt > ?
        """, (MAX_MEMORIES_PER_GUILD,)).fetchall()

        total_pruned = 0
        for row in over_limit:
            guild_id = row["guild_id"]
            label = "global" if guild_id is None else guild_id
            excess = row["cnt"] - MAX_MEMORIES_PER_GUILD
            print(f"Guild {label} has {row['cnt']} memories, pruning {excess}...")

            # "IS ?" matches like "=" for ordinary values and also matches
            # NULL, so one query serves both the global and per-guild case.
            victims = db.execute("""
                SELECT id FROM memories
                WHERE superseded_by IS NULL AND guild_id IS ?
                ORDER BY confidence ASC, created_at ASC
                LIMIT ?
            """, (guild_id, excess)).fetchall()

            if victims:
                # One bound parameter per statement: the excess can be
                # larger than SQLite allows in a single "IN (...)" list.
                db.executemany(
                    "UPDATE memories SET superseded_by = 0 WHERE id = ?",
                    [(victim["id"],) for victim in victims],
                )
                total_pruned += len(victims)

        db.commit()
        return total_pruned
    except Exception:
        db.rollback()
        raise
    finally:
        db.close()
def get_maintenance_stats() -> Dict[str, Any]:
    """Collect the counters used to judge whether maintenance is needed.

    Returns:
        A dict with, in this order: ``active_memories``,
        ``superseded_memories``, ``low_confidence``, ``never_accessed``
        (all integer counts), ``confidence_distribution`` (bucket name ->
        count) and ``per_guild`` (guild id, or 'global' for NULL -> count).
        Everything except ``superseded_memories`` covers active rows only.
    """
    db = get_db()
    try:
        def count(sql, params=()):
            # First column of the first row of a COUNT(*) query.
            return db.execute(sql, params).fetchone()[0]

        def grouped(sql):
            # Two-column result set turned into {first column: second column}.
            return {key: total for key, total in db.execute(sql)}

        # Rows that are neither superseded nor pruned.
        active = count("SELECT COUNT(*) FROM memories WHERE superseded_by IS NULL")

        # Rows that were superseded or pruned.
        superseded = count("SELECT COUNT(*) FROM memories WHERE superseded_by IS NOT NULL")

        # Active rows already at or below the confidence floor.
        low_confidence = count("""
            SELECT COUNT(*) FROM memories
            WHERE superseded_by IS NULL AND confidence <= ?
        """, (MIN_CONFIDENCE,))

        # Active rows without any recorded access.
        never_accessed = count("""
            SELECT COUNT(*) FROM memories
            WHERE superseded_by IS NULL AND last_accessed IS NULL
        """)

        # Active rows split into high / medium / low confidence buckets.
        distribution = grouped("""
            SELECT
            CASE
            WHEN confidence > 0.8 THEN 'high'
            WHEN confidence > 0.5 THEN 'medium'
            ELSE 'low'
            END as bucket,
            COUNT(*)
            FROM memories
            WHERE superseded_by IS NULL
            GROUP BY bucket
        """)

        # Active rows per guild.
        per_guild = grouped("""
            SELECT COALESCE(guild_id, 'global') as guild, COUNT(*)
            FROM memories
            WHERE superseded_by IS NULL
            GROUP BY guild_id
        """)

        return {
            "active_memories": active,
            "superseded_memories": superseded,
            "low_confidence": low_confidence,
            "never_accessed": never_accessed,
            "confidence_distribution": distribution,
            "per_guild": per_guild,
        }
    finally:
        db.close()
def run_weekly_maintenance():
    """Run every maintenance task in order and print a report.

    Order: confidence decay, low-confidence pruning, per-guild limit
    enforcement, then a dump of the current maintenance stats. Intended to
    be called weekly from cron.
    """
    started_at = datetime.now().isoformat()
    print(f"=== Memory Maintenance: {started_at} ===\n")

    # (heading, task) pairs, executed top to bottom.
    steps = (
        ("1. Decaying confidence for inactive memories...", decay_confidence),
        ("\n2. Pruning very low confidence old memories...", prune_low_confidence),
        ("\n3. Enforcing per-guild limits...", enforce_guild_limits),
    )
    counts = []
    for heading, task in steps:
        print(heading)
        counts.append(task())
    decayed, pruned, limited = counts

    print("\n4. Current stats:")
    for key, value in get_maintenance_stats().items():
        print(f" {key}: {value}")

    print(f"\n=== Done: decayed={decayed}, pruned={pruned}, limited={limited} ===")
if __name__ == "__main__":
    # Command-line entry point: the first argument selects the task.
    import sys

    if len(sys.argv) < 2:
        usage_lines = (
            "Usage:",
            " python memory-maintenance.py run # Run all maintenance",
            " python memory-maintenance.py stats # Show maintenance stats",
            " python memory-maintenance.py decay # Only decay confidence",
            " python memory-maintenance.py prune # Only prune low confidence",
        )
        for usage_line in usage_lines:
            print(usage_line)
        sys.exit(1)

    cmd = sys.argv[1]
    # Commands that simply call one maintenance function.
    simple_commands = {
        "run": run_weekly_maintenance,
        "decay": decay_confidence,
        "prune": prune_low_confidence,
    }
    if cmd in simple_commands:
        simple_commands[cmd]()
    elif cmd == "stats":
        # Stats are emitted as JSON so other tools can parse them.
        import json
        stats = get_maintenance_stats()
        print(json.dumps(stats, indent=2))
    else:
        print(f"Unknown command: {cmd}")
        sys.exit(1)