#!/usr/bin/env python3
"""
Memory maintenance tasks: consolidation, decay, and pruning.
Run weekly via cron or manually.
"""

import sqlite3
import os
import stat
import sys
from datetime import datetime, timedelta
from typing import List, Dict, Any, Sequence

DB_PATH = os.path.expanduser("~/.clawdbot/memory/main.sqlite")
DB_DIR = os.path.dirname(DB_PATH)

# Configuration
DECAY_RATE = 0.1  # Reduce confidence by 10% per month of inactivity
MIN_CONFIDENCE = 0.3  # Below this, memories become prune candidates
PRUNE_AFTER_DAYS = 180  # Prune low-confidence memories older than this
MAX_MEMORIES_PER_GUILD = 10000  # Hard cap per guild

# Security: Required permissions
SECURE_FILE_MODE = 0o600  # Owner read/write only
SECURE_DIR_MODE = 0o700  # Owner read/write/execute only

# Maximum number of ids bound into a single "IN (...)" clause. SQLite rejects
# statements with more bound variables than SQLITE_MAX_VARIABLE_NUMBER (999 on
# older builds), so bulk updates are issued in batches well below that limit.
SQL_BATCH_SIZE = 500

SECONDS_PER_MONTH = 30 * 24 * 3600


def ensure_secure_permissions(warn: bool = True) -> List[str]:
    """Check and auto-fix permissions on database and directory.

    Args:
        warn: When true, each fix is also reported on stderr.

    Returns:
        One message per path whose mode had to be changed (empty when
        everything already matched, or when neither path exists).

    Raises:
        OSError: If ``os.stat`` or ``os.chmod`` fails on an existing path.
    """
    fixes = []
    targets = (
        (DB_DIR, SECURE_DIR_MODE, "directory"),
        (DB_PATH, SECURE_FILE_MODE, "database"),
    )
    for path, required_mode, label in targets:
        if not os.path.exists(path):
            continue
        current_mode = stat.S_IMODE(os.stat(path).st_mode)
        if current_mode == required_mode:
            continue
        os.chmod(path, required_mode)
        msg = f"[SECURITY] Fixed {label} permissions: {path}"
        fixes.append(msg)
        if warn:
            print(msg, file=sys.stderr)
    return fixes


def get_db() -> sqlite3.Connection:
    """Get database connection with automatic security checks.

    Permissions are verified (and fixed) before connecting. Rows are returned
    as ``sqlite3.Row`` so columns can be read by name.
    """
    ensure_secure_permissions(warn=True)
    conn = sqlite3.connect(DB_PATH)
    conn.row_factory = sqlite3.Row
    return conn


def _mark_pruned(db: sqlite3.Connection, ids: Sequence[Any]) -> None:
    """Soft-delete the given memory ids by setting superseded_by = 0.

    The update is issued in batches of SQL_BATCH_SIZE so the number of bound
    parameters never exceeds SQLite's per-statement variable limit. The caller
    owns the transaction (commit / rollback).
    """
    for start in range(0, len(ids), SQL_BATCH_SIZE):
        batch = list(ids[start:start + SQL_BATCH_SIZE])
        placeholders = ",".join("?" * len(batch))
        db.execute(
            f"UPDATE memories SET superseded_by = 0 WHERE id IN ({placeholders})",
            batch,
        )


def decay_confidence() -> int:
    """
    Reduce confidence of memories not accessed in 30+ days.

    Memories that are accessed regularly maintain confidence.
    Uses time-proportional decay based on months of inactivity: each selected
    memory loses ``DECAY_RATE * max(1, months_inactive)``, clamped so it never
    drops below MIN_CONFIDENCE.

    Returns:
        Number of memories whose confidence was updated.

    NOTE(review): every run re-applies the full inactivity-proportional decay,
    so with a weekly schedule an inactive memory decays faster than 10% per
    month. Fixing that needs a "last decayed" marker in the schema.
    """
    db = get_db()
    try:
        now = datetime.now()
        now_ts = now.timestamp()
        cutoff = int((now - timedelta(days=30)).timestamp())

        cursor = db.execute("""
            SELECT id, confidence, last_accessed, created_at
            FROM memories
            WHERE superseded_by IS NULL
              AND confidence > ?
              AND (last_accessed < ? OR (last_accessed IS NULL AND created_at < ?))
        """, (MIN_CONFIDENCE, cutoff, cutoff))
        to_decay = cursor.fetchall()

        if not to_decay:
            print("No memories to decay.")
            return 0

        updates = []
        for row in to_decay:
            # Never-accessed memories age from their creation time.
            last_active = row["last_accessed"] or row["created_at"]
            months_inactive = (now_ts - last_active) / SECONDS_PER_MONTH
            decay_amount = DECAY_RATE * max(1, months_inactive)
            new_confidence = max(MIN_CONFIDENCE, row["confidence"] - decay_amount)
            updates.append((new_confidence, row["id"]))

        db.executemany("UPDATE memories SET confidence = ? WHERE id = ?", updates)
        db.commit()

        decayed = len(updates)
        print(f"Decayed confidence for {decayed} memories.")
        return decayed
    except Exception:
        db.rollback()
        raise
    finally:
        db.close()


def prune_low_confidence() -> int:
    """
    Soft-delete memories with very low confidence that are old.

    Selects active memories with confidence <= MIN_CONFIDENCE created more than
    PRUNE_AFTER_DAYS ago, prints a short preview of each, and sets
    superseded_by to 0 (special value meaning 'pruned').

    Returns:
        Number of memories pruned.
    """
    db = get_db()
    try:
        cutoff = int((datetime.now() - timedelta(days=PRUNE_AFTER_DAYS)).timestamp())

        cursor = db.execute("""
            SELECT id, content, confidence, created_at
            FROM memories
            WHERE superseded_by IS NULL
              AND confidence <= ?
              AND created_at < ?
        """, (MIN_CONFIDENCE, cutoff))
        to_prune = cursor.fetchall()

        if not to_prune:
            print("No memories to prune.")
            return 0

        print(f"Pruning {len(to_prune)} low-confidence memories:")
        for row in to_prune:
            # content may be NULL; fall back to an empty preview instead of crashing.
            preview = (row["content"] or "")[:60]
            print(f" [{row['id']}] conf={row['confidence']:.2f}: {preview}...")

        # Mark as pruned (superseded_by = 0)
        _mark_pruned(db, [row["id"] for row in to_prune])
        db.commit()

        return len(to_prune)
    except Exception:
        db.rollback()
        raise
    finally:
        db.close()


def enforce_guild_limits() -> int:
    """
    If any guild exceeds MAX_MEMORIES_PER_GUILD, prune oldest low-confidence ones.

    Memories with a NULL guild_id are counted together under the label
    'global'. Within an over-limit guild, victims are chosen by ascending
    confidence, then ascending created_at.

    Returns:
        Total number of memories pruned across all guilds.
    """
    db = get_db()
    try:
        # Get counts per guild
        cursor = db.execute("""
            SELECT COALESCE(guild_id, 'global') as guild, COUNT(*) as cnt
            FROM memories
            WHERE superseded_by IS NULL
            GROUP BY guild_id
            HAVING cnt > ?
        """, (MAX_MEMORIES_PER_GUILD,))
        over_limit = cursor.fetchall()

        total_pruned = 0
        for row in over_limit:
            guild = row["guild"]
            excess = row["cnt"] - MAX_MEMORIES_PER_GUILD
            print(f"Guild {guild} has {row['cnt']} memories, pruning {excess}...")

            # Get lowest confidence, oldest memories for this guild
            if guild == "global":
                cursor = db.execute("""
                    SELECT id FROM memories
                    WHERE superseded_by IS NULL AND guild_id IS NULL
                    ORDER BY confidence ASC, created_at ASC
                    LIMIT ?
                """, (excess,))
            else:
                cursor = db.execute("""
                    SELECT id FROM memories
                    WHERE superseded_by IS NULL AND guild_id = ?
                    ORDER BY confidence ASC, created_at ASC
                    LIMIT ?
                """, (guild, excess))

            to_prune = [r["id"] for r in cursor.fetchall()]
            if to_prune:
                # Excess can be far larger than SQLite's bound-variable limit,
                # so the update is batched.
                _mark_pruned(db, to_prune)
                total_pruned += len(to_prune)

        db.commit()
        return total_pruned
    except Exception:
        db.rollback()
        raise
    finally:
        db.close()


def get_maintenance_stats() -> Dict[str, Any]:
    """Get stats relevant to maintenance decisions.

    Returns:
        A dict with the keys ``active_memories``, ``superseded_memories``,
        ``low_confidence``, ``never_accessed``, ``confidence_distribution``
        (bucket name -> count) and ``per_guild`` (guild -> count, NULL guilds
        reported as 'global').
    """
    db = get_db()
    try:
        stats = {}

        # Total active
        cursor = db.execute("SELECT COUNT(*) FROM memories WHERE superseded_by IS NULL")
        stats["active_memories"] = cursor.fetchone()[0]

        # Superseded/pruned
        cursor = db.execute("SELECT COUNT(*) FROM memories WHERE superseded_by IS NOT NULL")
        stats["superseded_memories"] = cursor.fetchone()[0]

        # Low confidence (prune candidates)
        cursor = db.execute("""
            SELECT COUNT(*) FROM memories
            WHERE superseded_by IS NULL AND confidence <= ?
        """, (MIN_CONFIDENCE,))
        stats["low_confidence"] = cursor.fetchone()[0]

        # Never accessed
        cursor = db.execute("""
            SELECT COUNT(*) FROM memories
            WHERE superseded_by IS NULL AND last_accessed IS NULL
        """)
        stats["never_accessed"] = cursor.fetchone()[0]

        # Confidence distribution
        cursor = db.execute("""
            SELECT
                CASE
                    WHEN confidence > 0.8 THEN 'high'
                    WHEN confidence > 0.5 THEN 'medium'
                    ELSE 'low'
                END as bucket,
                COUNT(*)
            FROM memories
            WHERE superseded_by IS NULL
            GROUP BY bucket
        """)
        stats["confidence_distribution"] = {row[0]: row[1] for row in cursor}

        # Per guild counts
        cursor = db.execute("""
            SELECT COALESCE(guild_id, 'global') as guild, COUNT(*)
            FROM memories
            WHERE superseded_by IS NULL
            GROUP BY guild_id
        """)
        stats["per_guild"] = {row[0]: row[1] for row in cursor}

        return stats
    finally:
        db.close()


def run_weekly_maintenance():
    """
    Run all maintenance tasks. Call this from cron weekly.

    Runs decay, low-confidence pruning and per-guild limit enforcement in that
    order, then prints the current stats and a one-line summary.
    """
    print(f"=== Memory Maintenance: {datetime.now().isoformat()} ===\n")

    print("1. Decaying confidence for inactive memories...")
    decayed = decay_confidence()

    print("\n2. Pruning very low confidence old memories...")
    pruned = prune_low_confidence()

    print("\n3. Enforcing per-guild limits...")
    limited = enforce_guild_limits()

    print("\n4. Current stats:")
    stats = get_maintenance_stats()
    for key, value in stats.items():
        print(f" {key}: {value}")

    print(f"\n=== Done: decayed={decayed}, pruned={pruned}, limited={limited} ===")


if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("Usage:")
        print(" python memory-maintenance.py run # Run all maintenance")
        print(" python memory-maintenance.py stats # Show maintenance stats")
        print(" python memory-maintenance.py decay # Only decay confidence")
        print(" python memory-maintenance.py prune # Only prune low confidence")
        sys.exit(1)

    cmd = sys.argv[1]

    if cmd == "run":
        run_weekly_maintenance()
    elif cmd == "stats":
        import json
        stats = get_maintenance_stats()
        print(json.dumps(stats, indent=2))
    elif cmd == "decay":
        decay_confidence()
    elif cmd == "prune":
        prune_low_confidence()
    else:
        print(f"Unknown command: {cmd}")
        sys.exit(1)