275 lines
8.8 KiB
Python

import sqlite3
import json
import numpy as np
from typing import List, Dict, Any, Optional
class DatabaseManager:
    """SQLite-backed store for parsed document data.

    Persists hierarchical sections, their sentences, extracted variables,
    vector embeddings (as float32 blobs), pairwise similarity scores, and
    named statistics with optional JSON payloads.
    """

    def __init__(self, db_path: str = "constitution.db"):
        """Open (or create) the SQLite database at *db_path*.

        Rows are returned as :class:`sqlite3.Row` so callers can convert
        them directly to dicts.
        """
        self.db_path = db_path
        self.conn = sqlite3.connect(db_path)
        self.conn.row_factory = sqlite3.Row

    def create_tables(self):
        """Create all tables if they do not already exist. Idempotent."""
        cursor = self.conn.cursor()
        # Hierarchical document sections; parent_id is a self-reference.
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS sections (
                id INTEGER PRIMARY KEY,
                section_type TEXT,
                parent_id INTEGER,
                title TEXT,
                content TEXT,
                line_start INTEGER,
                line_end INTEGER,
                hierarchy_level INTEGER,
                path TEXT,
                FOREIGN KEY (parent_id) REFERENCES sections(id)
            );
        """)
        # Extracted variables; names are unique across the corpus.
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS variables (
                id INTEGER PRIMARY KEY,
                name TEXT UNIQUE,
                category TEXT,
                priority_level INTEGER,
                is_hard_constraint BOOLEAN,
                principal_assignment TEXT,
                frequency INTEGER DEFAULT 0,
                description TEXT
            );
        """)
        # Where each variable occurs (section/sentence plus surrounding text).
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS variable_occurrences (
                id INTEGER PRIMARY KEY,
                variable_id INTEGER,
                section_id INTEGER,
                sentence_id INTEGER,
                context TEXT,
                FOREIGN KEY (variable_id) REFERENCES variables(id),
                FOREIGN KEY (section_id) REFERENCES sections(id)
            );
        """)
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS sentences (
                id INTEGER PRIMARY KEY,
                section_id INTEGER,
                text TEXT,
                sentence_number INTEGER,
                line_number INTEGER,
                FOREIGN KEY (section_id) REFERENCES sections(id)
            );
        """)
        # Embedding vectors stored as raw float32 bytes (see add_embedding).
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS embeddings (
                id INTEGER PRIMARY KEY,
                content_id INTEGER,
                content_type TEXT,
                embedding BLOB,
                embedding_dim INTEGER DEFAULT 768,
                chunk_start INTEGER,
                chunk_end INTEGER,
                FOREIGN KEY (content_id) REFERENCES sections(id) ON DELETE CASCADE
            );
        """)
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS similarity (
                id INTEGER PRIMARY KEY,
                content_id_1 INTEGER,
                content_id_2 INTEGER,
                similarity_score REAL,
                FOREIGN KEY (content_id_1) REFERENCES sections(id),
                FOREIGN KEY (content_id_2) REFERENCES sections(id)
            );
        """)
        # Named scalar metrics with an optional JSON payload.
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS statistics (
                id INTEGER PRIMARY KEY,
                metric_name TEXT UNIQUE,
                metric_value REAL,
                json_data TEXT
            );
        """)
        self.conn.commit()

    def populate(
        self,
        sections: List[Dict],
        sentences: List[Dict],
        variables: List[Dict],
        constraints: List[Dict],
    ):
        """Bulk-insert parsed document data in one transaction.

        Row ids are assigned sequentially (starting at 1) in list order.
        Sentences must carry a ``section_id`` key referencing a section's
        assigned id. *constraints* is accepted for interface stability but
        is not currently persisted.
        """
        cursor = self.conn.cursor()
        section_id_map = {}
        for i, section in enumerate(sections, 1):
            # Resolve the declared parent through the map built so far;
            # top-level sections (no/falsy parent_id) get NULL.
            parent_id = (
                section_id_map.get(section.get("parent_id"))
                if section.get("parent_id")
                else None
            )
            cursor.execute(
                """
                INSERT INTO sections (id, section_type, parent_id, title, content, line_start, line_end, hierarchy_level, path)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
                """,
                (
                    i,
                    section.get("section_type"),
                    parent_id,
                    section.get("title"),
                    section.get("content"),
                    section.get("line_start"),
                    section.get("line_end"),
                    section.get("hierarchy_level"),
                    section.get("path"),
                ),
            )
            section_id_map[i] = i
        for i, sentence in enumerate(sentences, 1):
            # BUG FIX: this previously read section_id from the stale
            # `section` variable left over from the loop above, so every
            # sentence got the same (wrong) section reference. Each
            # sentence must supply its own section_id.
            cursor.execute(
                """
                INSERT INTO sentences (id, section_id, text, sentence_number, line_number)
                VALUES (?, ?, ?, ?, ?)
                """,
                (
                    i,
                    sentence.get("section_id"),
                    sentence.get("text"),
                    sentence.get("sentence_number"),
                    sentence.get("line_number"),
                ),
            )
        var_id_map = {}
        for i, var in enumerate(variables, 1):
            cursor.execute(
                """
                INSERT INTO variables (id, name, category, priority_level, is_hard_constraint, principal_assignment, frequency, description)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                """,
                (
                    i,
                    var.get("name"),
                    var.get("category"),
                    var.get("priority_level"),
                    var.get("is_hard_constraint"),
                    var.get("principal_assignment"),
                    var.get("frequency", 0),
                    var.get("description"),
                ),
            )
            var_id_map[var.get("name")] = i
        self.conn.commit()

    def get_sections_with_embeddings(self) -> List[Dict]:
        """Return all sections, each with an ``embeddings`` list attached."""
        cursor = self.conn.cursor()
        cursor.execute("SELECT * FROM sections")
        rows = cursor.fetchall()  # fetch all first so the cursor can be reused
        sections = []
        for row in rows:
            section = dict(row)
            cursor.execute(
                "SELECT * FROM embeddings WHERE content_id = ?", (section["id"],)
            )
            embeddings = cursor.fetchall()
            section["embeddings"] = [dict(e) for e in embeddings]
            sections.append(section)
        return sections

    def get_variables(self) -> List[Dict]:
        """Return every row of the variables table as a dict."""
        cursor = self.conn.cursor()
        cursor.execute("SELECT * FROM variables")
        return [dict(row) for row in cursor.fetchall()]

    def get_sentences(self) -> List[Dict]:
        """Return every row of the sentences table as a dict."""
        cursor = self.conn.cursor()
        cursor.execute("SELECT * FROM sentences")
        return [dict(row) for row in cursor.fetchall()]

    def add_embedding(
        self,
        content_id: int,
        content_type: str,
        embedding: np.ndarray,
        chunk_start: Optional[int] = None,
        chunk_end: Optional[int] = None,
    ):
        """Store an embedding vector for *content_id*.

        The vector is coerced to float32 before serialization so that
        :meth:`get_embedding` (which decodes as float32) round-trips
        correctly regardless of the caller's dtype.
        """
        cursor = self.conn.cursor()
        arr = np.asarray(embedding, dtype=np.float32)
        cursor.execute(
            """
            INSERT INTO embeddings (content_id, content_type, embedding, embedding_dim, chunk_start, chunk_end)
            VALUES (?, ?, ?, ?, ?, ?)
            """,
            (
                content_id,
                content_type,
                arr.tobytes(),
                len(arr),
                chunk_start,
                chunk_end,
            ),
        )
        self.conn.commit()

    def get_embedding(self, content_id: int) -> Optional[np.ndarray]:
        """Return the first stored embedding for *content_id*, or None."""
        cursor = self.conn.cursor()
        cursor.execute(
            "SELECT embedding FROM embeddings WHERE content_id = ? LIMIT 1",
            (content_id,),
        )
        row = cursor.fetchone()
        if row:
            # Stored as raw float32 bytes by add_embedding.
            return np.frombuffer(row["embedding"], dtype=np.float32)
        return None

    def add_similarity(self, content_id_1: int, content_id_2: int, score: float):
        """Record a similarity score between two content ids."""
        cursor = self.conn.cursor()
        cursor.execute(
            """
            INSERT INTO similarity (content_id_1, content_id_2, similarity_score)
            VALUES (?, ?, ?)
            """,
            (content_id_1, content_id_2, score),
        )
        self.conn.commit()

    def get_statistics(self, metric_name: str) -> Optional[Dict]:
        """Return the statistics row for *metric_name* (JSON decoded), or None."""
        cursor = self.conn.cursor()
        cursor.execute("SELECT * FROM statistics WHERE metric_name = ?", (metric_name,))
        row = cursor.fetchone()
        if row:
            data = dict(row)
            if data.get("json_data"):
                data["json_data"] = json.loads(data["json_data"])
            return data
        return None

    def set_statistics(
        self, metric_name: str, metric_value: float, json_data: Optional[Dict] = None
    ):
        """Insert or overwrite the statistic *metric_name*.

        *json_data*, when given, is serialized to JSON. An explicit empty
        dict is stored as "{}" rather than being dropped to NULL (the
        previous truthiness test conflated {} with None).
        """
        cursor = self.conn.cursor()
        json_str = json.dumps(json_data) if json_data is not None else None
        cursor.execute(
            """
            INSERT OR REPLACE INTO statistics (metric_name, metric_value, json_data)
            VALUES (?, ?, ?)
            """,
            (metric_name, metric_value, json_str),
        )
        self.conn.commit()

    def close(self):
        """Close the underlying database connection."""
        self.conn.close()