// clawdbot-workspace/goosefactory/packages/learning/src/analysis/preferences.ts

/**
 * Jake's Preference Model
 *
 * Builds a statistical model of what Jake approves vs. rejects:
 * - Approval rates by server type and pipeline stage
 * - Decision speed as quality signal
 * - Consistent approval/rejection patterns
 */

import type {
  EnrichedFeedbackEvent,
  ApprovalPatterns,
} from "../types.js";

/**
 * Build the complete approval patterns model from feedback events.
 *
 * Only events whose `feedback.decision` is truthy are counted. For those
 * events the model reports:
 * - `overall`: share of "approved" decisions, number of decisions, and the
 *   mean of `meta.timeToDecisionMs`.
 * - `byServerType`: approved / rejected / needs_work tallies per
 *   `mcpServerType` (missing values are grouped under "unknown"). The
 *   approval rate is relative to the sum of those three tallies, so a
 *   decision with any other value does not affect it.
 * - `byStage`: approval rate, mean decision time and decision count per
 *   `pipelineStage` (missing values are grouped under "unknown").
 *
 * @param events - Feedback events to aggregate. The array is not mutated.
 * @returns The aggregated model. When no event carries a decision, all
 *   overall figures are 0 and both breakdowns are empty objects.
 */
export async function buildPreferenceModel(
  events: EnrichedFeedbackEvent[]
): Promise<ApprovalPatterns> {
  const decisions = events.filter((e) => e.feedback.decision);

  if (decisions.length === 0) {
    return {
      overall: { approvalRate: 0, totalDecisions: 0, avgTimeToDecisionMs: 0 },
      byServerType: {},
      byStage: {},
    };
  }

  // Buckets are accumulated in Maps instead of plain objects. With `{}` a key
  // such as "constructor", "toString" or "__proto__" resolves to an inherited
  // Object.prototype member, so the bucket is never created and the counters
  // end up being written onto a global builtin.
  const serverTypeStats = new Map<
    string,
    { approved: number; rejected: number; needsWork: number; approvalRate: number }
  >();
  const stageTotals = new Map<
    string,
    { approved: number; totalTimeMs: number; count: number }
  >();

  let approved = 0;
  let totalTimeMs = 0;

  // Single pass over the decisions feeds the overall, per-server-type and
  // per-stage accumulators at once.
  for (const event of decisions) {
    const decision = event.feedback.decision?.decision;
    const timeMs = event.meta.timeToDecisionMs;
    const isApproved = decision === "approved";

    // ─── Overall ───
    if (isApproved) approved++;
    totalTimeMs += timeMs;

    // ─── By server type ───
    const serverType = event.mcpServerType ?? "unknown";
    let serverEntry = serverTypeStats.get(serverType);
    if (!serverEntry) {
      serverEntry = { approved: 0, rejected: 0, needsWork: 0, approvalRate: 0 };
      serverTypeStats.set(serverType, serverEntry);
    }
    if (isApproved) serverEntry.approved++;
    else if (decision === "rejected") serverEntry.rejected++;
    else if (decision === "needs_work") serverEntry.needsWork++;

    // ─── By stage ───
    const stage = event.pipelineStage ?? "unknown";
    let stageEntry = stageTotals.get(stage);
    if (!stageEntry) {
      stageEntry = { approved: 0, totalTimeMs: 0, count: 0 };
      stageTotals.set(stage, stageEntry);
    }
    stageEntry.count++;
    stageEntry.totalTimeMs += timeMs;
    if (isApproved) stageEntry.approved++;
  }

  // Rate is relative to the three recognised outcomes only; a bucket that
  // holds nothing but unrecognised decision values keeps a rate of 0.
  for (const entry of serverTypeStats.values()) {
    const total = entry.approved + entry.rejected + entry.needsWork;
    entry.approvalRate = total > 0 ? entry.approved / total : 0;
  }

  // A stage bucket only exists once it has seen an event, so count >= 1 and
  // the divisions below are safe.
  const stageStats = new Map<
    string,
    { approvalRate: number; avgTimeMs: number; count: number }
  >();
  for (const [stage, totals] of stageTotals) {
    stageStats.set(stage, {
      approvalRate: totals.approved / totals.count,
      avgTimeMs: totals.totalTimeMs / totals.count,
      count: totals.count,
    });
  }

  // Object.fromEntries defines each key as an own data property, so even
  // "__proto__" becomes a regular entry of the returned record.
  const byServerType: ApprovalPatterns["byServerType"] =
    Object.fromEntries(serverTypeStats);
  const byStage: ApprovalPatterns["byStage"] = Object.fromEntries(stageStats);

  return {
    overall: {
      approvalRate: approved / decisions.length,
      totalDecisions: decisions.length,
      avgTimeToDecisionMs: totalTimeMs / decisions.length,
    },
    byServerType,
    byStage,
  };
}

/**
 * Summarise how quickly approvals and rejections were made.
 *
 * Interpretation of the buckets:
 * - Fast approvals (< 30s) → Buba nailed it
 * - Fast rejections (< 30s) → Obvious flaw
 * - Slow approvals (> 5min) → Edge case
 * - Slow rejections (> 5min) → Complex problem
 *
 * Only "approved" and "rejected" decisions are considered; any other
 * decision value, and events without a decision, are skipped. Both
 * thresholds are exclusive, so a decision taking exactly 30s or exactly
 * 5min is counted as neither fast nor slow.
 *
 * @param events - Feedback events to inspect. The array is not mutated.
 * @returns Fast/slow counts per outcome plus the mean decision time per
 *   outcome (0 when there were no decisions of that outcome).
 */
export function analyzeDecisionSpeed(events: EnrichedFeedbackEvent[]): {
  fastApprovals: number;
  fastRejections: number;
  slowApprovals: number;
  slowRejections: number;
  avgApprovalTimeMs: number;
  avgRejectionTimeMs: number;
} {
  const fastCutoffMs = 30 * 1000; // 30 seconds
  const slowCutoffMs = 5 * 60 * 1000; // 5 minutes

  const approvalStats = { fast: 0, slow: 0, count: 0, totalMs: 0 };
  const rejectionStats = { fast: 0, slow: 0, count: 0, totalMs: 0 };

  for (const { feedback, meta } of events) {
    const outcome = feedback.decision?.decision;
    const stats =
      outcome === "approved"
        ? approvalStats
        : outcome === "rejected"
          ? rejectionStats
          : undefined;
    if (!stats) continue;

    const elapsedMs = meta.timeToDecisionMs;
    stats.count += 1;
    stats.totalMs += elapsedMs;
    if (elapsedMs < fastCutoffMs) stats.fast += 1;
    if (elapsedMs > slowCutoffMs) stats.slow += 1;
  }

  // Mean decision time for one outcome; 0 when that outcome never occurred.
  const meanMs = (stats: { count: number; totalMs: number }): number => {
    if (stats.count > 0) {
      return stats.totalMs / stats.count;
    }
    return 0;
  };

  return {
    fastApprovals: approvalStats.fast,
    fastRejections: rejectionStats.fast,
    slowApprovals: approvalStats.slow,
    slowRejections: rejectionStats.slow,
    avgApprovalTimeMs: meanMs(approvalStats),
    avgRejectionTimeMs: meanMs(rejectionStats),
  };
}

/**
 * Identify patterns Jake consistently approves or rejects.
 * These are high-value signals for the pre-check system.
 *
 * Each theme listed in an event's `_themes` is tallied once per listing.
 * Only events that carry a decision contribute: an event without a decision
 * is neither an approval nor a rejection, so counting it would dilute
 * approval rates and could label a theme as "always rejected" without a
 * single rejection. Any decision other than "approved" (e.g. "rejected",
 * "needs_work") counts against the theme's approval rate.
 *
 * Themes with fewer than 3 decided events are ignored as too thin to
 * classify. Each remaining theme lands in at most one bucket; themes with
 * an approval rate between 30% and 90% (inclusive) are not reported.
 *
 * @param events - Feedback events to inspect. The array is not mutated.
 * @returns Theme names grouped by approval-rate bucket.
 */
export function identifyConsistentPatterns(events: EnrichedFeedbackEvent[]): {
  alwaysApproved: string[]; // Themes/patterns with 100% approval rate
  alwaysRejected: string[]; // Themes/patterns with 0% approval rate
  highApproval: string[];   // >90% approval rate
  lowApproval: string[];    // <30% approval rate
} {
  const MIN_DECISIONS = 3; // Need minimum data before classifying a theme

  const themeOutcomes = new Map<string, { approved: number; total: number }>();

  for (const event of events) {
    const decision = event.feedback.decision;
    // Skip undecided events so they are not mistaken for non-approvals.
    if (!decision) continue;

    const isApproved = decision.decision === "approved";

    for (const theme of event._themes ?? []) {
      let entry = themeOutcomes.get(theme);
      if (!entry) {
        entry = { approved: 0, total: 0 };
        themeOutcomes.set(theme, entry);
      }
      entry.total++;
      if (isApproved) entry.approved++;
    }
  }

  const alwaysApproved: string[] = [];
  const alwaysRejected: string[] = [];
  const highApproval: string[] = [];
  const lowApproval: string[] = [];

  for (const [theme, data] of themeOutcomes) {
    if (data.total < MIN_DECISIONS) continue;

    // Compare counts for the 100% / 0% buckets so they are exact; the
    // ratio is only needed for the threshold buckets.
    const rate = data.approved / data.total;
    if (data.approved === data.total) alwaysApproved.push(theme);
    else if (data.approved === 0) alwaysRejected.push(theme);
    else if (rate > 0.9) highApproval.push(theme);
    else if (rate < 0.3) lowApproval.push(theme);
  }

  return { alwaysApproved, alwaysRejected, highApproval, lowApproval };
}