clawdbot-workspace/mcp-eval-agent-4-report.json
2026-02-05 23:01:36 -05:00

149 lines
6.3 KiB
JSON

{
"evaluations": [
{
"mcp": "fieldedge",
"stage": 5,
"evidence": "Compiles cleanly. Has 7 implemented tools (list_work_orders, get_work_order, create_work_order, list_customers, list_technicians, list_invoices, list_equipment) with full API client. Has comprehensive README with setup instructions. 393 lines of implementation. Uses API key auth (simpler). Can start with `node dist/index.js`.",
"blockers": [
"No tests - can't verify tools actually work",
"No MCP Apps (no ui/ directory)",
"Not verified against real API",
"No integration examples"
],
"next_action": "Create test suite using mock API responses for each tool to verify Stage 5 → Stage 6"
},
{
"mcp": "freshbooks",
"stage": 4,
"evidence": "Compiles cleanly. Has 8 tool definitions with API client implementation (453 lines). Uses OAuth access token which is harder to obtain. Has full CRUD methods for invoices, clients, expenses, time entries.",
"blockers": [
"No README - zero documentation on setup",
"OAuth required - can't just use with API key",
"No tests",
"No MCP Apps",
"Can't verify if tools work without real OAuth flow"
],
"next_action": "Write README.md with OAuth setup instructions + test with real FreshBooks sandbox account"
},
{
"mcp": "freshdesk",
"stage": 5,
"evidence": "Compiles cleanly. Has 8 implemented tools with API client. Uses simple API key auth (good). Clean implementation with proper error handling.",
"blockers": [
"No README - no documentation",
"No tests",
"No MCP Apps",
"Haven't verified tools against real API"
],
"next_action": "Create README.md documenting API key acquisition + add basic test coverage"
},
{
"mcp": "gusto",
"stage": 4,
"evidence": "Compiles cleanly. Has 7 tools implemented. Uses OAuth access token. 280+ lines of implementation with proper API client structure.",
"blockers": [
"No README - zero setup docs",
"OAuth required - complex setup barrier",
"No tests",
"No MCP Apps",
"Can't test without OAuth credentials"
],
"next_action": "Document OAuth flow in README + create integration test with Gusto sandbox"
},
{
"mcp": "helpscout",
"stage": 4,
"evidence": "Compiles cleanly. Has 7 tools defined. Uses OAuth 2.0 bearer tokens. Has conversation, customer, mailbox endpoints implemented.",
"blockers": [
"No README",
"OAuth required",
"No tests",
"No MCP Apps",
"OAuth complexity prevents immediate use"
],
"next_action": "Write README with OAuth app creation steps + validate against Help Scout API docs"
},
{
"mcp": "housecall-pro",
"stage": 5,
"evidence": "Compiles cleanly. Has 8 implemented tools (jobs, estimates, customers, invoices, employees). Has good README with setup instructions (393 lines total). Uses simple API key auth. Documentation explains MAX plan requirement.",
"blockers": [
"No tests",
"No MCP Apps",
"Not verified against real API",
"README could include example responses"
],
"next_action": "Add test suite with mock API responses to verify Stage 5 → Stage 6"
},
{
"mcp": "jobber",
"stage": 4,
"evidence": "Compiles cleanly. Has 8 tools with API client. Uses OAuth access token. Implementation covers jobs, clients, quotes, visits, invoices.",
"blockers": [
"No README",
"OAuth required - barrier to immediate use",
"No tests",
"No MCP Apps"
],
"next_action": "Create README documenting OAuth setup + test with Jobber sandbox environment"
},
{
"mcp": "keap",
"stage": 4,
"evidence": "Compiles cleanly. Has 8 tools implemented. Uses OAuth2 bearer token. Covers contacts, opportunities, tasks, emails, tags, campaigns, notes, appointments.",
"blockers": [
"No README",
"OAuth2 required",
"No tests",
"No MCP Apps",
"Complex auth prevents quick testing"
],
"next_action": "Document OAuth2 app registration process + create integration test suite"
},
{
"mcp": "lightspeed",
"stage": 4,
"evidence": "Compiles cleanly. Has 8 tools for retail operations. Uses OAuth2 authentication. Covers products, customers, sales, inventory, categories.",
"blockers": [
"No README",
"OAuth2 authentication barrier",
"No tests",
"No MCP Apps",
"Account ID required in addition to OAuth token"
],
"next_action": "Create comprehensive README with OAuth setup + account ID configuration"
},
{
"mcp": "mailchimp",
"stage": 5,
"evidence": "Compiles cleanly. Has 8 tools implemented (384 lines). Uses simple API key authentication. Includes datacenter detection from API key. Tools for lists, campaigns, members, templates, automation.",
"blockers": [
"No README - no setup documentation",
"No tests",
"No MCP Apps",
"Haven't verified MD5 email hashing works correctly"
],
"next_action": "Write README with API key setup instructions + add test suite with mock responses"
}
],
"summary": {
"total_evaluated": 10,
"stage_distribution": {
"stage_4": 6,
"stage_5": 4
},
"common_blockers": [
"No tests (10/10)",
"No MCP Apps/UI (10/10)",
"No README (8/10)",
"OAuth complexity (6/10)"
],
"quality_tiers": {
"best": ["fieldedge", "housecall-pro"],
"good_but_undocumented": ["freshdesk", "mailchimp"],
"needs_oauth_docs": ["freshbooks", "gusto", "helpscout", "jobber", "keap", "lightspeed"]
},
"ruthless_assessment": "ALL of these are Stage 4-5 at best. They compile and have tool implementations, but NONE have tests, NONE have MCP Apps, and MOST lack documentation. The OAuth-based ones (6/10) can't be used TODAY without significant setup work. Only 2 (fieldedge, housecall-pro) have READMEs, but even those lack tests to prove the tools work. None are Integration Ready (Stage 8) or Production Ready (Stage 9). Call it Stage 4.5 average - better than scaffolding, but far from production."
}
}