=== NEW === - studio/ — MCPEngine Studio scaffold (Next.js monorepo, build plan) - docs/FACTORY-V2.md — Factory v2 architecture doc - docs/CALENDLY_MCP_BUILD_SUMMARY.md — Calendly MCP build report === UPDATED SERVERS === - fieldedge: Added jobs-tools, UI build script, main entry update - lightspeed: Updated main + server entry points - squarespace: Added collection-browser + page-manager apps - toast: Added main + server entry points === INFRA === - infra/command-center/state.json — Updated pipeline state - infra/command-center/FACTORY-V2.md — Factory v2 operator playbook
236 lines
6.2 KiB
TypeScript
236 lines
6.2 KiB
TypeScript
// Tester Service — streams multi-layer QA test execution via Claude
|
|
|
|
import Anthropic from '@anthropic-ai/sdk';
|
|
import { getSkill } from '../skills/loader';
|
|
import type {
|
|
PipelineEvent,
|
|
TestLayer,
|
|
TestResult,
|
|
TestDetail,
|
|
} from '../types';
|
|
|
|
// Claude model used for every test-layer request.
// NOTE(review): the previous value, 'claude-sonnet-4-5-20250514', combined the
// Sonnet 4.5 family name with the date stamp used by Sonnet 4, which does not
// look like a published snapshot ID. Confirm this ID against Anthropic's
// current model list before shipping.
const MODEL = 'claude-sonnet-4-5-20250929';

// Upper bound on the number of tokens Claude may generate per layer response.
const MAX_TOKENS = 8192;
|
|
|
|
/**
 * Executes each requested QA layer against generated MCP server source by
 * prompting Claude once per layer, in the order given.
 *
 * For every layer the generator first emits `test:running`, then exactly one
 * `test:result`. When the request or the stream throws, that `test:result`
 * carries a synthetic failing result; if the error is an
 * `Anthropic.RateLimitError`, a recoverable `error` event is emitted as well.
 * A failure in one layer does not prevent the remaining layers from running.
 *
 * @param serverCode - Source of the MCP server under test; embedded verbatim in the prompt.
 * @param layers - Test layers to run, processed sequentially.
 * @returns An async generator of pipeline events.
 */
export async function* runTests(
  serverCode: string,
  layers: TestLayer[]
): AsyncGenerator<PipelineEvent> {
  const anthropic = new Anthropic({ apiKey: process.env.ANTHROPIC_API_KEY });
  const testerSkill = getSkill('tester');

  for (const layer of layers) {
    yield { type: 'test:running', layer };

    try {
      // Built inside the try block so that any failure while assembling the
      // request is reported as a failed layer, same as an API failure.
      const prompt = `Run "${layer}" layer tests on this MCP server code.

## Test Layer: ${layer}
${getLayerInstructions(layer)}

## Server Code
\`\`\`typescript
${serverCode}
\`\`\`

## Output Format
Return your test results as a JSON object:
\`\`\`json
{
"layer": "${layer}",
"passed": true/false,
"total": <number>,
"failures": <number>,
"details": [
{ "name": "test name", "passed": true/false, "message": "...", "severity": "error|warning|info" }
],
"duration": <estimated_ms>
}
\`\`\`

Run all ${layer} tests now and return results.`;

      const responseStream = anthropic.messages.stream({
        model: MODEL,
        max_tokens: MAX_TOKENS,
        system: testerSkill,
        messages: [{ role: 'user', content: prompt }],
      });

      // Collect only the text deltas; every other stream event is ignored.
      const pieces: string[] = [];
      for await (const chunk of responseStream) {
        if (chunk.type !== 'content_block_delta') continue;
        if (chunk.delta.type !== 'text_delta') continue;
        pieces.push(chunk.delta.text);
      }

      // Wait for the stream to finish before interpreting the collected text.
      await responseStream.finalMessage();

      const result = extractTestResult(pieces.join(''), layer);
      yield { type: 'test:result', result };
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);

      // Report the layer as failed so consumers still get one result per layer.
      const failedResult: TestResult = {
        layer,
        passed: false,
        total: 0,
        failures: 1,
        details: [
          {
            name: `${layer} layer execution`,
            passed: false,
            message: `Test execution failed: ${reason}`,
            severity: 'error',
          },
        ],
        duration: 0,
      };
      yield { type: 'test:result', result: failedResult };

      // Rate limiting is additionally surfaced as a recoverable pipeline error.
      if (error instanceof Anthropic.RateLimitError) {
        yield {
          type: 'error',
          message: `Rate limited during ${layer} tests: ${reason}`,
          recoverable: true,
        };
      }
    }
  }
}
|
|
|
|
/**
 * Coerces one entry of a parsed `details` array into a TestDetail.
 *
 * Returns `undefined` for entries that are not plain objects (for example
 * `null` or a bare string) so the caller can skip them instead of throwing.
 * Fields of the wrong type fall back to the same defaults used for missing
 * fields: name 'unnamed', passed false, severity 'info'.
 */
function normalizeTestDetail(raw: unknown): TestDetail | undefined {
  if (typeof raw !== 'object' || raw === null || Array.isArray(raw)) {
    return undefined;
  }
  const entry = raw as Record<string, unknown>;
  const severity = entry.severity;
  return {
    name: typeof entry.name === 'string' && entry.name !== '' ? entry.name : 'unnamed',
    passed: typeof entry.passed === 'boolean' ? entry.passed : false,
    message: typeof entry.message === 'string' ? entry.message : undefined,
    // Only the severities requested in the prompt are accepted.
    // NOTE(review): assumed to match TestDetail['severity'] — confirm against ../types.
    severity:
      severity === 'error' || severity === 'warning' || severity === 'info' ? severity : 'info',
  };
}

/**
 * Converts a parsed JSON value into a TestResult for `layer`.
 *
 * Returns `undefined` when the value is not a JSON object, so the caller can
 * try its next parsing strategy. Missing or wrongly-typed fields default to
 * passed false, counts 0, and an empty details list.
 */
function normalizeTestResult(parsed: unknown, layer: TestLayer): TestResult | undefined {
  if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
    return undefined;
  }
  const record = parsed as Record<string, unknown>;
  const asNumber = (value: unknown): number =>
    typeof value === 'number' && Number.isFinite(value) ? value : 0;

  const rawDetails: unknown[] = Array.isArray(record.details) ? record.details : [];
  const details: TestDetail[] = [];
  for (const item of rawDetails) {
    const detail = normalizeTestDetail(item);
    if (detail !== undefined) {
      details.push(detail);
    }
  }

  return {
    // The requested layer is authoritative; any "layer" field in the payload is ignored.
    layer,
    passed: typeof record.passed === 'boolean' ? record.passed : false,
    total: asNumber(record.total),
    failures: asNumber(record.failures),
    details,
    duration: asNumber(record.duration),
  };
}

/**
 * Extracts a TestResult from Claude's free-form response text.
 *
 * Two strategies are tried in order:
 *   1. the body of the first ```json fenced block;
 *   2. the substring from the first '{' to the last '}'.
 * The first candidate that parses as a JSON object wins. If neither does, a
 * failing result with a single 'result_parsing' error detail is returned.
 *
 * @param text - Full response text accumulated from the stream.
 * @param layer - Layer the response belongs to; always used as the result's layer.
 * @returns A normalised TestResult; this function does not throw on malformed input.
 */
function extractTestResult(text: string, layer: TestLayer): TestResult {
  const candidates: string[] = [];

  const fencedBody = text.match(/```json\s*\n([\s\S]*?)\n```/)?.[1];
  if (fencedBody !== undefined) {
    candidates.push(fencedBody);
  }

  const braceStart = text.indexOf('{');
  const braceEnd = text.lastIndexOf('}');
  if (braceStart !== -1 && braceEnd > braceStart) {
    candidates.push(text.slice(braceStart, braceEnd + 1));
  }

  for (const candidate of candidates) {
    let parsed: unknown;
    try {
      parsed = JSON.parse(candidate);
    } catch {
      // Not valid JSON; move on to the next strategy.
      continue;
    }
    const result = normalizeTestResult(parsed, layer);
    if (result !== undefined) {
      return result;
    }
  }

  // Fallback: nothing in the response could be interpreted as a result object.
  return {
    layer,
    passed: false,
    total: 0,
    failures: 1,
    details: [
      {
        name: 'result_parsing',
        passed: false,
        message: 'Could not parse test results from Claude response',
        severity: 'error',
      },
    ],
    duration: 0,
  };
}
|
|
|
|
/**
 * Returns the checklist text that is inserted into the prompt for a test layer.
 *
 * The layers 'protocol', 'static', 'visual', 'functional', 'performance' and
 * 'security' each have a fixed checklist; any other value produces a generic
 * one-line instruction that names the layer.
 *
 * @param layer - Layer whose instructions are requested.
 * @returns Multi-line instruction text (lines separated by '\n').
 */
function getLayerInstructions(layer: TestLayer): string {
  // A Map is used so that only the listed keys match; inherited object
  // properties can never be mistaken for a layer name.
  const checklists = new Map<string, string[]>([
    [
      'protocol',
      [
        'Test MCP protocol compliance:',
        '- Verify initialize/capabilities handshake',
        '- Check tools/list returns valid tool definitions',
        '- Verify tool call/response format matches MCP spec',
        '- Test error response format',
        '- Check JSON-RPC envelope correctness',
      ],
    ],
    [
      'static',
      [
        'Run static analysis:',
        '- TypeScript type safety (look for any, unknown misuse)',
        '- Input validation completeness (all required params validated)',
        '- Error handling coverage (try/catch around external calls)',
        '- Import/export correctness',
        '- Naming convention compliance',
      ],
    ],
    [
      'visual',
      [
        'Evaluate code quality visually:',
        '- Code organization and file structure',
        '- Documentation completeness (JSDoc, README)',
        '- Consistent formatting and style',
        '- Appropriate abstraction levels',
        '- Clean separation of concerns',
      ],
    ],
    [
      'functional',
      [
        'Test functional correctness:',
        '- Each tool handles valid input correctly',
        '- Each tool handles invalid input gracefully',
        '- Auth flow works for the configured auth type',
        '- Rate limiting is respected',
        '- Edge cases (empty arrays, null values, large inputs)',
      ],
    ],
    [
      'performance',
      [
        'Evaluate performance characteristics:',
        '- No synchronous blocking operations',
        '- Efficient data serialization',
        '- Connection pooling / reuse patterns',
        '- Memory leak potential (event listeners, closures)',
        '- Response size management',
      ],
    ],
    [
      'security',
      [
        'Security audit:',
        '- No hardcoded credentials',
        '- Input sanitization (injection prevention)',
        '- Proper auth token handling (not logged, not in URLs)',
        '- Rate limit enforcement',
        '- SSRF prevention for URL parameters',
        '- Safe error messages (no internal details leaked)',
      ],
    ],
  ]);

  const lines = checklists.get(layer);
  if (lines === undefined) {
    return `Run comprehensive tests for the "${layer}" layer.`;
  }
  return lines.join('\n');
}
|