78 lines
4.1 KiB
JSON
78 lines
4.1 KiB
JSON
{
|
|
"analysis": "Reonomy Scraper Research Complete - Summary of v9 (Puppeteer) vs v10 (agent-browser) versions",
|
|
"date": "2026-01-15",
|
|
"workspace": "/Users/jakeshore/.clawdbot/workspace",
|
|
|
|
"versions": {
|
|
"v9_puppeteer": {
|
|
"file": "reonomy-scraper-v9-owner-tab.js",
|
|
"status": "✅ Works for owner names",
|
|
"issues": "❌ Missing email/phone extraction logic",
|
|
"pros": ["Proven architecture (Puppeteer)", "Successfully extracts owner names", "Simple codebase"],
|
|
"cons": ["Complex regex had syntax errors", "Missing email/phone extraction", "No state persistence"]
|
|
},
|
|
|
|
"v10_agent_browser": {
|
|
"file": "reonomy-scraper-v10-agent-browser.js",
|
|
"status": "❓ Not tested, has syntax errors",
|
|
"issues": ["Agent-browser Node.js eval syntax incompatibility", "Syntax errors in regex parsing", "Timeouts"],
|
|
"pros": ["Faster Rust CLI", "Ref-based navigation", "State save/load"],
|
|
"cons": ["Untested", "New tool complexity", "Potential daemon issues"]
|
|
},
|
|
|
|
"v9_fixed": {
|
|
"file": "reonomy-scraper-v9-fixed.js",
|
|
"status": "✅ Fixed syntax error",
|
|
"issues": ["Same as v9"],
|
|
"pros": ["Fixed comma in regex", "Added email/phone extraction placeholders"],
|
|
"cons": ["Based on v9 (proven codebase), so it inherits v9's remaining issues"]
|
|
},
|
|
|
|
"v10_minimal": {
|
|
"file": "reonomy-scraper-v10-agent-browser.js",
|
|
"status": "❓ Syntax errors, timeouts",
|
|
"issues": ["Agent-browser eval syntax incompatibility", "Complex logic from scratch"],
|
|
"pros": ["Minimal code changes"],
|
|
"cons": ["Untested", "High complexity", "Unknown agent-browser quirks"]
|
|
}
|
|
},
|
|
|
|
"url_patterns": {
|
|
"search_with_filters": "https://app.reonomy.com/#!/search/504a2d13-d88f-4213-9ac6-a7c8bc7c20c6",
|
|
"ownership_direct": "https://app.reonomy.com/#!/search/{search-id}/property/{property-id}/ownership",
|
|
"search_id_encodes": "The search ID (504a2d13-d88f-4213-9ac6-a7c8bc7c20c6) encodes the phone + email filters that were applied.",
|
|
"note": "Direct ownership URLs work - no need to click property cards from search results."
|
|
},
|
|
|
|
"data_requirements": {
|
|
"builder_lot": ["Address", "City", "State", "ZIP", "Square Footage", "Property Type"],
|
|
"owner": ["Owner Names", "Emails", "Phones"],
|
|
"css_selectors": {
|
|
"phones": "p.MuiTypography-root.jss1797.jss1798.MuiTypography-body2 (works in v9)"
|
|
},
|
|
"working_approach": {
|
|
"method": "v9 (Puppeteer)",
|
|
"steps": ["Login to Reonomy", "Navigate to search", "Extract property IDs", "For each property: click property card → wait → extract Owner tab data → go back"],
|
|
"extraction": "Owner tab only (no Builder and Lot, no emails/phones)",
|
|
"navigation": "Clicks property cards (brittle)"
|
|
}
|
|
},
|
|
|
|
"recommendations": {
|
|
"use_v9_as_base": "Use v9 (Puppeteer) as production base — it's proven to work and successfully extracts owner names",
|
|
"why_v9_over_v10": "v10 (agent-browser) has syntax/timeout issues and is untested. v9 uses stable Puppeteer with proven code patterns.",
|
|
"next_steps": [
|
|
"Option A: Test v10 with agent-browser to see if emails/phones work with your CSS selector",
|
|
"Option B: If emails/phones are critical, add the extraction logic to v9 (proven codebase) using your CSS selector: 'p.MuiTypography-root.jss1797.jss1798.MuiTypography-body2'",
|
|
"Option C: Build a new scraper from scratch using Puppeteer (simpler, proven architecture) that includes all data extraction from both Builder and Lot and Owner tabs"
|
|
],
|
|
"notes": [
|
|
"v9 successfully extracts owner names but misses emails and phones (the extraction logic wasn't implemented)",
|
|
"Your CSS selector for phones: 'p.MuiTypography-root.jss1797.jss1798.MuiTypography-body2' works in v9 - this is the correct class to target",
|
|
"Email extraction can use mailto: links (a[href*='mailto:'] or a[href*='@']) or text-based patterns",
|
|
"Phone extraction can use the phone CSS selector given above ('p.MuiTypography-root.jss1797.jss1798.MuiTypography-body2')",
|
|
"v10 (agent-browser) failed due to Node.js eval syntax incompatibility issues - agent-browser expects different eval syntax than what v9 provides"
|
|
]
|
|
}
|
|
}
|