#!/usr/bin/env node
/**
 * Reonomy Scraper v12 - AGENT-BROWSER EDITION (Vercel Labs)
 *
 * Key features:
 * - Drives the agent-browser CLI tool (one child process per command)
 * - Remembers the post-login search URL in a state file so later runs skip
 *   the login flow
 * - Ref-based interaction: form fields and buttons are addressed by the refs
 *   returned from `snapshot`
 * - Elements are located by role and accessible name
 * - Extracts property details plus owner names, emails and phones from the
 *   ownership page of every property
 * - Uses direct ownership URLs (no property card clicking)
 *
 * Usage:
 *   REONOMY_EMAIL=you@example.com REONOMY_PASSWORD=secret \
 *   REONOMY_SEARCH_ID="504a2d13-d88f-4213-9ac6-a7c8bc7c20c6" \
 *   node reonomy-scraper-v12-agent-browser.js
 *
 * Environment variables:
 *   REONOMY_EMAIL, REONOMY_PASSWORD  credentials (required only when a login
 *                                    has to be performed)
 *   REONOMY_SEARCH_ID (or SEARCH_ID) saved-search UUID to scrape
 *   MAX_PROPERTIES                   maximum properties to process (default 20)
 *   AGENT_BROWSER                    path to the agent-browser executable
 */

'use strict';

const { spawn } = require('child_process');
const fs = require('fs');
const path = require('path');

// Configuration
// SECURITY: credentials are deliberately NOT given in-source defaults; they
// must come from the environment.
const REONOMY_EMAIL = process.env.REONOMY_EMAIL || '';
const REONOMY_PASSWORD = process.env.REONOMY_PASSWORD || '';
// The file header historically documented SEARCH_ID while the code read
// REONOMY_SEARCH_ID, so both names are honoured.
const SEARCH_ID =
  process.env.REONOMY_SEARCH_ID ||
  process.env.SEARCH_ID ||
  '504a2d13-d88f-4213-9ac6-a7c8bc7c20c6';
const parsedMaxProperties = Number.parseInt(process.env.MAX_PROPERTIES, 10);
const MAX_PROPERTIES = parsedMaxProperties > 0 ? parsedMaxProperties : 20;
// NOTE(review): HEADLESS is parsed but not currently forwarded to
// agent-browser; kept so existing invocations that set it do not break.
const HEADLESS = process.env.HEADLESS !== 'false';

// Full path to agent-browser wrapper
const AGENT_BROWSER = process.env.AGENT_BROWSER || '/opt/homebrew/bin/agent-browser';

const OUTPUT_FILE = path.join(__dirname, 'reonomy-leads-v12-agent-browser.json');
const LOG_FILE = path.join(__dirname, 'reonomy-scraper-v12.log');
const AUTH_STATE_FILE = path.join(__dirname, 'reonomy-auth-state.txt');

// "<street>, <city>, <ST> <zip>": group 1 street, 2 city, 3 state, 4 zip.
const ADDRESS_PATTERN = /^(\d+[^,\n]+),\s*([A-Za-z\s,]+),\s*([A-Z]{2})\s*(\d{5})/;

// More specific labels come first: 'General Industrial' contains
// 'Industrial', so the generic label would otherwise always win.
const PROPERTY_TYPES = [
  'Warehouse',
  'Office Building',
  'Retail Stores',
  'General Industrial',
  'Industrial',
  'Medical Building',
  'School',
  'Religious',
  'Supermarket',
  'Financial Building',
  'Residential',
  'Vacant Land',
  'Tax Exempt',
  'Mixed Use',
];

/**
 * Print a message to stdout and append it, timestamped, to LOG_FILE.
 *
 * @param {string} message - Text to log.
 */
function log(message) {
  const timestamp = new Date().toISOString();
  console.log(message);
  fs.appendFileSync(LOG_FILE, `[${timestamp}] ${message}\n`);
}

/**
 * Resolve after the given delay.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Execute an agent-browser command and capture its output.
 *
 * @param {string[]} args - Arguments passed to the agent-browser executable.
 * @param {string} [description] - Human-readable label written to the log.
 * @returns {Promise<string>} Trimmed stdout of the command.
 * @throws {Error} When the process cannot be started or exits non-zero.
 */
async function execAgentBrowser(args, description = '') {
  // Never write the password to the console or the log file.
  const printableArgs = args.map((arg) =>
    REONOMY_PASSWORD && arg === REONOMY_PASSWORD ? '***' : arg,
  );
  log(`🔧 ${description}`);
  log(` Command: ${[AGENT_BROWSER, ...printableArgs].join(' ')}`);

  return new Promise((resolve, reject) => {
    const child = spawn(AGENT_BROWSER, args);
    let stdout = '';
    let stderr = '';

    child.stdout.on('data', (data) => {
      stdout += data.toString();
    });
    child.stderr.on('data', (data) => {
      stderr += data.toString();
    });

    // Without this listener a missing/non-executable binary raises an
    // unhandled 'error' event and the promise never settles.
    child.on('error', (error) => {
      log(` ❌ Could not start agent-browser: ${error.message}`);
      reject(new Error(`agent-browser could not be started: ${error.message}`, { cause: error }));
    });

    child.on('close', (code) => {
      if (code === 0) {
        log(' ✅ Success');
        resolve(stdout.trim());
        return;
      }
      log(` ❌ Failed (code ${code})`);
      if (stderr) {
        log(` Error: ${stderr.trim()}`);
      }
      reject(new Error(`agent-browser failed with code ${code}: ${stderr.trim()}`));
    });
  });
}

/**
 * Execute an agent-browser command with `--json` appended and parse stdout.
 *
 * @param {string[]} args - Arguments passed to the agent-browser executable.
 * @param {string} [description] - Human-readable label written to the log.
 * @returns {Promise<object|null>} Parsed JSON, or null when stdout is not
 *   valid JSON.
 * @throws {Error} When the underlying command fails.
 */
async function execAgentBrowserJson(args, description = '') {
  const output = await execAgentBrowser([...args, '--json'], description);
  try {
    return JSON.parse(output);
  } catch (error) {
    log(` ⚠️ JSON parse error: ${error.message}`);
    return null;
  }
}

/**
 * Read the saved state file, if it exists.
 *
 * @returns {Promise<string|null>} Trimmed file content, or null when the file
 *   does not exist.
 */
async function loadAuthState() {
  if (!fs.existsSync(AUTH_STATE_FILE)) {
    return null;
  }
  const state = fs.readFileSync(AUTH_STATE_FILE, 'utf8');
  log('🔑 Loading saved auth state...');
  log(` State file: ${AUTH_STATE_FILE}`);
  return state.trim();
}

/**
 * Write the given state string to the state file.
 *
 * @param {string} state - State to persist (the caller passes the post-login
 *   URL).
 */
async function saveAuthState(state) {
  fs.writeFileSync(AUTH_STATE_FILE, state);
  log('🔑 Saved auth state to file');
  log(` State file: ${AUTH_STATE_FILE}`);
  log(` State: ${state.substring(0, 100)}...`);
}

/**
 * Take a screenshot for debugging.
 *
 * @param {string} filename - File name (placed under /tmp) or an absolute
 *   path, which is used as given.
 * @returns {Promise<string>} Path the screenshot was requested at.
 */
async function takeScreenshot(filename) {
  // Callers have historically passed both bare names and '/tmp/...' paths;
  // blindly prefixing produced '/tmp//tmp/...'.
  const screenshotPath = path.isAbsolute(filename) ? filename : path.join('/tmp', filename);
  const output = await execAgentBrowser(['screenshot', screenshotPath], 'Taking screenshot');
  if (output.includes('Saved')) {
    log(` 📸 Screenshot saved: ${screenshotPath}`);
  }
  return screenshotPath;
}

/**
 * Split a heading of the form "<street>, <city>, <ST> <zip>" into fields.
 *
 * @param {string} heading - Heading text.
 * @returns {{propertyAddress: string, city: string, state: string, zip: string}|null}
 *   Parsed fields (propertyAddress is the full trimmed heading), or null when
 *   the heading does not look like an address.
 */
function parseAddress(heading) {
  const match = ADDRESS_PATTERN.exec(heading);
  if (!match) {
    return null;
  }
  const [, , city, state, zip] = match;
  return {
    propertyAddress: heading.trim(),
    city: city.trim(),
    state: state.trim(),
    zip: zip.trim(),
  };
}

/**
 * Extract property details (address, size, type) from the current page.
 *
 * @returns {Promise<{propertyAddress: string, city: string, state: string,
 *   zip: string, squareFootage: string, propertyType: string}>} Fields are
 *   empty strings when they cannot be determined.
 */
async function extractBuilderLotData() {
  log('📊 Extracting Builder and Lot data...');

  // execAgentBrowserJson already returns a parsed object (or null).
  const snapshot = await execAgentBrowserJson(['snapshot', '-i'], 'Get interactive elements');
  const refs = snapshot?.data?.refs;
  if (!refs) {
    log(' ⚠️ Could not get snapshot');
    return {
      propertyAddress: '',
      city: '',
      state: '',
      zip: '',
      squareFootage: '',
      propertyType: '',
    };
  }
  log(` Found ${Object.keys(refs).length} interactive elements`);

  // The first heading that parses as an address wins.
  let address = { propertyAddress: '', city: '', state: '', zip: '' };
  for (const element of Object.values(refs)) {
    if (element.role !== 'heading' || typeof element.name !== 'string') {
      continue;
    }
    const parsed = parseAddress(element.name);
    if (parsed) {
      address = parsed;
      log(` 📍 Address: ${element.name}`);
      break;
    }
  }

  // Square footage and property type come from the page's visible text.
  const bodyTextResult = await execAgentBrowserJson(
    ['eval', 'document.body.innerText'],
    'Get body text',
  );
  const bodyText = bodyTextResult?.data?.result || '';

  // Accepts thousands separators ("12,345 SF") and "k" suffixes ("12.5k SF").
  const sfMatch = bodyText.match(/\d[\d,]*\.?\d*\s*k?\s*SF\b/i);
  const squareFootage = sfMatch ? sfMatch[0] : '';
  if (squareFootage) {
    log(` 📐 Square Footage: ${squareFootage}`);
  }

  const propertyType = PROPERTY_TYPES.find((type) => bodyText.includes(type)) || '';
  if (propertyType) {
    log(` 🏢 Property Type: ${propertyType}`);
  }

  return { ...address, squareFootage, propertyType };
}

/**
 * Extract owner names, emails and phone numbers from the current page.
 *
 * @returns {Promise<{ownerNames: string[], emails: string[], phones: string[]}>}
 *   De-duplicated values in discovery order.
 */
async function extractOwnerTabData() {
  log('👤 Extracting Owner tab data...');

  const result = { ownerNames: [], emails: [], phones: [] };

  // Owner names: lines shaped like "Owner: <n> properties <Name>".
  const bodyTextResult = await execAgentBrowserJson(
    ['eval', 'document.body.innerText'],
    'Get body text',
  );
  const bodyText = bodyTextResult?.data?.result || '';
  for (const line of bodyText.split('\n')) {
    const ownsMatch = line.match(/Owner:\s*(\d+)\s+properties?\s*([A-Z][a-z]+)/i);
    const owner = ownsMatch?.[2]?.trim();
    if (owner && owner.length > 3 && !result.ownerNames.includes(owner)) {
      result.ownerNames.push(owner);
      log(` 👤 Owner: ${owner}`);
    }
  }
  log(` 👤 Owners found: ${result.ownerNames.length}`);

  // Emails, approach 1: mailto links.
  const mailtoResult = await execAgentBrowserJson(
    [
      'eval',
      `({ mailtoLinks: Array.from(document.querySelectorAll('a[href^="mailto:"]')).map(a => a.href.replace('mailto:', '')) });`,
    ],
    'Extract mailto links',
  );
  const mailtoLinks = mailtoResult?.data?.result?.mailtoLinks;
  if (Array.isArray(mailtoLinks)) {
    for (const link of mailtoLinks) {
      // Drop any "?subject=..." query that follows the address.
      const email = String(link).split('?')[0].trim();
      if (email.length > 5 && !result.emails.includes(email)) {
        result.emails.push(email);
      }
    }
    log(` 📧 Emails from mailto links: ${result.emails.length}`);
  }

  // Emails, approach 2: address-shaped tokens in the visible text.
  const emailMatches = bodyText.match(/[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/g) || [];
  for (const email of emailMatches) {
    if (!result.emails.includes(email)) {
      result.emails.push(email);
    }
  }
  log(` 📧 Emails from text regex: ${emailMatches.length}`);
  log(` 📧 Total emails: ${result.emails.length}`);

  // Phones: user-provided CSS selector.
  // NOTE(review): the jssNNNN class names look build-generated and may change
  // between site releases — confirm this selector still matches.
  const phoneResult = await execAgentBrowserJson(
    [
      'eval',
      `({ phoneTexts: Array.from(document.querySelectorAll('p.MuiTypography-root.jss1797.jss1798.MuiTypography-body2')).map(p => p.textContent.trim()).filter(text => text.length >= 10) });`,
    ],
    'Extract phones using CSS selector',
  );
  const phoneTexts = phoneResult?.data?.result?.phoneTexts;
  if (Array.isArray(phoneTexts)) {
    for (const phone of phoneTexts) {
      const cleanPhone = String(phone).replace(/[\s\-()]/g, '');
      if (cleanPhone.length >= 10 && !result.phones.includes(cleanPhone)) {
        result.phones.push(cleanPhone);
      }
    }
    log(` 📞 Phones found: ${result.phones.length}`);
  }
  log(` 📞 Total phones: ${result.phones.length}`);

  return result;
}

/**
 * Extract property IDs from the search results page.
 *
 * Link refs from the snapshot are used first; when none of them expose a
 * property URL, anchors are read straight from the DOM.
 *
 * @returns {Promise<Array<{id: string, url: string}>>} Unique properties in
 *   discovery order; empty when the snapshot is unavailable.
 */
async function extractPropertyIds() {
  log('📍 Extracting property IDs...');

  const snapshot = await execAgentBrowserJson(
    ['snapshot', '-c'],
    'Get property links from search',
  );
  const refs = snapshot?.data?.refs;
  if (!refs) {
    log(' ⚠️ Could not get snapshot');
    return [];
  }

  const found = new Map();
  const collect = (url) => {
    const match = typeof url === 'string' ? url.match(/property\/([a-f0-9-]+)/) : null;
    if (match && !found.has(match[1])) {
      found.set(match[1], { id: match[1], url });
    }
  };

  for (const element of Object.values(refs)) {
    if (element.role === 'link') {
      collect(element.url);
    }
  }

  if (found.size === 0) {
    // NOTE(review): snapshot refs may not carry a `url` field at all — confirm
    // against the agent-browser output; the DOM is queried as a fallback.
    const linkResult = await execAgentBrowserJson(
      [
        'eval',
        `({ hrefs: Array.from(document.querySelectorAll('a[href*="property/"]')).map(a => a.href) });`,
      ],
      'Get property links from DOM',
    );
    const hrefs = linkResult?.data?.result?.hrefs;
    if (Array.isArray(hrefs)) {
      hrefs.forEach(collect);
    }
  }

  const propertyIds = [...found.values()];
  log(` ✅ Found ${propertyIds.length} property IDs`);
  return propertyIds;
}

/**
 * Fill in and submit the login form, then confirm the login via the URL.
 *
 * @param {object} refs - `data.refs` from a snapshot of the login page.
 * @returns {Promise<string|null>} Search ID found in the post-login URL, or
 *   null when the login landed on the home page.
 * @throws {Error} When credentials are missing, the form cannot be located,
 *   or the post-login URL is not recognised.
 */
async function logIn(refs) {
  if (!REONOMY_EMAIL || !REONOMY_PASSWORD) {
    throw new Error('REONOMY_EMAIL and REONOMY_PASSWORD must be set to log in');
  }

  // Find email and password inputs and the submit button.
  let emailRef = null;
  let passwordRef = null;
  let loginButtonRef = null;
  for (const [ref, element] of Object.entries(refs)) {
    if (element.role === 'textbox') {
      const label = (element.name || element.placeholder || '').toLowerCase();
      if (label.includes('email')) {
        emailRef = ref;
      } else if (label.includes('password')) {
        passwordRef = ref;
      }
    } else if (element.role === 'button' && element.name) {
      const label = element.name.toLowerCase();
      if (label.includes('log in') || label.includes('sign in')) {
        loginButtonRef = ref;
      }
    }
  }
  if (!emailRef || !passwordRef || !loginButtonRef) {
    log(' ⚠️ Could not find login form elements');
    throw new Error('Login form not found');
  }

  log(' 📧 Filling email...');
  await execAgentBrowser(['fill', emailRef, REONOMY_EMAIL], 'Fill email');
  await sleep(500);

  log(' 🔒 Filling password...');
  await execAgentBrowser(['fill', passwordRef, REONOMY_PASSWORD], 'Fill password');
  await sleep(500);

  log(' 🔑 Clicking login button...');
  await execAgentBrowser(['click', loginButtonRef], 'Click login button');
  await sleep(500);

  log(' ⏎ Pressing Enter to submit...');
  await execAgentBrowser(['press', 'Enter'], 'Press Enter');

  log(' ⏳ Waiting for login...');
  await sleep(15000);

  const urlCheck = await execAgentBrowserJson(
    ['eval', 'window.location.href'],
    'Check current URL',
  );
  const currentUrl = urlCheck?.data?.result;
  // Hash-bang routes look like ".../#!/home"; the legacy "/!/home" spelling
  // is still accepted.
  const isLoggedIn =
    typeof currentUrl === 'string' &&
    (currentUrl.includes('#!/search/') || /[#/]!\/home/.test(currentUrl));
  if (!isLoggedIn) {
    log('⚠️ Could not confirm login - URL does not match expected pattern');
    throw new Error('Login may have failed');
  }
  log('✅ Successfully logged in!');

  const searchIdMatch = currentUrl.match(/#!\/search\/([a-f0-9-]+)/);
  if (!searchIdMatch) {
    // Login went to home page; the caller navigates to the search itself.
    log('🏠 Logged in to home page, will navigate to search');
    return null;
  }
  // Save state for future runs.
  await saveAuthState(currentUrl);
  log(`📝 Search ID updated: ${searchIdMatch[1]}`);
  return searchIdMatch[1];
}

/**
 * Write the collected leads to OUTPUT_FILE (no file is written when empty).
 *
 * @param {object[]} leads - Leads collected so far.
 * @param {string} searchId - Search the leads belong to.
 */
function saveLeads(leads, searchId) {
  if (leads.length === 0) {
    log('\n⚠️ No leads scraped.');
    return;
  }
  log(`\n✅ Total leads scraped: ${leads.length}`);
  const outputData = {
    scrapeDate: new Date().toISOString(),
    searchId,
    leadCount: leads.length,
    leads,
  };
  fs.writeFileSync(OUTPUT_FILE, JSON.stringify(outputData, null, 2));
  log(`💾 Saved to: ${OUTPUT_FILE}`);
}

/**
 * Main scraper function: log in if needed, open the search, then visit the
 * ownership page of each property and collect a lead per property.
 *
 * @returns {Promise<{leadCount: number, outputFile: string}>}
 * @throws {Error} When login fails, no properties are found, or an
 *   agent-browser command fails. Leads collected before a failure are still
 *   written to OUTPUT_FILE.
 */
async function scrapeLeads() {
  log('🚀 Starting Reonomy Scraper v12 (AGENT-BROWSER EDITION)...\n');

  // The configured ID may be replaced by one discovered at login or in the
  // saved state, so it lives in a local rather than the module constant.
  let searchId = SEARCH_ID;
  const savedState = await loadAuthState();

  // Step 1: Login to Reonomy (only if no saved state)
  if (!savedState) {
    log('\n📍 Step 1: Checking login status...');
    await execAgentBrowser(['open', 'https://app.reonomy.com/#!/login'], 'Open login page');
    await sleep(2000);

    const snapshot = await execAgentBrowserJson(
      ['snapshot', '-i'],
      'Check if already logged in',
    );
    const refs = snapshot?.data?.refs;

    // A "Search Reonomy" button indicates an authenticated session.
    const isAlreadyLoggedIn = Object.values(refs || {}).some(
      (elem) => elem.role === 'button' && elem.name === 'Search Reonomy',
    );
    if (isAlreadyLoggedIn) {
      log('✅ Already logged in!');
    } else {
      log('🔐 Not logged in, proceeding with login flow...');
      if (!refs) {
        log(' ⚠️ Could not get login form snapshot');
        throw new Error('Login form not found');
      }
      searchId = (await logIn(refs)) ?? searchId;
    }
  } else {
    // NOTE(review): the state file holds only a URL, so this branch presumes
    // the browser session itself is still authenticated — confirm.
    log('✅ Found saved auth state! Skipping login flow.');
    log(` Saved state: ${savedState.substring(0, 100)}...`);

    const searchIdMatch = savedState.match(/#!\/search\/([a-f0-9-]+)/);
    if (!searchIdMatch) {
      log('⚠️ Could not extract search ID from saved state');
      throw new Error('Could not extract search ID from saved auth state');
    }
    searchId = searchIdMatch[1];
    log(`📝 Search ID from saved state: ${searchId}`);
  }

  // Step 2: Navigate to search
  log('\n📍 Step 2: Navigating to search...');
  const searchUrl = `https://app.reonomy.com/#!/search/${searchId}`;
  await execAgentBrowser(['open', searchUrl], 'Open search URL');
  await sleep(3000);

  // Step 3: Extract property IDs
  log('\n📍 Step 3: Extracting property IDs...');
  const propertyIds = await extractPropertyIds();
  if (propertyIds.length === 0) {
    log(' ⚠️ No property IDs found.');
    throw new Error('No properties found on search page.');
  }
  log(` ✅ Found ${propertyIds.length} property IDs`);

  // Step 4: Process each property
  const propertiesToScrape = propertyIds.slice(0, MAX_PROPERTIES);
  log(`\n📍 Step 4: Processing ${propertiesToScrape.length} properties...\n`);

  const leads = [];
  try {
    for (const [index, prop] of propertiesToScrape.entries()) {
      log(`\n[${index + 1}/${propertiesToScrape.length}] Property ID: ${prop.id}`);

      // Navigate directly to the ownership page.
      const ownershipUrl = `https://app.reonomy.com/#!/search/${searchId}/property/${prop.id}/ownership`;
      log(' 🔗 Navigating to ownership page...');
      await execAgentBrowser(['open', ownershipUrl], 'Open ownership URL');
      await sleep(5000);

      log(' ⏳ Waiting for Owner tab to load...');
      await sleep(8000);

      log(' 📊 Extracting Builder and Lot data...');
      const builderLotData = await extractBuilderLotData();

      // Wait a moment before extracting Owner tab
      await sleep(500);

      log(' 👤 Extracting Owner tab data...');
      const ownerData = await extractOwnerTabData();

      leads.push({
        scrapeDate: new Date().toISOString().split('T')[0],
        propertyId: prop.id,
        propertyUrl: ownershipUrl,
        ...builderLotData,
        ...ownerData,
        searchId,
      });

      log(` 📧 Emails: ${ownerData.emails.length}`);
      log(` 📞 Phones: ${ownerData.phones.length}`);
      log(` 👤 Owners: ${ownerData.ownerNames.length}`);
      log(` 📍 Address: ${builderLotData.propertyAddress || 'N/A'}`);

      // Screenshot for debugging (first 3 properties only). Best-effort: a
      // failed screenshot must not abort the scrape.
      if (index < 3) {
        try {
          await takeScreenshot(`reonomy-v12-property-${index + 1}.png`);
        } catch (error) {
          log(` ⚠️ Could not save screenshot: ${error.message}`);
        }
      }
    }
  } finally {
    // Step 5: Save results — also on failure, so leads already collected are
    // not lost when a later property errors out.
    saveLeads(leads, searchId);
  }

  log('\n✅ Scraping complete!');
  return { leadCount: leads.length, outputFile: OUTPUT_FILE };
}

/**
 * Main execution
 */
(async () => {
  try {
    await scrapeLeads();
    process.exit(0);
  } catch (error) {
    log(`\n❌ Error: ${error.message}`);
    log(error.stack);

    // Take screenshot of error state
    try {
      await takeScreenshot('reonomy-v12-error.png');
      log('📸 Error screenshot saved: /tmp/reonomy-v12-error.png');
    } catch (e) {
      log('Could not save error screenshot');
    }
    process.exit(1);
  }
})();