#!/usr/bin/env node
/**
 * Reonomy Scraper v14 - Self-Configuring Search + Extract
 *
 * Sets up search filters IN THE SAME browser session, then scrapes.
 * No more search ID handoff problem.
 *
 * ENV CONFIG:
 *   REONOMY_EMAIL       - Login email
 *   REONOMY_PASSWORD    - Login password
 *   REONOMY_STATE       - Location filter (e.g., "New Jersey")
 *   REONOMY_TYPES       - Comma-separated property types (e.g., "Industrial")
 *   REONOMY_MIN_SF      - Min building area in SF (e.g., "50000")
 *   REONOMY_SALE_FILTER - "not_within_10y", "not_within_5y", "not_within_2y", "within_10y", etc.
 *   REONOMY_OWNER_PHONE - "true" to require phone
 *   REONOMY_OWNER_EMAIL - "true" to require email
 *   MAX_PROPERTIES      - Max properties to scrape (default 20)
 *   HEADLESS            - "false" to show browser
 *                         (not read by this script; presumably consumed by agent-browser - confirm)
 */

const { execSync } = require('child_process');
const fs = require('fs');
const os = require('os');
const path = require('path');

// process.env.HOME may be unset (cron, some containers); path.join(undefined, ...) would throw.
const HOME_DIR = process.env.HOME || os.homedir();
const WORKSPACE_DIR = path.join(HOME_DIR, '.clawdbot/workspace');

const CONFIG = {
  authStatePath: path.join(WORKSPACE_DIR, 'reonomy-auth.json'),
  outputPath: path.join(WORKSPACE_DIR, 'reonomy-leads-v14.json'),
  logPath: path.join(WORKSPACE_DIR, 'reonomy-scraper-v14.log'),
  dailyLogPath: path.join(WORKSPACE_DIR, 'reonomy-daily-stats.json'),
  maxProperties: Number.parseInt(process.env.MAX_PROPERTIES, 10) || 20,
  maxDailyProperties: 50,
  // FIXME(security): hard-coded fallback credentials are committed in source.
  // Kept only so existing runs do not break; rotate the password and supply
  // REONOMY_EMAIL / REONOMY_PASSWORD via the environment instead.
  email: process.env.REONOMY_EMAIL || 'henry@realestateenhanced.com',
  password: process.env.REONOMY_PASSWORD || '9082166532',
  // Search filters
  searchState: process.env.REONOMY_STATE || 'New Jersey',
  propertyTypes: (process.env.REONOMY_TYPES || 'Industrial')
    .split(',')
    .map((s) => s.trim())
    .filter(Boolean), // ignore empty entries such as a trailing comma
  minSF: process.env.REONOMY_MIN_SF || '',
  saleFilter: process.env.REONOMY_SALE_FILTER || '', // e.g., "not_within_10y"
  ownerPhone: process.env.REONOMY_OWNER_PHONE === 'true',
  ownerEmail: process.env.REONOMY_OWNER_EMAIL === 'true',
};

/** Maps the period suffix of REONOMY_SALE_FILTER to the label shown in the Reonomy UI. */
const SALE_PERIOD_LABELS = {
  '90d': 'Past 90 days',
  '1y': 'Past year',
  '2y': 'Past 2 years',
  '5y': 'Past 5 years',
  '10y': 'Past 10 years',
};

/**
 * Escapes regex metacharacters so arbitrary text can be embedded in a RegExp.
 * @param {string} text
 * @returns {string}
 */
function escapeRegExp(text) {
  return String(text).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/**
 * Writes a timestamped line to stdout and appends it to CONFIG.logPath.
 * @param {string} msg
 */
function log(msg) {
  const timestamp = new Date().toISOString();
  const line = `[${timestamp}] ${msg}`;
  console.log(line);
  fs.appendFileSync(CONFIG.logPath, line + '\n');
}

/**
 * Runs an `agent-browser` CLI command synchronously through the shell.
 *
 * NOTE(review): `cmd` is passed to the shell as one string, so callers are
 * responsible for quoting; values containing shell metacharacters will break.
 *
 * @param {string} cmd - Arguments appended after `agent-browser`.
 * @param {{verbose?: boolean, timeout?: number}} [options] - `verbose: false`
 *   suppresses command/error logging; `timeout` is in ms (default 30000).
 * @returns {{success: true, output: string} | {success: false, error: string}}
 */
function ab(cmd, options = {}) {
  const fullCmd = `agent-browser ${cmd}`;
  if (options.verbose !== false) log(` > ${fullCmd.substring(0, 120)}`);
  try {
    const result = execSync(fullCmd, {
      encoding: 'utf8',
      timeout: options.timeout || 30000,
      stdio: ['pipe', 'pipe', 'pipe'],
    });
    return { success: true, output: result.trim() };
  } catch (err) {
    const stderr = err.stderr?.toString() || err.message;
    if (options.verbose !== false) log(` ! Error: ${stderr.substring(0, 200)}`);
    return { success: false, error: stderr };
  }
}

/** Resolves after `ms` milliseconds. */
function sleep(ms) {
  return new Promise((r) => setTimeout(r, ms));
}

/** Sleeps for a uniformly random duration in [min, max] milliseconds. */
function randomDelay(min, max) {
  return sleep(Math.floor(Math.random() * (max - min + 1)) + min);
}

/** Returns a shuffled copy of `arr` (Fisher-Yates); the input is not mutated. */
function shuffle(arr) {
  const a = [...arr];
  for (let i = a.length - 1; i > 0; i--) {
    const j = Math.floor(Math.random() * (i + 1));
    [a[i], a[j]] = [a[j], a[i]];
  }
  return a;
}

/**
 * Loads today's scrape counters from CONFIG.dailyLogPath.
 * @returns {{date: string, propertiesScraped: number, leadsFound: number}}
 *   Stored stats when their date is today (UTC), otherwise fresh zeroed stats.
 */
function getDailyStats() {
  const today = new Date().toISOString().split('T')[0];
  try {
    const data = JSON.parse(fs.readFileSync(CONFIG.dailyLogPath, 'utf8'));
    if (data.date === today) return data;
  } catch (e) {
    // Best effort: a missing or corrupt stats file just means we start from zero.
  }
  return { date: today, propertiesScraped: 0, leadsFound: 0 };
}

/** Persists the daily counters to CONFIG.dailyLogPath. */
function saveDailyStats(stats) {
  fs.writeFileSync(CONFIG.dailyLogPath, JSON.stringify(stats, null, 2));
}

/** Clicks through up to five "Let's Go" onboarding banners, if present. */
async function dismissBanners() {
  for (let i = 0; i < 5; i++) {
    const s = ab('snapshot -i', { verbose: false });
    const lg = s.output?.match(/button "Let's Go" \[ref=(e\d+)\]/);
    if (!lg) break;
    ab(`click @${lg[1]}`, { verbose: false });
    await sleep(800);
  }
}

/**
 * Clears the focused input and types `text` one key press at a time.
 *
 * CRITICAL: Use press (individual keystrokes) instead of fill/type.
 * fill sets .value directly, skipping React synthetic onChange, so the
 * autocomplete never fires. press sends real keyboard events that React detects.
 *
 * NOTE(review): each character is passed unquoted to the shell; characters
 * with shell meaning (quotes, &, ;) are not handled.
 *
 * @param {string} text
 */
async function typeViaKeypresses(text) {
  ab('press Control+a');
  await sleep(200);
  ab('press Backspace');
  await sleep(500);
  for (const char of text) {
    if (char === ' ') ab('press Space');
    else ab(`press ${char}`);
    await sleep(150);
  }
}

/**
 * Clicks the first <div> whose trimmed text equals `text` and whose class
 * name contains "jss", via in-page JS. The eval result is "<label>: true|false".
 * @param {string} text - Exact visible text of the div to click.
 * @param {string} label - Prefix for the eval result, for log readability.
 */
function clickJssDiv(text, label) {
  return ab(
    `eval " const divs = Array.from(document.querySelectorAll('div')); const btn = divs.find(d => d.textContent.trim() === '${text}' && d.className.includes('jss')); if (btn) btn.click(); '${label}: ' + !!btn; "`
  );
}

/**
 * Parses REONOMY_SALE_FILTER values such as "not_within_10y" or "within_5y".
 *
 * The previous split('_within_') could not parse the "within_<period>" form
 * (no leading underscore), so the period was lost and silently defaulted.
 *
 * @param {string} saleFilter
 * @returns {{notWithin: boolean, period: (string|undefined)}} `period` is
 *   undefined when the value does not match the expected shape.
 */
function parseSaleFilter(saleFilter) {
  const m = /^(not_)?within_(.+)$/.exec(String(saleFilter).trim());
  if (!m) return { notWithin: false, period: undefined };
  return { notWithin: Boolean(m[1]), period: m[2] };
}

// -- LOGIN --

/**
 * Logs in to Reonomy with CONFIG.email / CONFIG.password and saves the
 * browser auth state to CONFIG.authStatePath.
 * @returns {Promise<boolean>} true on success or when already logged in.
 * @throws {Error} When the login form cannot be found or login fails.
 */
async function login() {
  log(' Navigating to login...');
  ab('open "https://app.reonomy.com/!/login"');
  await sleep(4000);

  const snap = ab('snapshot -i');
  if (!snap.output?.includes('textbox "Email"')) {
    const url = ab('eval "window.location.href"');
    if (
      url.output?.includes('app.reonomy.com') &&
      !url.output?.includes('login') &&
      !url.output?.includes('auth.reonomy.com')
    ) {
      log(' Already logged in!');
      return true;
    }
    throw new Error('Login form not found');
  }

  const emailRef = snap.output.match(/textbox "Email" \[ref=(e\d+)\]/)?.[1];
  const passRef = snap.output.match(/textbox "Password" \[ref=(e\d+)\]/)?.[1];
  const loginRef = snap.output.match(/button "Log In" \[ref=(e\d+)\]/)?.[1];
  if (!emailRef || !passRef || !loginRef) throw new Error('Login form elements not found');

  ab(`fill @${emailRef} "${CONFIG.email}"`);
  await sleep(1000);

  // Run non-verbose so the plaintext password never reaches the console or log file.
  log(` > agent-browser fill @${passRef} "<redacted>"`);
  const passFill = ab(`fill @${passRef} "${CONFIG.password}"`, { verbose: false });
  if (!passFill.success) log(' ! Error: failed to fill password field');
  await sleep(1000);

  ab(`click @${loginRef}`);
  await randomDelay(12000, 16000);

  const postUrl = ab('eval "window.location.href"');
  if (postUrl.output?.includes('auth.reonomy.com') || postUrl.output?.includes('login')) {
    throw new Error('Login failed');
  }

  ab(`state save "${CONFIG.authStatePath}"`);
  log(' Login successful!');
  return true;
}

// -- BUILD SEARCH --

/**
 * Configures the search filters from CONFIG in the current browser session
 * (location, property type, size, sale date, owner contact filters).
 * @returns {Promise<string>} The search ID parsed from the URL, or 'unknown'.
 * @throws {Error} When the search box, state suggestion, or Property Type
 *   button cannot be found.
 */
async function buildSearch() {
  log('\n== Building search with filters ==');

  // Go to search page via Advanced Search
  ab('open "https://app.reonomy.com/!/search"');
  await sleep(5000);

  await dismissBanners();

  // Click Advanced Search if visible
  let snap = ab('snapshot -i');
  const advRef = snap.output?.match(/link "Advanced Search" \[ref=(e\d+)\]/)?.[1];
  if (advRef) {
    ab(`click @${advRef}`);
    await sleep(3000);
  }

  // -- Location --
  log(` Setting location: ${CONFIG.searchState}`);

  // Take FRESH snapshot after Advanced Search click (refs changed)
  snap = ab('snapshot -i');
  let searchBox = snap.output?.match(
    /textbox "(?:Search by address, location, or owner|Address, Location, or Owner)" \[ref=(e\d+)\]/
  )?.[1];
  if (!searchBox) throw new Error('Search box not found');

  ab(`click @${searchBox}`);
  await sleep(1000);
  await typeViaKeypresses(CONFIG.searchState);
  await sleep(3000);

  // Retry up to 4 times to find the state suggestion
  const stateItemRe = new RegExp(
    `menuitem "${escapeRegExp(CONFIG.searchState)}" \\[ref=(e\\d+)\\]`
  );
  let stateRef = null;
  for (let attempt = 0; attempt < 4 && !stateRef; attempt++) {
    snap = ab('snapshot -i');
    stateRef = snap.output?.match(stateItemRe)?.[1];
    if (!stateRef) {
      log(` State not found (attempt ${attempt + 1}), retrying...`);
      // Re-find search box (ref might have changed)
      searchBox =
        snap.output?.match(/textbox "(?:Search by address|Address)[^"]*" \[ref=(e\d+)\]/)?.[1] ||
        snap.output?.match(/textbox "[^"]*" \[ref=(e\d+)\]/)?.[1];
      if (searchBox) {
        // Clear and retype char-by-char via press for React autocomplete
        ab(`click @${searchBox}`);
        await sleep(500);
        await typeViaKeypresses(CONFIG.searchState);
        await sleep(4000);
      } else {
        await sleep(2000);
      }
    }
  }
  if (!stateRef) throw new Error(`State "${CONFIG.searchState}" not found in suggestions`);

  ab(`click @${stateRef}`);
  await sleep(6000);

  // -- Property Type --
  log(` Setting property types: ${CONFIG.propertyTypes.join(', ')}`);
  snap = ab('snapshot -i');
  const ptRef = snap.output?.match(/button "Property Type" \[ref=(e\d+)\]/)?.[1];
  if (!ptRef) throw new Error('Property Type button not found');

  ab(`click @${ptRef}`);
  await sleep(2000);

  snap = ab('snapshot -i');
  for (const ptype of CONFIG.propertyTypes) {
    const checkboxRe = new RegExp(`checkbox "${escapeRegExp(ptype)}" \\[ref=(e\\d+)\\]`);
    const cbRef = snap.output?.match(checkboxRe)?.[1];
    if (cbRef) {
      ab(`click @${cbRef}`);
      await sleep(500);
      continue;
    }

    log(` ! Property type "${ptype}" not found in quick list, checking See All...`);
    // Try See All Property Types
    const seeAllRef = snap.output?.match(/button "See All Property Types" \[ref=(e\d+)\]/)?.[1];
    if (!seeAllRef) {
      log(` ! Could not find "${ptype}" checkbox`);
      continue;
    }
    ab(`click @${seeAllRef}`);
    await sleep(2000);
    snap = ab('snapshot -i');
    const cbRef2 = snap.output?.match(checkboxRe)?.[1];
    if (cbRef2) {
      ab(`click @${cbRef2}`);
      await sleep(500);
    } else {
      log(` ! Could not find "${ptype}" checkbox`);
    }
  }

  // Click Apply for property type
  snap = ab('snapshot -i');
  const ptApply = snap.output?.match(/button "Apply" \[ref=(e\d+)\]/)?.[1];
  if (ptApply) {
    ab(`click @${ptApply}`);
    await sleep(5000);
  }

  // -- Building Area (SF) --
  if (CONFIG.minSF) {
    log(` Setting min building area: ${CONFIG.minSF} SF`);
    snap = ab('snapshot -i');
    const sizeRef = snap.output?.match(/button "Size" \[ref=(e\d+)\]/)?.[1];
    if (sizeRef) {
      ab(`click @${sizeRef}`);
      await sleep(2000);

      // The snapshot shows two textbox "min" fields; the SECOND one is
      // Building Area (the first is Total Units).
      snap = ab('snapshot -i');
      const minRefs = [...(snap.output?.matchAll(/textbox "min" \[ref=(e\d+)\]/g) || [])];
      if (minRefs.length >= 2) {
        const areaMinRef = minRefs[1][1];
        // Click the field to open dropdown
        ab(`click @${areaMinRef}`);
        await sleep(1500);
        // Type the value
        ab(`type @${areaMinRef} "${CONFIG.minSF}"`);
        await sleep(1000);

        // Check for preset dropdown option (e.g., "50k sf")
        snap = ab('snapshot -i');
        const sfK = Math.round(Number.parseInt(CONFIG.minSF, 10) / 1000);
        const presetPatterns = [`${sfK}k sf`, `${CONFIG.minSF}`, `${sfK},000`];
        let presetClicked = false;
        for (const pat of presetPatterns) {
          const presetRef = snap.output?.match(
            new RegExp(
              `(?:menuitem|option|button|listitem) "${escapeRegExp(pat)}[^"]*" \\[ref=(e\\d+)\\]`,
              'i'
            )
          )?.[1];
          if (presetRef) {
            log(` Clicking preset: ${pat}`);
            ab(`click @${presetRef}`);
            presetClicked = true;
            await sleep(2000);
            break;
          }
        }
        if (!presetClicked) {
          // Press Enter to commit the typed value
          log(' No preset found, pressing Enter to commit');
          ab(`eval "document.querySelectorAll('input[placeholder=\\"min\\"]')[1]?.dispatchEvent(new KeyboardEvent('keydown', {key: 'Enter', keyCode: 13, bubbles: true}))"`);
          await sleep(1000);
        }
      }

      // Now try to click Apply
      await sleep(1000);
      snap = ab('snapshot -i');
      const sizeApply = snap.output?.match(
        /button "Apply" \[ref=(e\d+)\](?!\s*\[disabled\])/
      )?.[1];
      if (sizeApply) {
        ab(`click @${sizeApply}`);
        await sleep(5000);
      } else {
        // Check if filter tag already shows (e.g., "50000+ SF")
        const filterTag = snap.output?.match(/button "\d+.*SF" \[ref=(e\d+)\]/);
        if (filterTag) {
          log(' Size filter appears applied via tag');
        } else {
          // Try pressing Escape then check
          log(' Pressing Escape to close size panel');
          ab('eval "document.dispatchEvent(new KeyboardEvent(\'keydown\', {key: \'Escape\', bubbles: true}))"');
          await sleep(2000);
        }
      }
    }

    // Verify size filter is applied
    snap = ab('snapshot -i');
    const sfTag = snap.output?.match(/button "\d+.*SF" \[ref=(e\d+)\]/);
    if (sfTag) {
      log(` Size filter confirmed: ${sfTag[0]}`);
    } else {
      log(' WARNING: Size filter may not be applied');
    }
  }

  // -- Sale Date Filter --
  if (CONFIG.saleFilter) {
    log(` Setting sale filter: ${CONFIG.saleFilter}`);
    // Make sure no dropdowns are blocking: close any open panels first by
    // clicking the page body.
    await sleep(1000);
    ab('eval "document.body.click()"');
    await sleep(1000);

    snap = ab('snapshot -i');
    const moreRef = snap.output?.match(/button "More [Ff]ilters" \[ref=(e\d+)\]/)?.[1];
    if (moreRef) {
      ab(`click @${moreRef}`);
      await sleep(3000);

      // Click Sales tab
      snap = ab('snapshot -i');
      const salesRef = snap.output?.match(/tab "Sales[^"]*" \[ref=(e\d+)\]/)?.[1];
      if (salesRef) {
        ab(`click @${salesRef}`);
        await sleep(2000);

        // Parse filter config
        const { notWithin, period } = parseSaleFilter(CONFIG.saleFilter);
        if (!Object.hasOwn(SALE_PERIOD_LABELS, String(period))) {
          log(` WARNING: Unrecognized sale filter "${CONFIG.saleFilter}", defaulting to Past 10 years`);
        }
        const periodText = SALE_PERIOD_LABELS[period] || 'Past 10 years';

        // Click Not Within / Within using JS
        const withinLabel = notWithin ? 'Not Within' : 'Within';
        clickJssDiv(withinLabel, withinLabel);
        await sleep(1500);

        clickJssDiv(periodText, periodText);
        await sleep(1500);

        // Click Apply in more filters
        snap = ab('snapshot -i');
        const salesApply = snap.output?.match(/button "Apply" \[ref=(e\d+)\]/)?.[1];
        if (salesApply) {
          ab(`click @${salesApply}`);
          await sleep(5000);
        }
      }
    }
  }

  // -- Owner filters (phone/email) --
  if (CONFIG.ownerPhone || CONFIG.ownerEmail) {
    log(` Setting owner filters: phone=${CONFIG.ownerPhone}, email=${CONFIG.ownerEmail}`);
    snap = ab('snapshot -i');
    let moreRef = snap.output?.match(/button "More [Ff]ilters" \[ref=(e\d+)\]/)?.[1];

    if (!moreRef) {
      // Panel might already be open from sales filter: close and reopen.
      const closeRef = snap.output?.match(/button "Close" \[ref=(e\d+)\]/)?.[1];
      if (closeRef) {
        ab(`click @${closeRef}`);
        await sleep(1000);
        snap = ab('snapshot -i');
        moreRef = snap.output?.match(/button "More [Ff]ilters" \[ref=(e\d+)\]/)?.[1];
      }
    }

    if (moreRef) {
      ab(`click @${moreRef}`);
      await sleep(2000);
    }

    // Click Owner tab
    snap = ab('snapshot -i');
    const ownerTabRef = snap.output?.match(/tab "Owner[^"]*" \[ref=(e\d+)\]/)?.[1];
    if (ownerTabRef) {
      ab(`click @${ownerTabRef}`);
      await sleep(2000);

      // Use JS to click phone/email toggles
      if (CONFIG.ownerPhone) {
        clickJssDiv('Includes Phone Number', 'phone');
        await sleep(1000);
      }
      if (CONFIG.ownerEmail) {
        clickJssDiv('Includes Email Address', 'email');
        await sleep(1000);
      }

      snap = ab('snapshot -i');
      const ownerApply = snap.output?.match(/button "Apply" \[ref=(e\d+)\]/)?.[1];
      if (ownerApply) {
        ab(`click @${ownerApply}`);
        await sleep(5000);
      }
    }
  }

  // -- Get search ID and property count --
  await sleep(2000);
  const url = ab('eval "window.location.href"');
  const searchIdMatch = url.output?.match(/search\/([a-f0-9-]+)/);
  const searchId = searchIdMatch ? searchIdMatch[1] : 'unknown';

  snap = ab('snapshot');
  const countMatch = snap.output?.match(/heading "([0-9,]+) properties"/);
  const propertyCount = countMatch ? countMatch[1] : '?';

  log(`\n Search ready: ${propertyCount} properties`);
  log(` Search ID: ${searchId}`);
  log(` URL: ${url.output}`);
  if (!searchIdMatch) {
    log(' WARNING: Could not parse a search ID from the URL; navigating back to results may fail');
  }

  return searchId;
}

// -- EXTRACT OWNERS FROM TAB --

/** Link texts containing any of these are UI chrome, not owner names. */
const NON_OWNER_LINK_MARKERS = [
  'Call', 'Send', 'Sign', 'Advanced', 'http', "Don't", 'Google', 'Terms', 'Report',
];

/**
 * Parses an Owner-tab accessibility snapshot into owner contact records.
 *
 * A `link "<name>"` line starts a new owner; subsequent `button` lines with
 * phone numbers or email addresses are attached to that owner. Owners with no
 * phone and no email are dropped, and owners with the same (case-insensitive)
 * name are merged.
 *
 * @param {string} snapshot - Raw `snapshot -i` output.
 * @returns {Array<{name: string, phones: Array<{number: string, type: string}>, emails: string[]}>}
 */
function extractOwnersFromTab(snapshot) {
  if (!snapshot) return [];

  const owners = [];
  const hasContact = (o) => o.phones.length > 0 || o.emails.length > 0;
  let currentOwner = null;

  for (const line of snapshot.split('\n')) {
    const ownerMatch = line.match(/link "([^"]+)" \[ref=e\d+\]/);
    if (ownerMatch && !NON_OWNER_LINK_MARKERS.some((marker) => ownerMatch[1].includes(marker))) {
      const name = ownerMatch[1];
      if (name.length > 2 && name.length < 80 && /[A-Z]/.test(name)) {
        if (currentOwner && hasContact(currentOwner)) owners.push(currentOwner);
        // Strip trailing job titles / roles that Reonomy appends to the name.
        const cleanName = name
          .replace(/\s+(?:President|CEO|Manager|Member|Director|Officer|Offi|Secretary|Treasurer|VP|Vice President|Partner|Owner|Agent|Trustee|Chairman|Principal|Chief (?:Executive|Financial|Operating|Marketing)|Senior (?:Account|Vice|Manager|Director)|SHAR(?:EHOLDER)?|Shareholder|Authorized (?:Person|Agent|Rep)|Registered Agent|Statutory Agent|General Partner|Limited Partner|Managing Member|Sole Member|Organizer).*$/i, '')
          .replace(/\s+(?:senior|chief|managing|authorized|registered|offi)$/i, '')
          .trim();
        currentOwner = { name: cleanName, phones: [], emails: [] };
      }
    }

    // Capture the optional "1-" prefix in the regex itself. Checking
    // line.includes('1-') mis-prefixed numbers like 201-555-1234.
    const phoneMatch = line.match(/button "(1-)?(\d{3}-\d{3}-\d{4})(?:\s+(\w+))?" \[ref=e\d+\]/);
    if (phoneMatch && currentOwner) {
      currentOwner.phones.push({
        number: `${phoneMatch[1] || ''}${phoneMatch[2]}`,
        type: phoneMatch[3] || 'Unknown',
      });
    }

    const longPhoneMatch = line.match(/button "(\d{10,14})" \[ref=e\d+\]/);
    if (longPhoneMatch && currentOwner) {
      currentOwner.phones.push({ number: longPhoneMatch[1], type: 'Unknown' });
    }

    const emailMatch = line.match(/button "([a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,})" \[ref=e\d+\]/);
    if (emailMatch && currentOwner) {
      currentOwner.emails.push(emailMatch[1]);
    }
  }

  if (currentOwner && hasContact(currentOwner)) owners.push(currentOwner);

  // Dedupe by lower-cased name, merging phones and emails. Map keeps insertion order.
  const seen = new Map();
  for (const o of owners) {
    const key = o.name.toLowerCase();
    const ex = seen.get(key);
    if (!ex) {
      seen.set(key, { ...o });
      continue;
    }
    for (const p of o.phones) {
      if (!ex.phones.find((ep) => ep.number === p.number)) ex.phones.push(p);
    }
    for (const e of o.emails) {
      if (!ex.emails.includes(e)) ex.emails.push(e);
    }
  }
  return [...seen.values()];
}

// -- SCRAPE PROPERTIES --

/**
 * Extracts property result cards (level-6 headings that look like street
 * addresses) from a search-results snapshot.
 * @param {string|undefined} snapOutput
 * @returns {Array<{name: string, ref: string}>} Address (max 80 chars) and element ref.
 */
function parseAddresses(snapOutput) {
  const props = [];
  const matches = snapOutput?.matchAll(/heading "([^"]+)" \[ref=(e\d+)\] \[level=6\]/g) || [];
  for (const m of matches) {
    const text = m[1];
    if (
      text.includes('properties') ||
      text.includes('Recently') ||
      text.includes('Get the most') ||
      text.includes('What would') ||
      text.length < 10
    ) {
      continue;
    }
    if (
      /\d+.*,\s*[A-Z]{2}\s*\d{5}/i.test(text) ||
      /\d+.*(?:st|ave|blvd|dr|ln|rd|way|ct|highway|pl|cir|route|tpke|pkwy|pike|hwy|terr?|loop|pass|trail|sq|park|grove|run|plz)/i.test(text)
    ) {
      props.push({ name: text.substring(0, 80), ref: m[2] });
    }
  }
  return props;
}

/**
 * Normalizes a square-footage token ("178,880", "50k", "1.2m") to a plain
 * integer-like string ("178880", "50000", "1200000").
 * @param {string} raw
 * @returns {string}
 */
function normalizeSfValue(raw) {
  const value = raw.replace(/,/g, '');
  const lower = value.toLowerCase();
  if (lower.endsWith('k')) return String(Math.round(parseFloat(value.slice(0, -1)) * 1000));
  if (lower.endsWith('m')) return String(Math.round(parseFloat(value.slice(0, -1)) * 1000000));
  return value;
}

/**
 * Finds the building square footage in property page text.
 * Strategy: Building & Lot tab data first (most accurate), then the search
 * listing entry for this address, then any plausible SF value on the page.
 * @param {string} normalizedText - Page text with real newlines.
 * @param {string} propertyAddress
 * @returns {string|undefined}
 */
function extractSquareFootage(normalizedText, propertyAddress) {
  // 1. "Building Area\n178,880 sf" from the Building & Lot tab
  const baMatch = normalizedText.match(/Building Area\s+([\d,]+(?:\.\d+)?)\s*sf/i);
  if (baMatch) {
    const value = baMatch[1].replace(/,/g, '');
    if (value) return value;
  }

  // 2. Search listing format: this property's address followed by "NNk SF"
  const addrSfMatch = normalizedText.match(
    new RegExp(
      `${escapeRegExp(propertyAddress)}\\s+((?:[\\d,]+(?:\\.\\d+)?)[km]?)\\s*(?:SF|sq\\.?\\s*ft)`,
      'i'
    )
  );
  if (addrSfMatch) {
    const value = normalizeSfValue(addrSfMatch[1]);
    if (value) return value;
  }

  // 3. Fallback: any SF value that's not the filter badge (> 500 and not right after "min")
  for (const m of normalizedText.matchAll(/([\d,]+(?:\.\d+)?[km]?)\s*(?:SF|sf|sq\.?\s*ft)/g)) {
    const prefix = normalizedText.substring(Math.max(0, m.index - 30), m.index);
    if (prefix.includes('min:') || prefix.includes('min ') || prefix.includes('min\n')) continue;
    const value = normalizeSfValue(m[1]);
    if (Number.parseInt(value, 10) > 500) return value;
  }
  return undefined;
}

/**
 * Walks the current search results, opening each property, reading its
 * details and Owner tab, and collecting leads that have contact info.
 * Honors CONFIG.maxProperties and the daily cap in CONFIG.maxDailyProperties.
 *
 * @param {string} searchId - Search to return to after each property.
 * @returns {Promise<Array<object>>} Leads captured in this run.
 */
async function scrapeProperties(searchId) {
  log('\n== Scraping properties ==');

  const dailyStats = getDailyStats();
  const remainingToday = CONFIG.maxDailyProperties - dailyStats.propertiesScraped;
  const maxThisRun = Math.min(CONFIG.maxProperties, remainingToday);
  log(` Daily: ${dailyStats.propertiesScraped} done, ${remainingToday} remaining, this run: max ${maxThisRun}`);

  if (maxThisRun <= 0) {
    log(' Daily limit reached!');
    return [];
  }

  const leads = [];
  const processed = new Set(); // Track processed addresses
  const searchUrl = `https://app.reonomy.com/!/search/${searchId}`;

  await dismissBanners();

  // Process properties one at a time: always use CURRENT page state
  let scraped = 0;
  let scrollCount = 0;
  const maxScrolls = 10;

  while (scraped < maxThisRun && scrollCount <= maxScrolls) {
    // Take fresh snapshot of current search results
    const snap = ab('snapshot');
    const visible = parseAddresses(snap.output);

    // Find next unprocessed property
    const next = visible.find((p) => !processed.has(p.name));
    if (!next) {
      // All visible properties processed, scroll for more
      scrollCount++;
      log(` All visible processed, scrolling... (${scrollCount}/${maxScrolls})`);
      ab('scroll down 600', { verbose: false });
      await sleep(2000);
      continue;
    }

    processed.add(next.name);
    scraped++;
    log(`\n [${scraped}/${maxThisRun}] ${next.name.substring(0, 60)}`);

    try {
      // Click the property (ref is fresh from current snapshot)
      const clickResult = ab(`click @${next.ref}`);
      if (!clickResult.success) {
        log(' Click failed, skipping');
        dailyStats.propertiesScraped++;
        saveDailyStats(dailyStats); // persist so skipped attempts count toward the daily cap
        continue;
      }
      await randomDelay(5000, 8000);

      // Get property URL and ID
      const propUrl = ab('eval "window.location.href"');
      const propertyId = propUrl.output?.match(/property\/([a-f0-9-]+)/)?.[1] || 'unknown';

      // Verify we're on a property page (not still on search)
      if (!propUrl.output?.includes('/property/')) {
        log(' Did not navigate to property page, skipping');
        dailyStats.propertiesScraped++;
        saveDailyStats(dailyStats);
        continue;
      }

      // Use address from search results (already validated by parseAddresses).
      // DO NOT overwrite with detail page headings - they contain pagination
      // text like "1 of 2,792 properties".
      let propertyAddress = next.name;
      const propertyInfo = {};

      // Parse city/state/zip from the address string (e.g. "9835 N Virginia St, Reno, NV 89506")
      const addrParts = propertyAddress.match(/,\s*([^,]+),\s*([A-Z]{2})\s*(\d{5})/i);
      if (addrParts) {
        // Title-case the city
        propertyInfo.city = addrParts[1].trim().replace(/\b\w/g, (c) => c.toUpperCase());
        propertyInfo.state = addrParts[2].toUpperCase();
        propertyInfo.zip = addrParts[3];
      }

      // Get page text via eval, then parse in Node.
      // Limit to first 5000 chars to avoid huge output issues.
      const pageTextResult = ab('eval "document.body.innerText.substring(0, 5000)"', { timeout: 15000 });
      const pageText = pageTextResult.output || '';

      // Also try clicking the Building & Lot tab for more details
      let fullPageText = pageText;
      let blTabResult = ab('find role tab click --name "Building & Lot"', { timeout: 15000 });
      if (!blTabResult.success) {
        // Try alternate name
        blTabResult = ab('find role tab click --name "Building and Lot"', { timeout: 10000 });
      }
      if (blTabResult.success) {
        await sleep(3000);
        const blText = ab('eval "document.body.innerText.substring(0, 5000)"', { timeout: 15000 });
        fullPageText = pageText + '\n' + (blText.output || '');
      }

      // Save page text for debugging (first 2 properties only)
      if (scraped <= 2) {
        fs.writeFileSync(`/tmp/reonomy-pagetext-${scraped}.txt`, fullPageText.substring(0, 10000));
        log(` Saved page text to /tmp/reonomy-pagetext-${scraped}.txt (${fullPageText.length} chars)`);
      }

      // NOTE: agent-browser eval returns innerText with literal \n (backslash-n),
      // not actual newlines. Convert for easier parsing.
      const normalizedText = fullPageText.replace(/\\n/g, '\n');

      // Try to get a properly-cased address from the detail page text
      const properCaseAddr = normalizedText.match(/\n([\d]+ [^\n]+, [A-Z]{2} \d{5})\n/);
      if (
        properCaseAddr &&
        propertyInfo.city &&
        properCaseAddr[1].toLowerCase().includes(propertyInfo.city.toLowerCase())
      ) {
        propertyAddress = properCaseAddr[1];
        // Re-extract city with proper casing
        const newParts = propertyAddress.match(/,\s*([^,]+),\s*([A-Z]{2})\s*(\d{5})/i);
        if (newParts) {
          propertyInfo.city = newParts[1].trim();
        }
      }

      const squareFootage = extractSquareFootage(normalizedText, propertyAddress);
      if (squareFootage) propertyInfo.squareFootage = squareFootage;

      // Year built: "Year Built\n2025" (B&L tab) or "Built in 2025" (search listing)
      const yearMatch =
        normalizedText.match(/Year Built\s+(\d{4})/i) || normalizedText.match(/Built in (\d{4})/i);
      if (yearMatch) propertyInfo.yearBuilt = yearMatch[1];

      // Property type: "Property Type\nIndustrial | General Industrial" from B&L tab.
      // Case-sensitive "Type" avoids the filter badge "Property type: Industrial".
      const ptBLMatch = normalizedText.match(/Property Type\s*\n\s*([^\n]+)/);
      if (ptBLMatch && ptBLMatch[1].length < 60 && ptBLMatch[1].length > 2) {
        // "Industrial | General Industrial" -> "General Industrial"
        const parts = ptBLMatch[1].split('|').map((s) => s.trim());
        propertyInfo.propertyType = parts[parts.length - 1];
      }
      // Fallback: extract from "NNk SF TypeName" pattern in search listing
      if (!propertyInfo.propertyType) {
        const sfTypeMatch = normalizedText.match(/(?:SF|sf|sq\.?\s*ft)\s+((?:General |Light )?(?:Industrial|Warehouse|Manufacturing|Distribution|Flex|Storage|Transportation|Office|Retail|Mixed Use|Hotel|Factory|Special Purpose))/i);
        if (sfTypeMatch) propertyInfo.propertyType = sfTypeMatch[1].trim();
      }

      // Lot size: "Lot Area Acres\n14.22 acres" or "14.22 Acres"
      const lotMatch =
        normalizedText.match(/Lot Area Acres\s+([\d.]+)\s*acres/i) ||
        normalizedText.match(/([\d.]+)\s*Acres/i);
      if (lotMatch) propertyInfo.lotSize = lotMatch[1] + ' Acres';

      // Units: "Total Units\n2" or "N Units"
      const unitsMatch =
        normalizedText.match(/Total Units\s+(\d+)/i) ||
        normalizedText.match(/(\d+)\s*Units?(?!\w)/i);
      if (unitsMatch) propertyInfo.units = unitsMatch[1];

      // Fallback: try snapshot text for SF if eval didn't get it
      if (!propertyInfo.squareFootage) {
        const iSnap = ab('snapshot -i');
        const buildText = iSnap.output || '';
        const sfFallback = buildText.match(/(\d[\d,]*(?:\.\d+)?)\s*(?:SF|Sq\.?\s*Ft)/i);
        if (sfFallback) propertyInfo.squareFootage = sfFallback[1].replace(/,/g, '');
      }

      log(` ${propertyAddress} | ${JSON.stringify(propertyInfo)}`);

      // Click Owner tab
      log(' Opening Owner tab...');
      ab('find role tab click --name "Owner"');
      await randomDelay(4000, 6000);

      // Extract contacts
      const ownerSnap = ab('snapshot -i');
      const owners = extractOwnersFromTab(ownerSnap.output || '');

      if (owners.length === 0) {
        log(' No contacts found');
      } else {
        const totalPhones = owners.reduce((s, o) => s + o.phones.length, 0);
        const totalEmails = owners.reduce((s, o) => s + o.emails.length, 0);
        log(` ${owners.length} owners, ${totalPhones} phones, ${totalEmails} emails`);

        leads.push({
          scrapeDate: new Date().toISOString(),
          propertyId,
          propertyAddress,
          ...propertyInfo,
          owners: owners.map((o) => ({ name: o.name, phones: o.phones, emails: o.emails })),
        });
        dailyStats.leadsFound++;
        log(' Lead captured!');
      }

      dailyStats.propertiesScraped++;
      saveDailyStats(dailyStats);

      // Navigate back to search
      ab(`open "${searchUrl}"`);
      await randomDelay(5000, 8000);

      // Random longer break
      if (Math.random() < 0.15) {
        log(' Taking a short break...');
        await randomDelay(8000, 15000);
      }
    } catch (propError) {
      log(` Error: ${propError.message}`);
      ab(`open "${searchUrl}"`);
      await sleep(6000);
      dailyStats.propertiesScraped++;
      saveDailyStats(dailyStats);
    }
  }

  return leads;
}

// -- MAIN --

/**
 * Entry point: logs in, builds the search, scrapes properties, and appends
 * the new leads to CONFIG.outputPath. Always closes the browser.
 * @returns {Promise<Array<object>>} Leads captured in this run.
 */
async function main() {
  // Make sure the workspace exists before the first log write.
  fs.mkdirSync(path.dirname(CONFIG.logPath), { recursive: true });
  // Clear log
  fs.writeFileSync(CONFIG.logPath, '');

  log('=== Reonomy Scraper v14 ===');
  log(`Filters: state=${CONFIG.searchState}, types=${CONFIG.propertyTypes}, minSF=${CONFIG.minSF}, sale=${CONFIG.saleFilter}`);
  log(`Owner filters: phone=${CONFIG.ownerPhone}, email=${CONFIG.ownerEmail}`);
  log(`Max properties: ${CONFIG.maxProperties}`);

  try {
    // Login
    log('\n== Step 1: Login ==');
    await login();

    // Build search
    const searchId = await buildSearch();

    // Scrape
    const leads = await scrapeProperties(searchId);

    // Save results
    log('\n== Saving results ==');
    let allLeads = [];
    try {
      const existing = JSON.parse(fs.readFileSync(CONFIG.outputPath, 'utf8'));
      allLeads = existing.leads || [];
    } catch (e) {
      // Best effort: no (or unreadable) previous output means we start a new list.
    }
    allLeads = [...allLeads, ...leads];

    const output = {
      lastUpdated: new Date().toISOString(),
      searchId,
      filters: {
        state: CONFIG.searchState,
        propertyTypes: CONFIG.propertyTypes,
        minSF: CONFIG.minSF,
        saleFilter: CONFIG.saleFilter,
        ownerPhone: CONFIG.ownerPhone,
        ownerEmail: CONFIG.ownerEmail,
      },
      totalLeads: allLeads.length,
      leads: allLeads,
    };
    fs.writeFileSync(CONFIG.outputPath, JSON.stringify(output, null, 2));
    log(`Saved ${leads.length} new leads (${allLeads.length} total)`);
    return leads;
  } catch (error) {
    log(`\nFATAL: ${error.message}`);
    ab('screenshot /tmp/reonomy-v14-error.png');
    throw error;
  } finally {
    log('\nClosing browser...');
    ab('close');
  }
}

main()
  .then((leads) => {
    log(`\nDone! ${leads.length} leads scraped.`);
    process.exit(0);
  })
  .catch((err) => {
    log(`\nFailed: ${err.message}`);
    process.exit(1);
  });