clawdbot-workspace/reonomy-scraper-v14.js

770 lines
27 KiB
JavaScript
Executable File

#!/usr/bin/env node
/**
* Reonomy Scraper v14 - Self-Configuring Search + Extract
*
* Sets up search filters IN THE SAME browser session, then scrapes.
* No more search ID handoff problem.
*
* ENV CONFIG:
* REONOMY_STATE - Location filter (e.g., "New Jersey")
* REONOMY_TYPES - Comma-separated property types (e.g., "Industrial")
* REONOMY_MIN_SF - Min building area in SF (e.g., "50000")
* REONOMY_SALE_FILTER - "not_within_10y", "not_within_5y", "not_within_2y", "within_10y", etc.
* REONOMY_OWNER_PHONE - "true" to require phone
* REONOMY_OWNER_EMAIL - "true" to require email
* MAX_PROPERTIES - Max properties to scrape (default 20)
* HEADLESS - "false" to show browser
*/
const { execSync } = require('child_process');
const fs = require('fs');
const path = require('path');
/**
 * Runtime configuration, resolved once at startup from environment variables
 * (see the ENV CONFIG list in the file header).
 *
 * SECURITY NOTE(review): the `email` / `password` fallbacks below are credentials
 * committed to source. They are kept so existing invocations keep working, but
 * they should be rotated and supplied only via REONOMY_EMAIL / REONOMY_PASSWORD.
 */
const CONFIG = {
  // All persistent files live under ~/.clawdbot/workspace
  authStatePath: path.join(process.env.HOME, '.clawdbot/workspace/reonomy-auth.json'),
  outputPath: path.join(process.env.HOME, '.clawdbot/workspace/reonomy-leads-v14.json'),
  logPath: path.join(process.env.HOME, '.clawdbot/workspace/reonomy-scraper-v14.log'),
  dailyLogPath: path.join(process.env.HOME, '.clawdbot/workspace/reonomy-daily-stats.json'),
  // Explicit radix so a value such as "0x10" is not read as hex; unset, non-numeric or 0 falls back to 20
  maxProperties: Number.parseInt(process.env.MAX_PROPERTIES, 10) || 20,
  maxDailyProperties: 50,
  email: process.env.REONOMY_EMAIL || 'henry@realestateenhanced.com',
  password: process.env.REONOMY_PASSWORD || '9082166532',
  // Search filters
  searchState: process.env.REONOMY_STATE || 'New Jersey',
  // Drop empty entries (e.g. from a trailing comma) so they are not looked up as checkboxes later
  propertyTypes: (process.env.REONOMY_TYPES || 'Industrial')
    .split(',')
    .map(s => s.trim())
    .filter(Boolean),
  minSF: process.env.REONOMY_MIN_SF || '',
  saleFilter: process.env.REONOMY_SALE_FILTER || '', // e.g., "not_within_10y"
  ownerPhone: process.env.REONOMY_OWNER_PHONE === 'true',
  ownerEmail: process.env.REONOMY_OWNER_EMAIL === 'true',
};
/**
 * Prints a timestamped message to stdout and appends the same line to the
 * run log at CONFIG.logPath.
 * @param {string} msg - Text to record.
 */
function log(msg) {
  const entry = `[${new Date().toISOString()}] ${msg}`;
  console.log(entry);
  fs.appendFileSync(CONFIG.logPath, `${entry}\n`);
}
/**
 * Runs one `agent-browser` CLI command synchronously.
 * @param {string} cmd - Arguments appended after `agent-browser`.
 * @param {{verbose?: boolean, timeout?: number}} [options] - `verbose: false`
 *   silences logging of the command and of any error; `timeout` is in ms
 *   (30000 when not given).
 * @returns {{success: true, output: string} | {success: false, error: string}}
 *   Trimmed stdout on success; stderr (or the error message) on failure.
 */
function ab(cmd, options = {}) {
  const command = `agent-browser ${cmd}`;
  const shouldLog = options.verbose !== false;
  if (shouldLog) {
    log(` > ${command.substring(0, 120)}`);
  }
  try {
    const stdout = execSync(command, {
      encoding: 'utf8',
      timeout: options.timeout || 30000,
      stdio: ['pipe', 'pipe', 'pipe']
    }).trim();
    return { success: true, output: stdout };
  } catch (err) {
    // Prefer the child's stderr; fall back to the exception message
    const details = err.stderr?.toString() || err.message;
    if (shouldLog) {
      log(` ! Error: ${details.substring(0, 200)}`);
    }
    return { success: false, error: details };
  }
}
/**
 * Returns a promise that resolves (with no value) after `ms` milliseconds.
 * @param {number} ms - Delay passed to setTimeout.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
/**
 * Sleeps for a random whole number of milliseconds in [min, max].
 * @param {number} min - Shortest delay in ms.
 * @param {number} max - Longest delay in ms.
 * @returns {Promise<void>}
 */
function randomDelay(min, max) {
  const span = max - min + 1;
  const offset = Math.floor(Math.random() * span);
  return sleep(offset + min);
}
/**
 * Returns a shuffled copy of `arr`; the input is left untouched.
 * Walks from the last index down to 1, swapping each slot with a randomly
 * chosen slot at or below it.
 * @param {Array} arr - Values to shuffle.
 * @returns {Array} New array holding the same elements in random order.
 */
function shuffle(arr) {
  const result = [...arr];
  let end = result.length;
  while (end > 1) {
    end -= 1;
    const pick = Math.floor(Math.random() * (end + 1));
    const held = result[end];
    result[end] = result[pick];
    result[pick] = held;
  }
  return result;
}
/**
 * Loads today's counters from CONFIG.dailyLogPath.
 * @returns {{date: string, propertiesScraped: number, leadsFound: number}}
 *   The stored object when its `date` equals today's UTC date (YYYY-MM-DD);
 *   otherwise — including when the file is missing or unreadable — a fresh
 *   zeroed record for today.
 */
function getDailyStats() {
  const [today] = new Date().toISOString().split('T');
  const fresh = { date: today, propertiesScraped: 0, leadsFound: 0 };
  try {
    const stored = JSON.parse(fs.readFileSync(CONFIG.dailyLogPath, 'utf8'));
    return stored.date === today ? stored : fresh;
  } catch (_err) {
    // Best effort: any read/parse problem just starts a new day record
    return fresh;
  }
}
/**
 * Overwrites CONFIG.dailyLogPath with `stats` as 2-space-indented JSON.
 * @param {object} stats - Daily counters to persist.
 */
function saveDailyStats(stats) {
  const serialized = JSON.stringify(stats, null, 2);
  fs.writeFileSync(CONFIG.dailyLogPath, serialized);
}
// ── LOGIN ──
/**
 * Escapes a value so it can sit inside a double-quoted POSIX shell argument
 * without ending the quotes or triggering expansion (\ " $ ` are escaped).
 * @param {string} value - Raw text.
 * @returns {string} Text safe to place between double quotes.
 */
function escapeForDoubleQuotes(value) {
  return String(value).replace(/[\\"$`]/g, '\\$&');
}

/**
 * Logs into Reonomy through the browser session and saves the auth state.
 *
 * If the login form is absent but the current URL is already inside
 * app.reonomy.com (and not a login/auth page), the session is treated as
 * logged in.
 *
 * @returns {Promise<boolean>} Always `true` on success.
 * @throws {Error} When the form or its elements cannot be found, or when the
 *   browser is still on a login/auth URL after submitting.
 */
async function login() {
  log(' Navigating to login...');
  ab('open "https://app.reonomy.com/#!/login"');
  await sleep(4000);
  const snap = ab('snapshot -i');
  if (!snap.output?.includes('textbox "Email"')) {
    const url = ab('eval "window.location.href"');
    if (url.output?.includes('app.reonomy.com') && !url.output?.includes('login') && !url.output?.includes('auth.reonomy.com')) {
      log(' Already logged in!');
      return true;
    }
    throw new Error('Login form not found');
  }
  const emailRef = snap.output.match(/textbox "Email" \[ref=(e\d+)\]/)?.[1];
  const passRef = snap.output.match(/textbox "Password" \[ref=(e\d+)\]/)?.[1];
  const loginRef = snap.output.match(/button "Log In" \[ref=(e\d+)\]/)?.[1];
  if (!emailRef || !passRef || !loginRef) throw new Error('Login form elements not found');
  ab(`fill @${emailRef} "${escapeForDoubleQuotes(CONFIG.email)}"`);
  await sleep(1000);
  // verbose: false keeps the password out of stdout and the log file; a masked line is logged instead
  log(` > agent-browser fill @${passRef} "********"`);
  const passFill = ab(`fill @${passRef} "${escapeForDoubleQuotes(CONFIG.password)}"`, { verbose: false });
  if (!passFill.success) {
    // Details are deliberately withheld: the failing command line contains the password
    log(' ! Error: password fill failed');
  }
  await sleep(1000);
  ab(`click @${loginRef}`);
  // Give the post-login redirect time to settle before checking the URL
  await randomDelay(12000, 16000);
  const postUrl = ab('eval "window.location.href"');
  if (postUrl.output?.includes('auth.reonomy.com') || postUrl.output?.includes('login')) {
    throw new Error('Login failed');
  }
  ab(`state save "${escapeForDoubleQuotes(CONFIG.authStatePath)}"`);
  log(' Login successful!');
  return true;
}
// ── BUILD SEARCH ──
/**
 * Escapes regular-expression metacharacters so user-supplied text (state
 * name, property type, ...) is matched literally inside `new RegExp`.
 * @param {string} text - Raw text.
 * @returns {string} Pattern-safe text.
 */
function escapeRegExp(text) {
  return String(text).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/**
 * Translates a REONOMY_SALE_FILTER value ("not_within_10y", "within_5y", ...)
 * into the two UI labels to click.
 *
 * The previous `split('_within_')` only worked for the "not_within_*" form;
 * plain "within_*" values never split, so their period was lost and always
 * fell back to "Past 10 years".
 *
 * @param {string} saleFilter - Raw filter string.
 * @returns {{withinLabel: string, periodText: string, recognized: boolean}}
 *   `recognized` is false when the period could not be mapped and the
 *   "Past 10 years" fallback was used.
 */
function parseSaleFilter(saleFilter) {
  const periodLabels = new Map([
    ['90d', 'Past 90 days'],
    ['1y', 'Past year'],
    ['2y', 'Past 2 years'],
    ['5y', 'Past 5 years'],
    ['10y', 'Past 10 years'],
  ]);
  const parsed = /^(not_)?within_(\w+)$/.exec(String(saleFilter).trim());
  const periodText = periodLabels.get(parsed?.[2]);
  return {
    withinLabel: parsed?.[1] ? 'Not Within' : 'Within',
    periodText: periodText || 'Past 10 years',
    recognized: Boolean(periodText),
  };
}

/**
 * Builds the `eval` command that clicks the first <div> whose trimmed text
 * equals `text` and whose class name contains "jss". The evaluated script
 * ends with the expression "<label>: <found>".
 * @param {string} varName - Identifier used for the matched element in the script.
 * @param {string} text - Exact visible text of the div to click.
 * @param {string} label - Prefix of the script's result string.
 * @returns {string} Command string for `ab`.
 */
function buildClickDivEval(varName, text, label) {
  return [
    'eval "',
    "const divs = Array.from(document.querySelectorAll('div'));",
    `const ${varName} = divs.find(d => d.textContent.trim() === '${text}' && d.className.includes('jss'));`,
    `if (${varName}) ${varName}.click();`,
    `'${label}: ' + !!${varName};`,
    '"',
  ].join('\n');
}

/**
 * Configures the Reonomy search in the current browser session: location,
 * property types, minimum building area, sale-date filter and owner contact
 * filters, all taken from CONFIG.
 *
 * @returns {Promise<string>} The search id parsed from the final URL, or
 *   'unknown' when the URL does not contain one.
 * @throws {Error} When the search box, the state suggestion or the Property
 *   Type button cannot be found.
 */
async function buildSearch() {
  log('\n== Building search with filters ==');
  // Go to search page via Advanced Search
  ab('open "https://app.reonomy.com/#!/search"');
  await sleep(5000);
  // Dismiss banners (at most five "Let's Go" buttons)
  for (let i = 0; i < 5; i++) {
    const s = ab('snapshot -i', { verbose: false });
    const lg = s.output?.match(/button "Let's Go" \[ref=(e\d+)\]/);
    if (!lg) break;
    ab(`click @${lg[1]}`, { verbose: false });
    await sleep(800);
  }
  // Click Advanced Search if visible
  let snap = ab('snapshot -i');
  const advRef = snap.output?.match(/link "Advanced Search" \[ref=(e\d+)\]/)?.[1];
  if (advRef) {
    ab(`click @${advRef}`);
    await sleep(3000);
  }

  // ── Location ──
  log(` Setting location: ${CONFIG.searchState}`);
  // Take FRESH snapshot after Advanced Search click (refs changed)
  snap = ab('snapshot -i');
  let searchBox = snap.output?.match(/textbox "(?:Search by address, location, or owner|Address, Location, or Owner)" \[ref=(e\d+)\]/)?.[1];
  if (!searchBox) throw new Error('Search box not found');
  // Use fill to set the text, then manually trigger the autocomplete
  ab(`fill @${searchBox} "${CONFIG.searchState}"`);
  await sleep(3000);
  // The state name is matched literally, so names containing regex characters still work
  const statePattern = new RegExp(`menuitem "${escapeRegExp(CONFIG.searchState)}" \\[ref=(e\\d+)\\]`);
  // Retry up to 4 times to find the state suggestion
  let stateRef = null;
  for (let attempt = 0; attempt < 4 && !stateRef; attempt++) {
    snap = ab('snapshot -i');
    stateRef = snap.output?.match(statePattern)?.[1];
    if (!stateRef) {
      log(` State not found (attempt ${attempt + 1}), retrying...`);
      // Re-find search box (ref might have changed)
      searchBox = snap.output?.match(/textbox "(?:Search by address|Address)[^"]*" \[ref=(e\d+)\]/)?.[1]
        || snap.output?.match(/textbox "[^"]*" \[ref=(e\d+)\]/)?.[1];
      if (searchBox) {
        // Clear and retype
        ab(`fill @${searchBox} ""`);
        await sleep(500);
        ab(`fill @${searchBox} "${CONFIG.searchState}"`);
        await sleep(3000);
      } else {
        await sleep(2000);
      }
    }
  }
  if (!stateRef) throw new Error(`State "${CONFIG.searchState}" not found in suggestions`);
  ab(`click @${stateRef}`);
  await sleep(6000);

  // ── Property Type ──
  log(` Setting property types: ${CONFIG.propertyTypes.join(', ')}`);
  snap = ab('snapshot -i');
  const ptRef = snap.output?.match(/button "Property Type" \[ref=(e\d+)\]/)?.[1];
  if (!ptRef) throw new Error('Property Type button not found');
  ab(`click @${ptRef}`);
  await sleep(2000);
  snap = ab('snapshot -i');
  for (const ptype of CONFIG.propertyTypes) {
    // Literal match: type names may contain regex characters such as parentheses
    const checkboxPattern = new RegExp(`checkbox "${escapeRegExp(ptype)}" \\[ref=(e\\d+)\\]`);
    const cbRef = snap.output?.match(checkboxPattern)?.[1];
    if (cbRef) {
      ab(`click @${cbRef}`);
      await sleep(500);
    } else {
      log(` ! Property type "${ptype}" not found in quick list, checking See All...`);
      // Try See All Property Types
      const seeAllRef = snap.output?.match(/button "See All Property Types" \[ref=(e\d+)\]/)?.[1];
      if (seeAllRef) {
        ab(`click @${seeAllRef}`);
        await sleep(2000);
        snap = ab('snapshot -i');
        const cbRef2 = snap.output?.match(checkboxPattern)?.[1];
        if (cbRef2) {
          ab(`click @${cbRef2}`);
          await sleep(500);
        } else {
          log(` ! Could not find "${ptype}" checkbox`);
        }
      }
    }
  }
  // Click Apply for property type
  snap = ab('snapshot -i');
  const ptApply = snap.output?.match(/button "Apply" \[ref=(e\d+)\]/)?.[1];
  if (ptApply) {
    ab(`click @${ptApply}`);
    await sleep(5000);
  }

  // ── Building Area (SF) ──
  if (CONFIG.minSF) {
    log(` Setting min building area: ${CONFIG.minSF} SF`);
    snap = ab('snapshot -i');
    const sizeRef = snap.output?.match(/button "Size" \[ref=(e\d+)\]/)?.[1];
    if (sizeRef) {
      ab(`click @${sizeRef}`);
      await sleep(2000);
      // Click the Building Area min dropdown button to open presets
      // The full snapshot shows: Building Area (SF) with a button containing textbox "min"
      // We need the SECOND button with textbox "min" (first is Total Units)
      snap = ab('snapshot -i');
      // Find the second min textbox (Building Area)
      const minRefs = [...(snap.output?.matchAll(/textbox "min" \[ref=(e\d+)\]/g) || [])];
      if (minRefs.length >= 2) {
        const areaMinRef = minRefs[1][1];
        // Click the field to open dropdown
        ab(`click @${areaMinRef}`);
        await sleep(1500);
        // Type the value
        ab(`type @${areaMinRef} "${CONFIG.minSF}"`);
        await sleep(1000);
        // Check for preset dropdown option (e.g., "50k sf")
        snap = ab('snapshot -i');
        const sfK = Math.round(Number.parseInt(CONFIG.minSF, 10) / 1000);
        const presetPatterns = [`${sfK}k sf`, `${CONFIG.minSF}`, `${sfK},000`];
        let presetClicked = false;
        for (const pat of presetPatterns) {
          const presetRef = snap.output?.match(new RegExp(`(?:menuitem|option|button|listitem) "${escapeRegExp(pat)}[^"]*" \\[ref=(e\\d+)\\]`, 'i'))?.[1];
          if (presetRef) {
            log(` Clicking preset: ${pat}`);
            ab(`click @${presetRef}`);
            presetClicked = true;
            await sleep(2000);
            break;
          }
        }
        if (!presetClicked) {
          // Press Enter to commit the typed value
          log(' No preset found, pressing Enter to commit');
          ab(`eval "document.querySelectorAll('input[placeholder=\\"min\\"]')[1]?.dispatchEvent(new KeyboardEvent('keydown', {key: 'Enter', keyCode: 13, bubbles: true}))"`);
          await sleep(1000);
        }
      }
      // Now try to click Apply
      await sleep(1000);
      snap = ab('snapshot -i');
      const sizeApply = snap.output?.match(/button "Apply" \[ref=(e\d+)\](?!\s*\[disabled\])/)?.[1];
      if (sizeApply) {
        ab(`click @${sizeApply}`);
        await sleep(5000);
      } else {
        // Check if filter tag already shows (e.g., "50000+ SF")
        const filterTag = snap.output?.match(/button "\d+.*SF" \[ref=(e\d+)\]/);
        if (filterTag) {
          log(' Size filter appears applied via tag');
        } else {
          // Try pressing Escape then check
          log(' Pressing Escape to close size panel');
          ab('eval "document.dispatchEvent(new KeyboardEvent(\'keydown\', {key: \'Escape\', bubbles: true}))"');
          await sleep(2000);
        }
      }
    }
    // Verify size filter is applied
    snap = ab('snapshot -i');
    const sfTag = snap.output?.match(/button "\d+.*SF" \[ref=(e\d+)\]/);
    if (sfTag) {
      log(` Size filter confirmed: ${sfTag[0]}`);
    } else {
      log(' WARNING: Size filter may not be applied');
    }
  }

  // ── Sale Date Filter ──
  if (CONFIG.saleFilter) {
    log(` Setting sale filter: ${CONFIG.saleFilter}`);
    // Make sure no dropdowns are blocking
    await sleep(1000);
    // Close any open panels first by clicking the page body
    ab('eval "document.body.click()"');
    await sleep(1000);
    snap = ab('snapshot -i');
    const moreRef = snap.output?.match(/button "More [Ff]ilters" \[ref=(e\d+)\]/)?.[1];
    if (moreRef) {
      ab(`click @${moreRef}`);
      await sleep(3000);
      // Click Sales tab
      snap = ab('snapshot -i');
      const salesRef = snap.output?.match(/tab "Sales[^"]*" \[ref=(e\d+)\]/)?.[1];
      if (salesRef) {
        ab(`click @${salesRef}`);
        await sleep(2000);
        // Parse filter config
        const { withinLabel, periodText, recognized } = parseSaleFilter(CONFIG.saleFilter);
        if (!recognized) {
          log(` ! Unrecognized sale filter "${CONFIG.saleFilter}", defaulting to ${withinLabel} / ${periodText}`);
        }
        // Click Not Within / Within using JS
        ab(buildClickDivEval('btn', withinLabel, withinLabel));
        await sleep(1500);
        ab(buildClickDivEval('btn', periodText, periodText));
        await sleep(1500);
        // Click Apply in more filters
        snap = ab('snapshot -i');
        const salesApply = snap.output?.match(/button "Apply" \[ref=(e\d+)\]/)?.[1];
        if (salesApply) {
          ab(`click @${salesApply}`);
          await sleep(5000);
        }
      }
    }
  }

  // ── Owner filters (phone/email) ──
  if (CONFIG.ownerPhone || CONFIG.ownerEmail) {
    log(` Setting owner filters: phone=${CONFIG.ownerPhone}, email=${CONFIG.ownerEmail}`);
    snap = ab('snapshot -i');
    let moreRef = snap.output?.match(/button "More [Ff]ilters" \[ref=(e\d+)\]/)?.[1];
    // If More Filters panel is already open, look for the close/filter button
    if (!moreRef) {
      // Panel might already be open from sales filter
      const closeRef = snap.output?.match(/button "Close" \[ref=(e\d+)\]/)?.[1];
      if (closeRef) {
        // Close and reopen
        ab(`click @${closeRef}`);
        await sleep(1000);
        snap = ab('snapshot -i');
        moreRef = snap.output?.match(/button "More [Ff]ilters" \[ref=(e\d+)\]/)?.[1];
      }
    }
    if (moreRef) {
      ab(`click @${moreRef}`);
      await sleep(2000);
    }
    // Click Owner tab
    snap = ab('snapshot -i');
    const ownerTabRef = snap.output?.match(/tab "Owner[^"]*" \[ref=(e\d+)\]/)?.[1];
    if (ownerTabRef) {
      ab(`click @${ownerTabRef}`);
      await sleep(2000);
      // Use JS to click phone/email toggles
      if (CONFIG.ownerPhone) {
        ab(buildClickDivEval('phone', 'Includes Phone Number', 'phone'));
        await sleep(1000);
      }
      if (CONFIG.ownerEmail) {
        ab(buildClickDivEval('email', 'Includes Email Address', 'email'));
        await sleep(1000);
      }
      snap = ab('snapshot -i');
      const ownerApply = snap.output?.match(/button "Apply" \[ref=(e\d+)\]/)?.[1];
      if (ownerApply) {
        ab(`click @${ownerApply}`);
        await sleep(5000);
      }
    }
  }

  // ── Get search ID and property count ──
  await sleep(2000);
  const url = ab('eval "window.location.href"');
  const searchIdMatch = url.output?.match(/search\/([a-f0-9-]+)/);
  const searchId = searchIdMatch ? searchIdMatch[1] : 'unknown';
  snap = ab('snapshot');
  const countMatch = snap.output?.match(/heading "([0-9,]+) properties"/);
  const propertyCount = countMatch ? countMatch[1] : '?';
  log(`\n Search ready: ${propertyCount} properties`);
  log(` Search ID: ${searchId}`);
  log(` URL: ${url.output}`);
  return searchId;
}
// ── EXTRACT OWNERS FROM TAB ──
/**
 * Parses an Owner-tab accessibility snapshot into a list of owners with
 * contact details.
 *
 * A `link "..."` line that looks like a name starts a new owner; following
 * `button "..."` lines holding a phone number or e-mail address are attached
 * to that owner. Owners without any phone or e-mail are dropped, and owners
 * sharing a name (case-insensitive) are merged with duplicate contacts removed.
 *
 * @param {string} snapshot - Snapshot text, one element per line.
 * @returns {Array<{name: string, phones: Array<{number: string, type: string}>, emails: string[]}>}
 *   Owners in order of first appearance; empty for empty/missing input.
 */
function extractOwnersFromTab(snapshot) {
  if (!snapshot) return [];
  // Link texts containing any of these are page chrome, not owner names
  const nonOwnerWords = ['Call', 'Send', 'Sign', 'Advanced', 'http', "Don't", 'Google', 'Terms', 'Report'];
  const titleSuffix = /\s+(President|CEO|Manager|Member|Director|Officer|Secretary|Treasurer|VP|Vice President|Partner|Owner|Agent|Trustee|Chairman|Principal)$/i;
  const hasContacts = (owner) => owner.phones.length > 0 || owner.emails.length > 0;

  const owners = [];
  let currentOwner = null;
  for (const line of snapshot.split('\n')) {
    const linkText = line.match(/link "([^"]+)" \[ref=e\d+\]/)?.[1];
    const looksLikeName = Boolean(linkText)
      && !nonOwnerWords.some((word) => linkText.includes(word))
      && linkText.length > 2 && linkText.length < 80 && /[A-Z]/.test(linkText);
    if (looksLikeName) {
      if (currentOwner && hasContacts(currentOwner)) {
        owners.push(currentOwner);
      }
      // Strip a trailing job title so "Jane Doe President" and "Jane Doe" merge
      currentOwner = { name: linkText.replace(titleSuffix, '').trim(), phones: [], emails: [] };
    }
    // The optional "1-" country prefix is captured from the number itself.
    // (Testing the whole line for "1-" wrongly prefixed numbers such as 201-555-1234.)
    const phoneMatch = line.match(/button "(1-)?(\d{3}-\d{3}-\d{4})(?:\s+(\w+))?" \[ref=e\d+\]/);
    if (phoneMatch && currentOwner) {
      currentOwner.phones.push({
        number: `${phoneMatch[1] ?? ''}${phoneMatch[2]}`,
        type: phoneMatch[3] || 'Unknown',
      });
    }
    const longPhoneMatch = line.match(/button "(\d{10,14})" \[ref=e\d+\]/);
    if (longPhoneMatch && currentOwner) {
      currentOwner.phones.push({ number: longPhoneMatch[1], type: 'Unknown' });
    }
    const emailMatch = line.match(/button "([a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,})" \[ref=e\d+\]/);
    if (emailMatch && currentOwner) {
      currentOwner.emails.push(emailMatch[1]);
    }
  }
  if (currentOwner && hasContacts(currentOwner)) {
    owners.push(currentOwner);
  }

  // Dedupe: merge by lowercase name, keeping each phone number / e-mail once
  const byName = new Map();
  for (const owner of owners) {
    const key = owner.name.toLowerCase();
    let merged = byName.get(key);
    if (!merged) {
      merged = { name: owner.name, phones: [], emails: [] };
      byName.set(key, merged);
    }
    for (const phone of owner.phones) {
      if (!merged.phones.some((existing) => existing.number === phone.number)) merged.phones.push(phone);
    }
    for (const email of owner.emails) {
      if (!merged.emails.includes(email)) merged.emails.push(email);
    }
  }
  return [...byName.values()];
}
// ── SCRAPE PROPERTIES ──
/**
 * Walks the current search results, opens each property, reads basic property
 * info plus the Owner tab, and collects a lead for every property that has at
 * least one owner phone or e-mail.
 *
 * The run is capped by CONFIG.maxProperties and by what is left of
 * CONFIG.maxDailyProperties for today; daily counters are persisted after
 * every attempt, including skipped ones.
 *
 * @param {string} searchId - Search id used to navigate back to the results
 *   list after each property. When it is missing or 'unknown', the URL the
 *   browser is on when scraping starts is used instead (if it can be read).
 * @returns {Promise<object[]>} Leads captured during this run.
 */
async function scrapeProperties(searchId) {
  log('\n== Scraping properties ==');
  const dailyStats = getDailyStats();
  // A stats file without valid counters would turn the limits below into NaN and silently stop the run
  dailyStats.propertiesScraped = Number(dailyStats.propertiesScraped) || 0;
  dailyStats.leadsFound = Number(dailyStats.leadsFound) || 0;
  const remainingToday = CONFIG.maxDailyProperties - dailyStats.propertiesScraped;
  const maxThisRun = Math.min(CONFIG.maxProperties, remainingToday);
  log(` Daily: ${dailyStats.propertiesScraped} done, ${remainingToday} remaining, this run: max ${maxThisRun}`);
  if (maxThisRun <= 0) {
    log(' Daily limit reached!');
    return [];
  }

  // Where to return after each property. With an unknown search id the canonical
  // URL would be bogus, so fall back to whatever page the browser is on right now.
  let searchUrl = `https://app.reonomy.com/#!/search/${searchId}`;
  if (!searchId || searchId === 'unknown') {
    const here = ab('eval "window.location.href"', { verbose: false });
    // Shell-special characters are excluded because the URL is re-used inside a quoted command
    const liveUrl = here.output?.match(/https?:\/\/[^\s"'`$\\]+/)?.[0];
    if (liveUrl) {
      searchUrl = liveUrl;
      log(` Search ID unknown, returning to ${searchUrl} between properties`);
    } else {
      log(' WARNING: Search ID unknown and current URL unavailable; returning to results may fail');
    }
  }

  const leads = [];
  const processed = new Set(); // Track processed addresses
  // Dismiss banners
  for (let i = 0; i < 5; i++) {
    const s = ab('snapshot -i', { verbose: false });
    const lg = s.output?.match(/button "Let's Go" \[ref=(e\d+)\]/);
    if (!lg) break;
    ab(`click @${lg[1]}`, { verbose: false });
    await sleep(800);
  }

  // Pulls address-like level-6 headings (result cards) out of a snapshot
  const parseAddresses = (snapOutput) => {
    const props = [];
    const matches = snapOutput?.matchAll(/heading "([^"]+)" \[ref=(e\d+)\] \[level=6\]/g) || [];
    for (const m of matches) {
      const text = m[1];
      if (text.includes('properties') || text.includes('Recently') ||
        text.includes('Get the most') || text.includes('What would') ||
        text.length < 10) continue;
      if (/\d+.*,\s*[A-Z]{2}\s*\d{5}/i.test(text) ||
        /\d+.*(?:st|ave|blvd|dr|ln|rd|way|ct|highway|pl|cir|route|tpke|pkwy|pike|hwy|terr?|loop|pass|trail|sq|park|grove|run|plz)/i.test(text)) {
        props.push({ name: text.substring(0, 80), ref: m[2] });
      }
    }
    return props;
  };

  // Process properties one at a time: always use CURRENT page state
  let scraped = 0;
  let scrollCount = 0;
  const maxScrolls = 10;
  while (scraped < maxThisRun && scrollCount <= maxScrolls) {
    // Take fresh snapshot of current search results
    let snap = ab('snapshot');
    const visible = parseAddresses(snap.output);
    // Find next unprocessed property
    const next = visible.find(p => !processed.has(p.name));
    if (!next) {
      // All visible properties processed, scroll for more
      scrollCount++;
      log(` All visible processed, scrolling... (${scrollCount}/${maxScrolls})`);
      ab('scroll down 600', { verbose: false });
      await sleep(2000);
      continue;
    }
    processed.add(next.name);
    scraped++;
    log(`\n [${scraped}/${maxThisRun}] ${next.name.substring(0, 60)}`);
    try {
      // Click the property (ref is fresh from current snapshot)
      const clickResult = ab(`click @${next.ref}`);
      if (!clickResult.success) {
        log(' Click failed, skipping');
        dailyStats.propertiesScraped++;
        // Persist right away so skipped attempts still count against the daily limit
        saveDailyStats(dailyStats);
        continue;
      }
      await randomDelay(5000, 8000);
      // Get property URL and ID
      const propUrl = ab('eval "window.location.href"');
      const propertyId = propUrl.output?.match(/property\/([a-f0-9-]+)/)?.[1] || 'unknown';
      // Verify we're on a property page (not still on search)
      if (!propUrl.output?.includes('/property/')) {
        log(' Did not navigate to property page, skipping');
        dailyStats.propertiesScraped++;
        saveDailyStats(dailyStats);
        continue;
      }
      // Get property address from page: first heading with both digits and letters
      snap = ab('snapshot');
      const headings = [...(snap.output?.matchAll(/heading "([^"]+)" \[ref=e\d+\]/g) || [])];
      let propertyAddress = next.name;
      for (const h of headings) {
        if (h[1].length > 5 && h[1] !== 'Owners' && h[1] !== 'Owner' && /\d/.test(h[1]) && /[a-zA-Z]/.test(h[1])) {
          propertyAddress = h[1];
          break;
        }
      }
      // Extract property info
      const iSnap = ab('snapshot -i');
      const buildText = iSnap.output || '';
      const propertyInfo = {};
      const sfMatch = buildText.match(/(\d[\d,]*(?:\.\d+)?)\s*(?:SF|Sq\.?\s*Ft)/i) || next.name.match(/([\d.]+k?)\s*SF/i);
      if (sfMatch) propertyInfo.squareFootage = sfMatch[1].replace(/,/g, '');
      const typeMatch = buildText.match(/(?:Property Type|Type)[:\s]*([A-Za-z\s()]+?)(?:\n|$)/i);
      if (typeMatch) propertyInfo.propertyType = typeMatch[1]?.trim();
      if (!propertyInfo.propertyType) {
        const typeFromName = next.name.match(/(?:Industrial|Warehouse|Manufacturing|Distribution|Flex|Storage|Factory)[^"]*/i);
        if (typeFromName) propertyInfo.propertyType = typeFromName[0].trim();
      }
      const yearMatch = buildText.match(/(?:Year Built|Built)[:\s]*(\d{4})/i);
      if (yearMatch) propertyInfo.yearBuilt = yearMatch[1];
      const lotMatch = buildText.match(/([\d.]+)\s*Acre/i);
      if (lotMatch) propertyInfo.lotSize = lotMatch[1] + ' Acres';
      const unitsMatch = buildText.match(/(\d+)\s*Unit/i) || next.name.match(/(\d+)\s*Unit/i);
      if (unitsMatch) propertyInfo.units = unitsMatch[1];
      const addrParts = propertyAddress.match(/,\s*([^,]+),\s*([A-Z]{2})\s*(\d{5})/i);
      if (addrParts) {
        propertyInfo.city = addrParts[1].trim();
        propertyInfo.state = addrParts[2];
        propertyInfo.zip = addrParts[3];
      }
      log(` ${propertyAddress} | ${JSON.stringify(propertyInfo)}`);
      // Click Owner tab
      log(' Opening Owner tab...');
      ab('find role tab click --name "Owner"');
      await randomDelay(4000, 6000);
      // Extract contacts
      const ownerSnap = ab('snapshot -i');
      const owners = extractOwnersFromTab(ownerSnap.output || '');
      if (owners.length === 0) {
        log(' No contacts found');
      } else {
        const totalPhones = owners.reduce((s, o) => s + o.phones.length, 0);
        const totalEmails = owners.reduce((s, o) => s + o.emails.length, 0);
        log(` ${owners.length} owners, ${totalPhones} phones, ${totalEmails} emails`);
        leads.push({
          scrapeDate: new Date().toISOString(),
          propertyId,
          propertyAddress,
          ...propertyInfo,
          owners: owners.map(o => ({ name: o.name, phones: o.phones, emails: o.emails }))
        });
        dailyStats.leadsFound++;
        log(' Lead captured!');
      }
      dailyStats.propertiesScraped++;
      saveDailyStats(dailyStats);
      // Navigate back to search
      ab(`open "${searchUrl}"`);
      await randomDelay(5000, 8000);
      // Random longer break
      if (Math.random() < 0.15) {
        log(' Taking a short break...');
        await randomDelay(8000, 15000);
      }
    } catch (propError) {
      log(` Error: ${propError.message}`);
      ab(`open "${searchUrl}"`);
      await sleep(6000);
      dailyStats.propertiesScraped++;
      saveDailyStats(dailyStats);
    }
  }
  return leads;
}
// ── MAIN ──
/**
 * Entry point: logs in, builds the search, scrapes properties and appends the
 * new leads to the JSON file at CONFIG.outputPath.
 *
 * The browser is always closed at the end; on a fatal error a screenshot is
 * written to /tmp/reonomy-v14-error.png before the error is rethrown.
 *
 * @returns {Promise<object[]>} Leads captured during this run.
 * @throws {Error} Whatever login, search building or scraping throws.
 */
async function main() {
  // Create the workspace directories first: on a fresh machine they do not exist,
  // and the very first log write would otherwise fail before anything is reported
  for (const filePath of [CONFIG.logPath, CONFIG.outputPath, CONFIG.dailyLogPath]) {
    fs.mkdirSync(path.dirname(filePath), { recursive: true });
  }
  // Clear log
  fs.writeFileSync(CONFIG.logPath, '');
  log('=== Reonomy Scraper v14 ===');
  log(`Filters: state=${CONFIG.searchState}, types=${CONFIG.propertyTypes}, minSF=${CONFIG.minSF}, sale=${CONFIG.saleFilter}`);
  log(`Owner filters: phone=${CONFIG.ownerPhone}, email=${CONFIG.ownerEmail}`);
  log(`Max properties: ${CONFIG.maxProperties}`);
  try {
    // Login
    log('\n== Step 1: Login ==');
    await login();
    // Build search
    const searchId = await buildSearch();
    // Scrape
    const leads = await scrapeProperties(searchId);
    // Save results
    log('\n== Saving results ==');
    let allLeads = [];
    try {
      const existing = JSON.parse(fs.readFileSync(CONFIG.outputPath, 'utf8'));
      if (Array.isArray(existing?.leads)) allLeads = existing.leads;
    } catch (e) {
      // A missing file just means this is the first run; anything else would
      // otherwise silently discard earlier leads, so report it and keep a copy
      if (e.code !== 'ENOENT') {
        log(` ! Could not read existing leads (${e.message}); starting a new list`);
        try {
          fs.copyFileSync(CONFIG.outputPath, `${CONFIG.outputPath}.bak`);
          log(` Previous file copied to ${CONFIG.outputPath}.bak`);
        } catch (copyErr) {
          log(` ! Backup of previous leads file failed: ${copyErr.message}`);
        }
      }
    }
    allLeads = [...allLeads, ...leads];
    const output = {
      lastUpdated: new Date().toISOString(),
      searchId,
      filters: {
        state: CONFIG.searchState,
        propertyTypes: CONFIG.propertyTypes,
        minSF: CONFIG.minSF,
        saleFilter: CONFIG.saleFilter,
        ownerPhone: CONFIG.ownerPhone,
        ownerEmail: CONFIG.ownerEmail,
      },
      totalLeads: allLeads.length,
      leads: allLeads
    };
    fs.writeFileSync(CONFIG.outputPath, JSON.stringify(output, null, 2));
    log(`Saved ${leads.length} new leads (${allLeads.length} total)`);
    return leads;
  } catch (error) {
    log(`\nFATAL: ${error.message}`);
    ab('screenshot /tmp/reonomy-v14-error.png');
    throw error;
  } finally {
    log('\nClosing browser...');
    ab('close');
  }
}
// Run the scraper and map the outcome to the process exit code (0 = ok, 1 = failed).
const reportSuccess = (leads) => {
  log(`\nDone! ${leads.length} leads scraped.`);
  process.exit(0);
};
const reportFailure = (err) => {
  log(`\nFailed: ${err.message}`);
  process.exit(1);
};
main().then(reportSuccess).catch(reportFailure);