770 lines
27 KiB
JavaScript
Executable File
770 lines
27 KiB
JavaScript
Executable File
#!/usr/bin/env node
|
|
/**
 * Reonomy Scraper v14 - Self-Configuring Search + Extract
 *
 * Sets up the search filters IN THE SAME browser session, then scrapes.
 * This removes the search ID handoff problem.
 *
 * ENV CONFIG:
 *   REONOMY_EMAIL       - Login email (overrides the built-in default)
 *   REONOMY_PASSWORD    - Login password (overrides the built-in default)
 *   REONOMY_STATE       - Location filter (e.g., "New Jersey")
 *   REONOMY_TYPES       - Comma-separated property types (e.g., "Industrial")
 *   REONOMY_MIN_SF      - Min building area in SF (e.g., "50000")
 *   REONOMY_SALE_FILTER - "not_within_10y", "not_within_5y", "not_within_2y", "within_10y", etc.
 *   REONOMY_OWNER_PHONE - "true" to require phone
 *   REONOMY_OWNER_EMAIL - "true" to require email
 *   MAX_PROPERTIES      - Max properties to scrape (default 20)
 *   HEADLESS            - "false" to show browser
 */
|
|
|
|
const { execSync } = require('child_process');
|
|
const fs = require('fs');
|
|
const path = require('path');
|
|
|
|
/**
 * Parses a strictly positive base-10 integer from an environment-style value.
 *
 * @param {string|undefined} value - Raw value (typically from process.env).
 * @param {number} fallback - Returned when value is missing, non-numeric, zero or negative.
 * @returns {number} The parsed integer, or fallback.
 */
function parsePositiveInt(value, fallback) {
  const parsed = Number.parseInt(value ?? '', 10);
  return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback;
}

/**
 * Runtime configuration: workspace file locations, scrape limits, credentials
 * and search filters. Most values can be overridden through environment variables.
 */
const CONFIG = {
  authStatePath: path.join(process.env.HOME, '.clawdbot/workspace/reonomy-auth.json'),
  outputPath: path.join(process.env.HOME, '.clawdbot/workspace/reonomy-leads-v14.json'),
  logPath: path.join(process.env.HOME, '.clawdbot/workspace/reonomy-scraper-v14.log'),
  dailyLogPath: path.join(process.env.HOME, '.clawdbot/workspace/reonomy-daily-stats.json'),
  // Explicit radix; zero, negative or non-numeric values fall back to 20.
  maxProperties: parsePositiveInt(process.env.MAX_PROPERTIES, 20),
  maxDailyProperties: 50,
  // SECURITY(review): credentials are hard-coded as fallbacks. They are kept so
  // existing invocations keep working, but they should be rotated and supplied
  // only through REONOMY_EMAIL / REONOMY_PASSWORD.
  email: process.env.REONOMY_EMAIL || 'henry@realestateenhanced.com',
  password: process.env.REONOMY_PASSWORD || '9082166532',
  // Search filters
  searchState: process.env.REONOMY_STATE || 'New Jersey',
  // Blank entries (e.g. from a trailing comma) are dropped so they never become
  // an empty checkbox label.
  propertyTypes: (process.env.REONOMY_TYPES || 'Industrial')
    .split(',')
    .map((s) => s.trim())
    .filter(Boolean),
  minSF: process.env.REONOMY_MIN_SF || '',
  saleFilter: process.env.REONOMY_SALE_FILTER || '', // e.g., "not_within_10y"
  ownerPhone: process.env.REONOMY_OWNER_PHONE === 'true',
  ownerEmail: process.env.REONOMY_OWNER_EMAIL === 'true',
};
|
|
|
|
/**
 * Prints a timestamped message to stdout and appends the same line to the
 * run log at CONFIG.logPath.
 *
 * @param {string} msg - Message text (may contain newlines).
 */
function log(msg) {
  const entry = `[${new Date().toISOString()}] ${msg}`;
  console.log(entry);
  fs.appendFileSync(CONFIG.logPath, entry + '\n');
}
|
|
|
|
/**
 * Runs one `agent-browser` CLI command synchronously through the shell.
 *
 * @param {string} cmd - Arguments appended after `agent-browser`.
 * @param {{verbose?: boolean, timeout?: number}} [options] - `verbose: false`
 *   suppresses logging of the command and of any error; `timeout` is in
 *   milliseconds and defaults to 30000.
 * @returns {{success: true, output: string} | {success: false, error: string}}
 *   Trimmed stdout on success; stderr (or the error message) on failure.
 *   This function does not throw on command failure.
 */
function ab(cmd, options = {}) {
  const shouldLog = options.verbose !== false;
  const command = `agent-browser ${cmd}`;
  if (shouldLog) {
    // Only the first 120 characters of the command are logged.
    log(` > ${command.substring(0, 120)}`);
  }

  let stdout;
  try {
    stdout = execSync(command, {
      encoding: 'utf8',
      timeout: options.timeout || 30000,
      stdio: ['pipe', 'pipe', 'pipe']
    });
  } catch (err) {
    const failure = err.stderr?.toString() || err.message;
    if (shouldLog) {
      log(` ! Error: ${failure.substring(0, 200)}`);
    }
    return { success: false, error: failure };
  }

  return { success: true, output: stdout.trim() };
}
|
|
|
|
/**
 * Returns a promise that resolves (with no value) after `ms` milliseconds.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
|
|
|
|
/**
 * Sleeps for a random whole number of milliseconds in the inclusive range
 * [min, max].
 *
 * @param {number} min - Shortest delay in milliseconds.
 * @param {number} max - Longest delay in milliseconds.
 * @returns {Promise<void>} Resolves once the delay has elapsed.
 */
function randomDelay(min, max) {
  const span = max - min + 1;
  const duration = Math.floor(Math.random() * span) + min;
  return sleep(duration);
}
|
|
|
|
/**
 * Returns a shuffled copy of `arr` (Fisher-Yates, driven by Math.random).
 * The input array is left untouched.
 *
 * @param {Array} arr - Items to shuffle.
 * @returns {Array} A new array with the same items in random order.
 */
function shuffle(arr) {
  const result = [...arr];
  let remaining = result.length;
  // Walk from the back: pick a random slot among the unshuffled prefix and
  // swap it into the last unshuffled position.
  while (remaining > 1) {
    const pick = Math.floor(Math.random() * remaining);
    remaining -= 1;
    const held = result[remaining];
    result[remaining] = result[pick];
    result[pick] = held;
  }
  return result;
}
|
|
|
|
/**
 * Loads today's scrape counters from CONFIG.dailyLogPath.
 *
 * The date key is the UTC calendar date (from toISOString). A missing file, an
 * unreadable or malformed file, or a file recorded for another date all yield
 * fresh zeroed counters. Anything other than "file does not exist" is logged
 * rather than silently ignored.
 *
 * @returns {{date: string, propertiesScraped: number, leadsFound: number}}
 *   Counters for today; both counts are always finite, non-negative numbers.
 */
function getDailyStats() {
  const today = new Date().toISOString().split('T')[0];
  const fresh = { date: today, propertiesScraped: 0, leadsFound: 0 };

  // A missing/NaN counter would poison the daily budget arithmetic downstream.
  const toCount = (value) => {
    const n = Number(value);
    return Number.isFinite(n) && n >= 0 ? n : 0;
  };

  let data;
  try {
    data = JSON.parse(fs.readFileSync(CONFIG.dailyLogPath, 'utf8'));
  } catch (err) {
    if (err.code !== 'ENOENT') {
      log(` ! Could not read daily stats (${err.message}); starting from zero`);
    }
    return fresh;
  }

  if (!data || typeof data !== 'object' || data.date !== today) {
    return fresh;
  }

  return {
    ...data,
    propertiesScraped: toCount(data.propertiesScraped),
    leadsFound: toCount(data.leadsFound),
  };
}
|
|
|
|
/**
 * Overwrites CONFIG.dailyLogPath with the given counters as pretty-printed JSON.
 *
 * @param {object} stats - Daily counters object to persist.
 */
function saveDailyStats(stats) {
  const targetPath = CONFIG.dailyLogPath;
  const serialized = JSON.stringify(stats, null, 2);
  fs.writeFileSync(targetPath, serialized);
}
|
|
|
|
// ── LOGIN ──
|
|
/**
 * Logs in to Reonomy through agent-browser and saves the session state to
 * CONFIG.authStatePath.
 *
 * If the login form is absent and the current URL is already inside
 * app.reonomy.com (not a login/auth page), the session is treated as
 * already authenticated.
 *
 * @returns {Promise<boolean>} Resolves to true once logged in.
 * @throws {Error} When the form or its elements cannot be found, or when the
 *   browser is still on a login/auth URL after submitting.
 */
async function login() {
  // Escapes the characters that remain special inside a double-quoted POSIX
  // shell argument, so credentials containing " \ $ or ` are passed intact.
  const shellEscape = (value) => String(value).replace(/(["\\$`])/g, '\\$1');

  log(' Navigating to login...');
  ab('open "https://app.reonomy.com/#!/login"');
  await sleep(4000);

  const snap = ab('snapshot -i');
  if (!snap.output?.includes('textbox "Email"')) {
    const url = ab('eval "window.location.href"');
    if (url.output?.includes('app.reonomy.com') && !url.output?.includes('login') && !url.output?.includes('auth.reonomy.com')) {
      log(' Already logged in!');
      return true;
    }
    throw new Error('Login form not found');
  }

  const emailRef = snap.output.match(/textbox "Email" \[ref=(e\d+)\]/)?.[1];
  const passRef = snap.output.match(/textbox "Password" \[ref=(e\d+)\]/)?.[1];
  const loginRef = snap.output.match(/button "Log In" \[ref=(e\d+)\]/)?.[1];
  if (!emailRef || !passRef || !loginRef) throw new Error('Login form elements not found');

  ab(`fill @${emailRef} "${shellEscape(CONFIG.email)}"`);
  await sleep(1000);

  // The password must never reach the log file: ab() logs the command line
  // when verbose, and on failure the error message can echo the full command.
  // Run it silently and log a redacted line instead.
  log(` > agent-browser fill @${passRef} "********"`);
  const passFill = ab(`fill @${passRef} "${shellEscape(CONFIG.password)}"`, { verbose: false });
  if (!passFill.success) {
    log(' ! Error: password fill command failed');
  }
  await sleep(1000);

  ab(`click @${loginRef}`);
  await randomDelay(12000, 16000);

  const postUrl = ab('eval "window.location.href"');
  if (postUrl.output?.includes('auth.reonomy.com') || postUrl.output?.includes('login')) {
    throw new Error('Login failed');
  }

  ab(`state save "${CONFIG.authStatePath}"`);
  log(' Login successful!');
  return true;
}
|
|
|
|
// ── BUILD SEARCH ──
|
|
/**
 * Builds the Reonomy search in the current browser session by applying the
 * filters from CONFIG (location, property types, min building area, sale date,
 * owner phone/email), then reads the resulting search ID from the URL.
 *
 * Element refs are re-read from a fresh `snapshot -i` after every interaction
 * because refs change whenever the page re-renders.
 *
 * @returns {Promise<string>} The search ID parsed from the URL, or 'unknown'
 *   when the URL does not contain one.
 * @throws {Error} When the search box, the state suggestion, or the Property
 *   Type button cannot be found.
 */
async function buildSearch() {
  // Escapes regex metacharacters so configured labels (e.g. "Retail (General)")
  // are matched literally instead of being interpreted as a pattern.
  const escapeRegExp = (text) => String(text).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

  // Escapes the characters that remain special inside a double-quoted POSIX
  // shell argument.
  const shellEscape = (text) => String(text).replace(/(["\\$`])/g, '\\$1');

  // Clicks the first jss-styled <div> whose trimmed text equals `label`.
  // The page-side code is wrapped in an IIFE so repeated evals never collide
  // on top-level `const` names in the page context.
  const clickJssDiv = (label, tag = label) => ab(`eval "(() => {
    const divs = Array.from(document.querySelectorAll('div'));
    const hit = divs.find(d => d.textContent.trim() === '${label}' && d.className.includes('jss'));
    if (hit) hit.click();
    return '${tag}: ' + !!hit;
  })()"`);

  log('\n== Building search with filters ==');

  // Go to search page via Advanced Search
  ab('open "https://app.reonomy.com/#!/search"');
  await sleep(5000);

  // Dismiss banners
  for (let i = 0; i < 5; i++) {
    const s = ab('snapshot -i', { verbose: false });
    const lg = s.output?.match(/button "Let's Go" \[ref=(e\d+)\]/);
    if (!lg) break;
    ab(`click @${lg[1]}`, { verbose: false });
    await sleep(800);
  }

  // Click Advanced Search if visible
  let snap = ab('snapshot -i');
  const advRef = snap.output?.match(/link "Advanced Search" \[ref=(e\d+)\]/)?.[1];
  if (advRef) {
    ab(`click @${advRef}`);
    await sleep(3000);
  }

  // ── Location ──
  log(` Setting location: ${CONFIG.searchState}`);
  const stateArg = shellEscape(CONFIG.searchState);
  const stateSuggestion = new RegExp(`menuitem "${escapeRegExp(CONFIG.searchState)}" \\[ref=(e\\d+)\\]`);

  // Take FRESH snapshot after Advanced Search click (refs changed)
  snap = ab('snapshot -i');
  let searchBox = snap.output?.match(/textbox "(?:Search by address, location, or owner|Address, Location, or Owner)" \[ref=(e\d+)\]/)?.[1];
  if (!searchBox) throw new Error('Search box not found');

  // Use fill to set the text, then manually trigger the autocomplete
  ab(`fill @${searchBox} "${stateArg}"`);
  await sleep(3000);

  // Retry up to 4 times to find the state suggestion
  let stateRef = null;
  for (let attempt = 0; attempt < 4 && !stateRef; attempt++) {
    snap = ab('snapshot -i');
    stateRef = snap.output?.match(stateSuggestion)?.[1];
    if (!stateRef) {
      log(` State not found (attempt ${attempt + 1}), retrying...`);
      // Re-find search box (ref might have changed)
      searchBox = snap.output?.match(/textbox "(?:Search by address|Address)[^"]*" \[ref=(e\d+)\]/)?.[1]
        || snap.output?.match(/textbox "[^"]*" \[ref=(e\d+)\]/)?.[1];
      if (searchBox) {
        // Clear and retype
        ab(`fill @${searchBox} ""`);
        await sleep(500);
        ab(`fill @${searchBox} "${stateArg}"`);
        await sleep(3000);
      } else {
        await sleep(2000);
      }
    }
  }

  if (!stateRef) throw new Error(`State "${CONFIG.searchState}" not found in suggestions`);

  ab(`click @${stateRef}`);
  await sleep(6000);

  // ── Property Type ──
  log(` Setting property types: ${CONFIG.propertyTypes.join(', ')}`);
  snap = ab('snapshot -i');
  const ptRef = snap.output?.match(/button "Property Type" \[ref=(e\d+)\]/)?.[1];
  if (!ptRef) throw new Error('Property Type button not found');
  ab(`click @${ptRef}`);
  await sleep(2000);

  snap = ab('snapshot -i');
  for (const ptype of CONFIG.propertyTypes) {
    const checkboxPattern = new RegExp(`checkbox "${escapeRegExp(ptype)}" \\[ref=(e\\d+)\\]`);
    const cbRef = snap.output?.match(checkboxPattern)?.[1];
    if (cbRef) {
      ab(`click @${cbRef}`);
      await sleep(500);
    } else {
      log(` ! Property type "${ptype}" not found in quick list, checking See All...`);
      // Try See All Property Types
      const seeAllRef = snap.output?.match(/button "See All Property Types" \[ref=(e\d+)\]/)?.[1];
      if (seeAllRef) {
        ab(`click @${seeAllRef}`);
        await sleep(2000);
        snap = ab('snapshot -i');
        const cbRef2 = snap.output?.match(checkboxPattern)?.[1];
        if (cbRef2) {
          ab(`click @${cbRef2}`);
          await sleep(500);
        } else {
          log(` ! Could not find "${ptype}" checkbox`);
        }
      }
    }
  }

  // Click Apply for property type
  snap = ab('snapshot -i');
  const ptApply = snap.output?.match(/button "Apply" \[ref=(e\d+)\]/)?.[1];
  if (ptApply) {
    ab(`click @${ptApply}`);
    await sleep(5000);
  }

  // ── Building Area (SF) ──
  if (CONFIG.minSF) {
    log(` Setting min building area: ${CONFIG.minSF} SF`);
    snap = ab('snapshot -i');
    const sizeRef = snap.output?.match(/button "Size" \[ref=(e\d+)\]/)?.[1];
    if (sizeRef) {
      ab(`click @${sizeRef}`);
      await sleep(2000);

      // The Size panel exposes several textbox "min" fields; the SECOND one is
      // Building Area (the first is Total Units).
      snap = ab('snapshot -i');
      const minRefs = [...(snap.output?.matchAll(/textbox "min" \[ref=(e\d+)\]/g) || [])];
      if (minRefs.length >= 2) {
        const areaMinRef = minRefs[1][1];
        // Click the field to open dropdown
        ab(`click @${areaMinRef}`);
        await sleep(1500);

        // Type the value
        ab(`type @${areaMinRef} "${shellEscape(CONFIG.minSF)}"`);
        await sleep(1000);

        // Check for preset dropdown option (e.g., "50k sf")
        snap = ab('snapshot -i');
        const minSfValue = Number.parseInt(CONFIG.minSF, 10);
        const presetPatterns = [`${CONFIG.minSF}`];
        // "Nk" style labels only describe the requested value when it is a
        // whole number of thousands; rounding (e.g. 500 -> "1k sf") would
        // otherwise select the wrong preset.
        if (Number.isInteger(minSfValue) && minSfValue > 0 && minSfValue % 1000 === 0) {
          const sfK = minSfValue / 1000;
          presetPatterns.unshift(`${sfK}k sf`);
          presetPatterns.push(`${sfK},000`);
        }
        let presetClicked = false;
        for (const pat of presetPatterns) {
          // (?!\d) stops "5000" from matching a "50000 ..." option.
          const presetRef = snap.output?.match(new RegExp(`(?:menuitem|option|button|listitem) "${escapeRegExp(pat)}(?!\\d)[^"]*" \\[ref=(e\\d+)\\]`, 'i'))?.[1];
          if (presetRef) {
            log(` Clicking preset: ${pat}`);
            ab(`click @${presetRef}`);
            presetClicked = true;
            await sleep(2000);
            break;
          }
        }

        if (!presetClicked) {
          // Press Enter to commit the typed value
          log(' No preset found, pressing Enter to commit');
          ab(`eval "document.querySelectorAll('input[placeholder=\\"min\\"]')[1]?.dispatchEvent(new KeyboardEvent('keydown', {key: 'Enter', keyCode: 13, bubbles: true}))"`);
          await sleep(1000);
        }
      } else {
        log(' WARNING: Building Area min field not found in Size panel');
      }

      // Now try to click Apply
      await sleep(1000);
      snap = ab('snapshot -i');
      const sizeApply = snap.output?.match(/button "Apply" \[ref=(e\d+)\](?!\s*\[disabled\])/)?.[1];
      if (sizeApply) {
        ab(`click @${sizeApply}`);
        await sleep(5000);
      } else {
        // Check if filter tag already shows (e.g., "50000+ SF")
        const filterTag = snap.output?.match(/button "\d+.*SF" \[ref=(e\d+)\]/);
        if (filterTag) {
          log(' Size filter appears applied via tag');
        } else {
          // Try pressing Escape then check
          log(' Pressing Escape to close size panel');
          ab('eval "document.dispatchEvent(new KeyboardEvent(\'keydown\', {key: \'Escape\', bubbles: true}))"');
          await sleep(2000);
        }
      }
    } else {
      log(' WARNING: Size button not found');
    }

    // Verify size filter is applied
    snap = ab('snapshot -i');
    const sfTag = snap.output?.match(/button "\d+.*SF" \[ref=(e\d+)\]/);
    if (sfTag) {
      log(` Size filter confirmed: ${sfTag[0]}`);
    } else {
      log(' WARNING: Size filter may not be applied');
    }
  }

  // ── Sale Date Filter ──
  if (CONFIG.saleFilter) {
    log(` Setting sale filter: ${CONFIG.saleFilter}`);

    // Parse "<not_>within_<period>". Splitting on "_within_" cannot handle the
    // "within_<period>" form (no leading underscore), which previously always
    // degraded to "Within / Past 10 years".
    const periodLabels = new Map([
      ['90d', 'Past 90 days'],
      ['1y', 'Past year'],
      ['2y', 'Past 2 years'],
      ['5y', 'Past 5 years'],
      ['10y', 'Past 10 years'],
    ]);
    const parsedSale = CONFIG.saleFilter.match(/^(not_)?within_(.+)$/);
    const notWithin = Boolean(parsedSale?.[1]);
    let periodText = periodLabels.get(parsedSale?.[2]);
    if (!periodText) {
      log(` WARNING: Unrecognized sale filter "${CONFIG.saleFilter}", defaulting period to Past 10 years`);
      periodText = 'Past 10 years';
    }
    const withinLabel = notWithin ? 'Not Within' : 'Within';

    // Make sure no dropdowns are blocking: close any open panels first by
    // clicking the page body.
    await sleep(1000);
    ab('eval "document.body.click()"');
    await sleep(1000);

    snap = ab('snapshot -i');
    const moreRef = snap.output?.match(/button "More [Ff]ilters" \[ref=(e\d+)\]/)?.[1];
    if (moreRef) {
      ab(`click @${moreRef}`);
      await sleep(3000);

      // Click Sales tab
      snap = ab('snapshot -i');
      const salesRef = snap.output?.match(/tab "Sales[^"]*" \[ref=(e\d+)\]/)?.[1];
      if (salesRef) {
        ab(`click @${salesRef}`);
        await sleep(2000);

        // Click Not Within / Within, then the period, using JS
        clickJssDiv(withinLabel);
        await sleep(1500);
        clickJssDiv(periodText);
        await sleep(1500);

        // Click Apply in more filters
        snap = ab('snapshot -i');
        const salesApply = snap.output?.match(/button "Apply" \[ref=(e\d+)\]/)?.[1];
        if (salesApply) {
          ab(`click @${salesApply}`);
          await sleep(5000);
        }
      } else {
        log(' WARNING: Sales tab not found; sale filter not applied');
      }
    } else {
      log(' WARNING: More Filters button not found; sale filter not applied');
    }
  }

  // ── Owner filters (phone/email) ──
  if (CONFIG.ownerPhone || CONFIG.ownerEmail) {
    log(` Setting owner filters: phone=${CONFIG.ownerPhone}, email=${CONFIG.ownerEmail}`);
    snap = ab('snapshot -i');
    let moreRef = snap.output?.match(/button "More [Ff]ilters" \[ref=(e\d+)\]/)?.[1];
    // If More Filters panel is already open, look for the close/filter button
    if (!moreRef) {
      // Panel might already be open from sales filter
      const closeRef = snap.output?.match(/button "Close" \[ref=(e\d+)\]/)?.[1];
      if (closeRef) {
        // Close and reopen
        ab(`click @${closeRef}`);
        await sleep(1000);
        snap = ab('snapshot -i');
        moreRef = snap.output?.match(/button "More [Ff]ilters" \[ref=(e\d+)\]/)?.[1];
      }
    }

    if (moreRef) {
      ab(`click @${moreRef}`);
      await sleep(2000);
    }

    // Click Owner tab
    snap = ab('snapshot -i');
    const ownerTabRef = snap.output?.match(/tab "Owner[^"]*" \[ref=(e\d+)\]/)?.[1];
    if (ownerTabRef) {
      ab(`click @${ownerTabRef}`);
      await sleep(2000);

      // Use JS to click phone/email toggles
      if (CONFIG.ownerPhone) {
        clickJssDiv('Includes Phone Number', 'phone');
        await sleep(1000);
      }
      if (CONFIG.ownerEmail) {
        clickJssDiv('Includes Email Address', 'email');
        await sleep(1000);
      }

      snap = ab('snapshot -i');
      const ownerApply = snap.output?.match(/button "Apply" \[ref=(e\d+)\]/)?.[1];
      if (ownerApply) {
        ab(`click @${ownerApply}`);
        await sleep(5000);
      }
    } else {
      log(' WARNING: Owner tab not found; owner filters not applied');
    }
  }

  // ── Get search ID and property count ──
  await sleep(2000);
  const url = ab('eval "window.location.href"');
  const searchIdMatch = url.output?.match(/search\/([a-f0-9-]+)/);
  const searchId = searchIdMatch ? searchIdMatch[1] : 'unknown';

  snap = ab('snapshot');
  const countMatch = snap.output?.match(/heading "([0-9,]+) properties"/);
  const propertyCount = countMatch ? countMatch[1] : '?';

  log(`\n Search ready: ${propertyCount} properties`);
  log(` Search ID: ${searchId}`);
  log(` URL: ${url.output}`);
  if (!searchIdMatch) {
    log(' WARNING: Search ID not found in URL; returning to the results list may fail');
  }

  return searchId;
}
|
|
|
|
// ── EXTRACT OWNERS FROM TAB ──
|
|
/**
 * Extracts owner contacts from an Owner-tab snapshot.
 *
 * The snapshot is scanned line by line. A `link "..."` line that looks like a
 * person/company name starts a new owner; subsequent `button "..."` lines
 * holding phone numbers or email addresses are attached to that owner. Owners
 * with no phone and no email are dropped. Owners sharing the same name
 * (case-insensitive) are merged, and duplicate phones/emails are removed.
 *
 * @param {string} snapshot - Text output of `agent-browser snapshot -i`.
 * @returns {Array<{name: string, phones: Array<{number: string, type: string}>, emails: string[]}>}
 *   De-duplicated owners in first-seen order; [] for empty/missing input.
 */
function extractOwnersFromTab(snapshot) {
  if (!snapshot) return [];

  // Link texts containing any of these fragments are UI chrome, not owner names.
  const NON_OWNER_FRAGMENTS = ['Call', 'Send', 'Sign', 'Advanced', 'http', "Don't", 'Google', 'Terms', 'Report'];
  // Trailing job title appended to the owner's name in the link text.
  const TITLE_SUFFIX = /\s+(President|CEO|Manager|Member|Director|Officer|Secretary|Treasurer|VP|Vice President|Partner|Owner|Agent|Trustee|Chairman|Principal)$/i;
  const hasContact = (owner) => owner.phones.length > 0 || owner.emails.length > 0;

  const owners = [];
  let currentOwner = null;

  for (const line of snapshot.split('\n')) {
    const linkName = line.match(/link "([^"]+)" \[ref=e\d+\]/)?.[1];
    if (
      linkName
      && !NON_OWNER_FRAGMENTS.some((fragment) => linkName.includes(fragment))
      && linkName.length > 2 && linkName.length < 80 && /[A-Z]/.test(linkName)
    ) {
      if (currentOwner && hasContact(currentOwner)) {
        owners.push(currentOwner);
      }
      currentOwner = { name: linkName.replace(TITLE_SUFFIX, '').trim(), phones: [], emails: [] };
    }

    // The optional "1-" country prefix is captured from the number itself.
    // Testing the whole line for "1-" would wrongly prefix numbers such as
    // "201-555-1234", which contain "1-" in the middle.
    const phoneMatch = line.match(/button "(1-)?(\d{3}-\d{3}-\d{4})(?:\s+(\w+))?" \[ref=e\d+\]/);
    if (phoneMatch && currentOwner) {
      currentOwner.phones.push({
        number: `${phoneMatch[1] ?? ''}${phoneMatch[2]}`,
        type: phoneMatch[3] || 'Unknown',
      });
    }

    const longPhoneMatch = line.match(/button "(\d{10,14})" \[ref=e\d+\]/);
    if (longPhoneMatch && currentOwner) {
      currentOwner.phones.push({ number: longPhoneMatch[1], type: 'Unknown' });
    }

    const emailMatch = line.match(/button "([a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,})" \[ref=e\d+\]/);
    if (emailMatch && currentOwner) {
      currentOwner.emails.push(emailMatch[1]);
    }
  }

  if (currentOwner && hasContact(currentOwner)) {
    owners.push(currentOwner);
  }

  // Dedupe: merge owners by lower-cased name, keeping the first-seen spelling
  // and dropping repeated phone numbers / emails.
  const byName = new Map();
  for (const owner of owners) {
    const key = owner.name.toLowerCase();
    let merged = byName.get(key);
    if (!merged) {
      merged = { name: owner.name, phones: [], emails: [] };
      byName.set(key, merged);
    }
    for (const phone of owner.phones) {
      if (!merged.phones.some((known) => known.number === phone.number)) {
        merged.phones.push(phone);
      }
    }
    for (const email of owner.emails) {
      if (!merged.emails.includes(email)) {
        merged.emails.push(email);
      }
    }
  }
  return [...byName.values()];
}
|
|
|
|
// ── SCRAPE PROPERTIES ──
|
|
/**
 * Walks the current search results, opens each property, and collects owner
 * contacts until either the per-run or the daily limit is reached.
 *
 * Every attempt (success, skip, or error) is counted against the daily budget
 * and persisted immediately, so an interrupted run cannot under-report usage.
 *
 * @param {string} searchId - Search ID used to navigate back to the results
 *   list after each property.
 * @returns {Promise<Array<object>>} Leads (property info plus owners) for
 *   properties where at least one owner contact was found.
 */
async function scrapeProperties(searchId) {
  log('\n== Scraping properties ==');
  const dailyStats = getDailyStats();
  const remainingToday = CONFIG.maxDailyProperties - dailyStats.propertiesScraped;
  const maxThisRun = Math.min(CONFIG.maxProperties, remainingToday);
  log(` Daily: ${dailyStats.propertiesScraped} done, ${remainingToday} remaining, this run: max ${maxThisRun}`);

  // `!(x > 0)` also catches NaN, which `x <= 0` would let through.
  if (!(maxThisRun > 0)) {
    log(' Daily limit reached!');
    return [];
  }

  const searchUrl = `https://app.reonomy.com/#!/search/${searchId}`;
  const leads = [];
  const processed = new Set(); // Track processed addresses

  // Counts one property attempt and persists the counters right away.
  const recordAttempt = () => {
    dailyStats.propertiesScraped++;
    saveDailyStats(dailyStats);
  };

  // Dismiss banners
  for (let i = 0; i < 5; i++) {
    const s = ab('snapshot -i', { verbose: false });
    const lg = s.output?.match(/button "Let's Go" \[ref=(e\d+)\]/);
    if (!lg) break;
    ab(`click @${lg[1]}`, { verbose: false });
    await sleep(800);
  }

  // Pulls address-looking level-6 headings (the result cards) out of a snapshot.
  const parseAddresses = (snapOutput) => {
    const props = [];
    const matches = snapOutput?.matchAll(/heading "([^"]+)" \[ref=(e\d+)\] \[level=6\]/g) || [];
    for (const m of matches) {
      const text = m[1];
      if (text.includes('properties') || text.includes('Recently') ||
          text.includes('Get the most') || text.includes('What would') ||
          text.length < 10) continue;
      if (/\d+.*,\s*[A-Z]{2}\s*\d{5}/i.test(text) ||
          /\d+.*(?:st|ave|blvd|dr|ln|rd|way|ct|highway|pl|cir|route|tpke|pkwy|pike|hwy|terr?|loop|pass|trail|sq|park|grove|run|plz)/i.test(text)) {
        props.push({ name: text.substring(0, 80), ref: m[2] });
      }
    }
    return props;
  };

  // Process properties one at a time: always use CURRENT page state
  let scraped = 0;
  let scrollCount = 0;
  const maxScrolls = 10;

  while (scraped < maxThisRun && scrollCount <= maxScrolls) {
    // Take fresh snapshot of current search results
    let snap = ab('snapshot');
    const visible = parseAddresses(snap.output);

    // Find next unprocessed property
    const next = visible.find(p => !processed.has(p.name));

    if (!next) {
      // All visible properties processed, scroll for more
      scrollCount++;
      log(` All visible processed, scrolling... (${scrollCount}/${maxScrolls})`);
      ab('scroll down 600', { verbose: false });
      await sleep(2000);
      continue;
    }

    processed.add(next.name);
    scraped++;
    log(`\n [${scraped}/${maxThisRun}] ${next.name.substring(0, 60)}`);

    try {
      // Click the property (ref is fresh from current snapshot)
      const clickResult = ab(`click @${next.ref}`);
      if (!clickResult.success) {
        log(' Click failed, skipping');
        recordAttempt();
        continue;
      }
      await randomDelay(5000, 8000);

      // Get property URL and ID
      const propUrl = ab('eval "window.location.href"');
      const propertyId = propUrl.output?.match(/property\/([a-f0-9-]+)/)?.[1] || 'unknown';

      // Verify we're on a property page (not still on search)
      if (!propUrl.output?.includes('/property/')) {
        log(' Did not navigate to property page, skipping');
        recordAttempt();
        continue;
      }

      // Get property address from page: first heading containing both a digit
      // and a letter, falling back to the result-card text.
      snap = ab('snapshot');
      const headings = [...(snap.output?.matchAll(/heading "([^"]+)" \[ref=e\d+\]/g) || [])];
      let propertyAddress = next.name;
      for (const h of headings) {
        if (h[1].length > 5 && h[1] !== 'Owners' && h[1] !== 'Owner' && /\d/.test(h[1]) && /[a-zA-Z]/.test(h[1])) {
          propertyAddress = h[1];
          break;
        }
      }

      // Extract property info
      const iSnap = ab('snapshot -i');
      const buildText = iSnap.output || '';
      const propertyInfo = {};

      const sfMatch = buildText.match(/(\d[\d,]*(?:\.\d+)?)\s*(?:SF|Sq\.?\s*Ft)/i) || next.name.match(/([\d.]+k?)\s*SF/i);
      if (sfMatch) propertyInfo.squareFootage = sfMatch[1].replace(/,/g, '');

      const typeMatch = buildText.match(/(?:Property Type|Type)[:\s]*([A-Za-z\s()]+?)(?:\n|$)/i);
      if (typeMatch) propertyInfo.propertyType = typeMatch[1]?.trim();
      if (!propertyInfo.propertyType) {
        const typeFromName = next.name.match(/(?:Industrial|Warehouse|Manufacturing|Distribution|Flex|Storage|Factory)[^"]*/i);
        if (typeFromName) propertyInfo.propertyType = typeFromName[0].trim();
      }

      const yearMatch = buildText.match(/(?:Year Built|Built)[:\s]*(\d{4})/i);
      if (yearMatch) propertyInfo.yearBuilt = yearMatch[1];

      const lotMatch = buildText.match(/([\d.]+)\s*Acre/i);
      if (lotMatch) propertyInfo.lotSize = lotMatch[1] + ' Acres';

      const unitsMatch = buildText.match(/(\d+)\s*Unit/i) || next.name.match(/(\d+)\s*Unit/i);
      if (unitsMatch) propertyInfo.units = unitsMatch[1];

      const addrParts = propertyAddress.match(/,\s*([^,]+),\s*([A-Z]{2})\s*(\d{5})/i);
      if (addrParts) {
        propertyInfo.city = addrParts[1].trim();
        propertyInfo.state = addrParts[2];
        propertyInfo.zip = addrParts[3];
      }

      log(` ${propertyAddress} | ${JSON.stringify(propertyInfo)}`);

      // Click Owner tab
      log(' Opening Owner tab...');
      ab('find role tab click --name "Owner"');
      await randomDelay(4000, 6000);

      // Extract contacts
      const ownerSnap = ab('snapshot -i');
      const owners = extractOwnersFromTab(ownerSnap.output || '');

      if (owners.length === 0) {
        log(' No contacts found');
      } else {
        const totalPhones = owners.reduce((s, o) => s + o.phones.length, 0);
        const totalEmails = owners.reduce((s, o) => s + o.emails.length, 0);
        log(` ${owners.length} owners, ${totalPhones} phones, ${totalEmails} emails`);

        leads.push({
          scrapeDate: new Date().toISOString(),
          propertyId,
          propertyAddress,
          ...propertyInfo,
          owners: owners.map(o => ({ name: o.name, phones: o.phones, emails: o.emails }))
        });
        dailyStats.leadsFound++;
        log(' Lead captured!');
      }

      recordAttempt();

      // Navigate back to search
      ab(`open "${searchUrl}"`);
      await randomDelay(5000, 8000);

      // Random longer break
      if (Math.random() < 0.15) {
        log(' Taking a short break...');
        await randomDelay(8000, 15000);
      }

    } catch (propError) {
      log(` Error: ${propError.message}`);
      ab(`open "${searchUrl}"`);
      await sleep(6000);
      recordAttempt();
    }
  }

  return leads;
}
|
|
|
|
// ── MAIN ──
|
|
/**
 * Runs the full pipeline: login, build the search, scrape properties, then
 * append the new leads to the JSON file at CONFIG.outputPath.
 *
 * The run log is truncated at start. On any fatal error a screenshot is taken
 * and the error is rethrown; the browser is closed in all cases.
 *
 * @returns {Promise<Array<object>>} The leads captured in this run.
 * @throws {Error} Any error raised by login, search building, scraping or saving.
 */
async function main() {
  // Clear log
  fs.writeFileSync(CONFIG.logPath, '');
  log('=== Reonomy Scraper v14 ===');
  log(`Filters: state=${CONFIG.searchState}, types=${CONFIG.propertyTypes}, minSF=${CONFIG.minSF}, sale=${CONFIG.saleFilter}`);
  log(`Owner filters: phone=${CONFIG.ownerPhone}, email=${CONFIG.ownerEmail}`);
  log(`Max properties: ${CONFIG.maxProperties}`);

  try {
    // Login
    log('\n== Step 1: Login ==');
    await login();

    // Build search
    const searchId = await buildSearch();

    // Scrape
    const leads = await scrapeProperties(searchId);

    // Save results
    log('\n== Saving results ==');
    let allLeads = [];
    try {
      const existing = JSON.parse(fs.readFileSync(CONFIG.outputPath, 'utf8'));
      if (Array.isArray(existing?.leads)) {
        allLeads = existing.leads;
      }
    } catch (e) {
      // A missing file just means this is the first run. Anything else (e.g.
      // corrupt JSON) would otherwise be silently overwritten below, losing
      // every previously saved lead, so keep a copy of the old file first.
      if (e.code !== 'ENOENT') {
        const backupPath = `${CONFIG.outputPath}.corrupt-${Date.now()}`;
        log(` ! Could not load existing leads (${e.message}); backing up to ${backupPath}`);
        try {
          fs.copyFileSync(CONFIG.outputPath, backupPath);
        } catch (copyErr) {
          log(` ! Backup failed: ${copyErr.message}`);
        }
      }
    }

    allLeads = [...allLeads, ...leads];

    const output = {
      lastUpdated: new Date().toISOString(),
      searchId,
      filters: {
        state: CONFIG.searchState,
        propertyTypes: CONFIG.propertyTypes,
        minSF: CONFIG.minSF,
        saleFilter: CONFIG.saleFilter,
        ownerPhone: CONFIG.ownerPhone,
        ownerEmail: CONFIG.ownerEmail,
      },
      totalLeads: allLeads.length,
      leads: allLeads
    };

    fs.writeFileSync(CONFIG.outputPath, JSON.stringify(output, null, 2));
    log(`Saved ${leads.length} new leads (${allLeads.length} total)`);

    return leads;

  } catch (error) {
    log(`\nFATAL: ${error.message}`);
    ab('screenshot /tmp/reonomy-v14-error.png');
    throw error;
  } finally {
    log('\nClosing browser...');
    ab('close');
  }
}
|
|
|
|
// Script entry point: run the pipeline and translate the outcome into the
// process exit code (0 on success, 1 on failure).
(async () => {
  try {
    const leads = await main();
    log(`\nDone! ${leads.length} leads scraped.`);
    process.exit(0);
  } catch (err) {
    log(`\nFailed: ${err.message}`);
    process.exit(1);
  }
})();
|