clawdbot-workspace/reonomy-scraper-v13.js

581 lines
21 KiB
JavaScript
Executable File

#!/usr/bin/env node
/**
* Reonomy Scraper v13.1 - Agent-Browser Edition (Anti-Detection)
*
* PATCHED 2026-02-03: Reonomy now shows contacts directly on Owner tab
* (no more View Contacts → person page → modal flow)
*
* ANTI-DETECTION FEATURES:
* - Random delays (human-like timing)
* - Random property order
* - Occasional "distraction" actions
* - Session limits (max per run)
* - Daily tracking to avoid over-scraping
*/
const { execSync } = require('child_process');
const fs = require('fs');
const path = require('path');
// Config

/**
 * Parse a base-10 positive integer from an environment-style string.
 *
 * @param {string|undefined} value - Raw value (e.g. from process.env).
 * @param {number} fallback - Value returned when `value` is missing,
 *   not numeric, zero or negative.
 * @returns {number} The parsed positive integer, or `fallback`.
 */
function parsePositiveInt(value, fallback) {
  // Explicit radix: never let a prefix such as "0x" change the interpretation.
  const parsed = Number.parseInt(value, 10);
  return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback;
}

// Directory holding auth state, scraped leads, logs and daily stats.
const WORKSPACE_DIR = path.join(process.env.HOME, '.clawdbot/workspace');

/**
 * Runtime configuration. Values marked "env" can be overridden through the
 * named environment variable; everything else is fixed.
 */
const CONFIG = {
  authStatePath: path.join(WORKSPACE_DIR, 'reonomy-auth.json'),
  outputPath: path.join(WORKSPACE_DIR, 'reonomy-leads-v13.json'),
  logPath: path.join(WORKSPACE_DIR, 'reonomy-scraper-v13.log'),
  dailyLogPath: path.join(WORKSPACE_DIR, 'reonomy-daily-stats.json'),
  // env: REONOMY_SEARCH_ID
  searchId: process.env.REONOMY_SEARCH_ID || '504a2d13-d88f-4213-9ac6-a7c8bc7c20c6',
  // env: MAX_PROPERTIES (positive integer; anything else falls back to 20)
  maxProperties: parsePositiveInt(process.env.MAX_PROPERTIES, 20),
  maxDailyProperties: 50, // Don't exceed this per day
  // env: HEADLESS — only the literal string "false" disables headless mode
  headless: process.env.HEADLESS !== 'false',
  // SECURITY(review): the fallbacks below are real-looking credentials committed
  // to source. Prefer supplying REONOMY_EMAIL / REONOMY_PASSWORD via the
  // environment, rotate this password, and remove the defaults once callers
  // no longer rely on them.
  email: process.env.REONOMY_EMAIL || 'henry@realestateenhanced.com',
  password: process.env.REONOMY_PASSWORD || '9082166532',
};
/**
 * Anti-detection helper: wait for a randomly chosen amount of time.
 *
 * @param {number} minMs - Shortest possible wait in milliseconds (inclusive).
 * @param {number} maxMs - Longest possible wait in milliseconds (inclusive).
 * @returns {Promise<void>} Settles once the chosen wait has elapsed.
 */
function randomDelay(minMs, maxMs) {
  const span = maxMs - minMs + 1;
  const waitMs = Math.floor(Math.random() * span) + minMs;
  return new Promise((done) => setTimeout(done, waitMs));
}
/**
 * Anti-detection helper: return a shuffled copy of `array` (Fisher-Yates).
 * The input array itself is left untouched.
 *
 * @param {Array} array - Items to shuffle.
 * @returns {Array} A new array holding the same items in random order.
 */
function shuffle(array) {
  const result = [...array];
  let end = result.length - 1;
  while (end > 0) {
    // Pick a slot in [0, end] and swap it into the current tail position.
    const pick = Math.floor(Math.random() * (end + 1));
    const held = result[end];
    result[end] = result[pick];
    result[pick] = held;
    end -= 1;
  }
  return result;
}
/**
 * Write a timestamped message to stdout and append it to the scraper log file.
 *
 * @param {string} msg - Text to record.
 */
function log(msg) {
  const entry = `[${new Date().toISOString()}] ${msg}`;
  console.log(entry);
  fs.appendFileSync(CONFIG.logPath, `${entry}\n`);
}
/**
 * Run one `agent-browser` CLI command synchronously.
 *
 * @param {string} cmd - Arguments appended after `agent-browser`.
 * @param {object} [options]
 * @param {boolean} [options.verbose] - Pass `false` to suppress logging of the
 *   command line and of any error output.
 * @param {number} [options.timeout] - Timeout in ms (30000 when not given).
 * @returns {{success: true, output: string} | {success: false, error: string}}
 *   Trimmed stdout on success; stderr (or the error message) on failure.
 */
function ab(cmd, options = {}) {
  const shouldLog = options.verbose !== false;
  const commandLine = `agent-browser ${cmd}`;
  if (shouldLog) {
    log(` 🔧 ${commandLine}`);
  }

  let stdout;
  try {
    stdout = execSync(commandLine, {
      encoding: 'utf8',
      timeout: options.timeout || 30000,
      stdio: ['pipe', 'pipe', 'pipe']
    });
  } catch (err) {
    // Prefer the process's stderr; fall back to the exception message.
    const detail = err.stderr?.toString() || err.message;
    if (shouldLog) {
      log(` ❌ Error: ${detail.substring(0, 100)}`);
    }
    return { success: false, error: detail };
  }
  return { success: true, output: stdout.trim() };
}
/**
 * Anti-detection helper: roughly 30% of the time, perform one randomly chosen
 * "human" action (a small scroll or a short pause); otherwise do nothing.
 *
 * @returns {Promise<void>}
 */
async function humanize() {
  // 70% of calls are a no-op.
  if (Math.random() >= 0.3) {
    return;
  }
  const candidates = [
    () => ab('scroll down 200', { verbose: false }),
    () => ab('scroll up 100', { verbose: false }),
    () => randomDelay(500, 1500),
    () => randomDelay(1000, 2000),
  ];
  const choice = Math.floor(Math.random() * candidates.length);
  await candidates[choice]();
}
/**
 * Load today's scraping counters from the daily stats file.
 *
 * The read is best-effort: if the file is missing, unreadable, not valid JSON,
 * or was written for a different (UTC) date, fresh zeroed counters are returned.
 *
 * @returns {{date: string, propertiesScraped: number, leadsFound: number}}
 */
function getDailyStats() {
  const [today] = new Date().toISOString().split('T');
  const freshStats = { date: today, propertiesScraped: 0, leadsFound: 0 };
  try {
    const raw = fs.readFileSync(CONFIG.dailyLogPath, 'utf8');
    const stored = JSON.parse(raw);
    return stored.date === today ? stored : freshStats;
  } catch (e) {
    // Any failure here simply means "no usable stats for today".
    return freshStats;
  }
}
/**
 * Persist the daily counters to the stats file as pretty-printed JSON.
 *
 * @param {object} stats - Counters object to write.
 */
function saveDailyStats(stats) {
  const target = CONFIG.dailyLogPath;
  const serialized = JSON.stringify(stats, null, 2);
  fs.writeFileSync(target, serialized);
}
// Login to Reonomy
/**
 * Log in through the agent-browser CLI and save the resulting auth state.
 *
 * Opens the login page, locates the Email/Password/Log In elements in an
 * interactive snapshot, fills in CONFIG.email / CONFIG.password, submits, and
 * then checks the resulting URL. If no login form is shown but the browser is
 * already on a non-login app.reonomy.com URL, the session is treated as valid.
 *
 * @returns {Promise<boolean>} Resolves to `true` when logged in.
 * @throws {Error} When the login form or its elements cannot be found, or when
 *   the browser is still on a login/auth URL after submitting.
 */
async function login() {
  // Wrap a value in double quotes for the shell, escaping the characters that
  // remain special inside a double-quoted word ("  \  $  `).
  const shellQuote = (value) => {
    const escaped = String(value).replace(/["\\$`]/g, '\\$&');
    return `"${escaped}"`;
  };

  log(' Navigating to login page...');
  ab('open "https://app.reonomy.com/#!/login"');
  await randomDelay(3000, 5000);

  const snapshot = ab('snapshot -i');
  if (!snapshot.output?.includes('textbox "Email"')) {
    // No form: either we are already signed in, or the page failed to load.
    const urlCheck = ab('eval "window.location.href"');
    if (urlCheck.output?.includes('app.reonomy.com') && !urlCheck.output?.includes('login')) {
      log(' Already logged in!');
      return true;
    }
    throw new Error('Login form not found');
  }

  const emailMatch = snapshot.output.match(/textbox "Email" \[ref=(e\d+)\]/);
  const passMatch = snapshot.output.match(/textbox "Password" \[ref=(e\d+)\]/);
  const loginMatch = snapshot.output.match(/button "Log In" \[ref=(e\d+)\]/);
  if (!emailMatch || !passMatch || !loginMatch) {
    throw new Error('Could not find login form elements');
  }

  log(' Filling credentials...');
  ab(`fill @${emailMatch[1]} ${shellQuote(CONFIG.email)}`);
  await randomDelay(800, 1500);

  // Run the password fill with verbose: false so the plaintext password is
  // never written to the console or the log file; log a redacted line instead.
  log(` 🔧 agent-browser fill @${passMatch[1]} "********"`);
  const passwordFill = ab(`fill @${passMatch[1]} ${shellQuote(CONFIG.password)}`, { verbose: false });
  if (!passwordFill.success) {
    // stderr is deliberately not logged: it may echo the command line.
    log(' ❌ Error: password fill command failed');
  }
  await randomDelay(800, 1500);

  log(' Clicking login...');
  ab(`click @${loginMatch[1]}`);
  await randomDelay(12000, 16000); // Human-like wait for login

  const postLoginUrl = ab('eval "window.location.href"');
  if (postLoginUrl.output?.includes('auth.reonomy.com') || postLoginUrl.output?.includes('login')) {
    throw new Error('Login failed - still on login page');
  }

  log(' Saving auth state...');
  ab(`state save "${CONFIG.authStatePath}"`);
  log(' ✅ Login successful!');
  return true;
}
// Extract owners + contacts directly from Owner tab snapshot (new Reonomy UI)
/**
 * Parse an Owner-tab snapshot into a list of owners with their contacts.
 *
 * The snapshot is scanned line by line. A `link "<name>" [ref=eN]` line that
 * passes the name filters starts a new owner; subsequent `button "..."` lines
 * holding a phone number or an email address are attached to that owner.
 * Owners without any phone or email are dropped, and owners sharing the same
 * (case-insensitive) name are merged, with duplicate contacts removed.
 *
 * @param {string} snapshot - Text of the snapshot; falsy input yields `[]`.
 * @returns {Array<{name: string,
 *                  phones: Array<{number: string, type: string}>,
 *                  emails: string[]}>}
 */
function extractOwnersFromTab(snapshot) {
  if (!snapshot) return [];

  // Link texts containing any of these belong to page chrome, not to owners.
  const nonOwnerMarkers = ['Call', 'Send', 'Sign', 'Advanced', 'http', 'Don\'t'];
  // Trailing job title that is stripped from the owner name.
  const titleSuffix = /\s+(President|CEO|Manager|Member|Director|Officer|Secretary|Treasurer|VP|Vice President|Partner|Owner|Agent|Trustee|Chairman|Principal)$/i;
  const hasContacts = (owner) => owner.phones.length > 0 || owner.emails.length > 0;

  const owners = [];
  let currentOwner = null;

  for (const line of snapshot.split('\n')) {
    // New owner starts with a link (person name)
    const ownerMatch = line.match(/link "([^"]+)" \[ref=e\d+\]/);
    if (ownerMatch) {
      const name = ownerMatch[1];
      const isNavLink = nonOwnerMarkers.some((marker) => name.includes(marker));
      // Check if it looks like a person name (not a nav link)
      if (!isNavLink && name.length > 2 && name.length < 80 && /[A-Z]/.test(name)) {
        // Save previous owner
        if (currentOwner && hasContacts(currentOwner)) {
          owners.push(currentOwner);
        }
        currentOwner = { name: name.replace(titleSuffix, '').trim(), phones: [], emails: [] };
      }
    }

    // Phone: button "1-330-966-4686" or "718-652-3191 Residential" or "201-741-9321 Mobile"
    // The optional "1-" country prefix is captured as its own group so that
    // numbers which merely contain "1-" (e.g. "201-...") are not mis-prefixed.
    const phoneMatch = line.match(/button "(1-)?(\d{3}-\d{3}-\d{4})(?:\s+(\w+))?" \[ref=e\d+\]/);
    if (phoneMatch && currentOwner) {
      const [, countryPrefix = '', localNumber, label = 'Unknown'] = phoneMatch;
      currentOwner.phones.push({
        number: `${countryPrefix}${localNumber}`,
        type: label
      });
    }

    // Also match longer number strings like "12076267058202"
    const longPhoneMatch = line.match(/button "(\d{10,14})" \[ref=e\d+\]/);
    if (longPhoneMatch && currentOwner) {
      currentOwner.phones.push({
        number: longPhoneMatch[1],
        type: 'Unknown'
      });
    }

    // Email: button "arsenal8424@gmail.com"
    const emailMatch = line.match(/button "([a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,})" \[ref=e\d+\]/);
    if (emailMatch && currentOwner) {
      currentOwner.emails.push(emailMatch[1]);
    }
  }

  // Don't forget the last owner
  if (currentOwner && hasContacts(currentOwner)) {
    owners.push(currentOwner);
  }

  // Deduplicate owners by name (merge contacts if same person appears twice)
  const deduped = [];
  const seen = new Map();
  for (const owner of owners) {
    const key = owner.name.toLowerCase();
    const existing = seen.get(key);
    if (existing) {
      // Merge phones/emails
      for (const phone of owner.phones) {
        if (!existing.phones.some((known) => known.number === phone.number)) {
          existing.phones.push(phone);
        }
      }
      for (const email of owner.emails) {
        if (!existing.emails.includes(email)) existing.emails.push(email);
      }
    } else {
      const entry = { ...owner };
      seen.set(key, entry);
      deduped.push(entry);
    }
  }
  return deduped;
}
// Main scraping function
/**
 * Run one scraping session against the configured Reonomy search.
 *
 * Steps: enforce the daily limit, authenticate (reusing saved auth state when
 * it still works), open the search results, collect address-like property
 * entries, visit a shuffled and capped subset of them, read owner contacts
 * from each property's Owner tab, and finally append the captured leads to the
 * output file. Daily counters are persisted after every property, and the
 * browser is closed on the way out regardless of outcome.
 *
 * @returns {Promise<Array<object>>} Leads captured during this run (empty when
 *   the daily limit was already reached).
 * @throws {Error} Rethrows any fatal error (e.g. login failure, no properties
 *   found) after logging it and taking a screenshot.
 */
async function scrape() {
  log('🚀 Starting Reonomy Scraper v13 (ANTI-DETECTION MODE)');

  // Check daily limits
  const dailyStats = getDailyStats();
  if (dailyStats.propertiesScraped >= CONFIG.maxDailyProperties) {
    log(`⚠️ Daily limit reached (${dailyStats.propertiesScraped}/${CONFIG.maxDailyProperties}). Try again tomorrow.`);
    return [];
  }
  const remainingToday = CONFIG.maxDailyProperties - dailyStats.propertiesScraped;
  const maxThisRun = Math.min(CONFIG.maxProperties, remainingToday);
  log(`📊 Daily stats: ${dailyStats.propertiesScraped} scraped today, ${remainingToday} remaining`);
  log(`📊 This run: max ${maxThisRun} properties`);

  const leads = [];
  try {
    // Step 1: Auth
    log('\n📍 Step 1: Authenticating...');
    let needsLogin = true;
    if (fs.existsSync(CONFIG.authStatePath)) {
      log(' Found existing auth state, testing...');
      ab(`state load "${CONFIG.authStatePath}"`);
      ab('open "https://app.reonomy.com/#!/home"');
      await randomDelay(4000, 6000);
      const testUrl = ab('eval "window.location.href"');
      if (testUrl.output?.includes('app.reonomy.com') &&
          !testUrl.output?.includes('auth.reonomy.com') &&
          !testUrl.output?.includes('login')) {
        log(' ✅ Session still valid!');
        needsLogin = false;
      } else {
        log(' ⚠️ Session expired...');
      }
    }
    if (needsLogin) {
      await login();
    }

    // Step 2: Navigate to search
    log('\n📍 Step 2: Navigating to search results...');
    const searchUrl = `https://app.reonomy.com/#!/search/${CONFIG.searchId}`;
    ab(`open "${searchUrl}"`);
    await randomDelay(6000, 10000);
    let urlCheck = ab('eval "window.location.href"');
    if (urlCheck.output?.includes('auth.reonomy.com') || urlCheck.output?.includes('login')) {
      log(' Session invalid, logging in...');
      await login();
      ab(`open "${searchUrl}"`);
      await randomDelay(6000, 10000);
    }

    // Step 3: Get property list
    log('\n📍 Step 3: Getting property list...');
    await humanize();
    const iSnapshot = ab('snapshot -i');
    const properties = [];
    // Find properties — can be buttons OR headings depending on search view
    const allMatches = iSnapshot.output?.matchAll(/(?:button|heading) "([^"]+)" \[ref=(e\d+)\]/g) || [];
    for (const match of allMatches) {
      const text = match[1];
      if (text.includes('Saved Searches') || text.includes('Help Center') ||
          text.includes('More filters') || text.includes('View More') ||
          text.includes('Let\'s Go') || text.includes('Learn about') ||
          text.includes('properties') || text.length < 10) {
        continue;
      }
      // Keep only entries that look like a street address.
      if (/\d+.*(?:st|ave|blvd|dr|ln|rd|way|ct|highway|pl|cir)/i.test(text)) {
        properties.push({
          name: text.substring(0, 60),
          ref: match[2]
        });
      }
    }

    // Click "View More" to load additional properties if available
    const viewMoreMatch = iSnapshot.output?.match(/button "View More" \[ref=(e\d+)\]/);
    if (viewMoreMatch && properties.length < CONFIG.maxProperties) {
      log(` Found ${properties.length} properties, clicking View More...`);
      ab(`click @${viewMoreMatch[1]}`);
      await randomDelay(3000, 5000);
      const moreSnap = ab('snapshot -i');
      const moreMatches = moreSnap.output?.matchAll(/(?:button|heading) "([^"]+)" \[ref=(e\d+)\]/g) || [];
      for (const match of moreMatches) {
        if (match[1].includes('Saved Searches') || match[1].includes('Help Center') ||
            match[1].includes('More filters') || match[1].includes('View More') ||
            match[1].includes('Let\'s Go') || match[1].includes('Learn about') ||
            match[1].includes('properties') || match[1].length < 10) continue;
        if (/\d+.*(?:st|ave|blvd|dr|ln|rd|way|ct|highway|pl|cir)/i.test(match[1])) {
          // The second snapshot repeats the first page; skip names already collected.
          const existing = properties.find(p => p.name === match[1].substring(0, 60));
          if (!existing) {
            properties.push({ name: match[1].substring(0, 60), ref: match[2] });
          }
        }
      }
    }
    log(` Found ${properties.length} total properties`);
    if (properties.length === 0) {
      ab('screenshot /tmp/reonomy-v13-no-properties.png');
      throw new Error('No properties found');
    }

    // Anti-detection: Shuffle and limit
    const shuffledProps = shuffle(properties).slice(0, maxThisRun);
    log(` Processing ${shuffledProps.length} properties (randomized order)`);

    // Dismiss ALL notification banners (Reonomy shows multiple "Learn about..." popups)
    for (let attempt = 0; attempt < 5; attempt++) {
      const bannerSnap = ab('snapshot -i', { verbose: false });
      const letsGoMatch = bannerSnap.output?.match(/button "Let's Go" \[ref=(e\d+)\]/);
      if (!letsGoMatch) break;
      log(` Dismissing notification banner (${attempt + 1})...`);
      ab(`click @${letsGoMatch[1]}`);
      await randomDelay(800, 1500);
    }

    // Step 4: Process properties
    log('\n📍 Step 4: Processing properties...');
    for (let i = 0; i < shuffledProps.length; i++) {
      const prop = shuffledProps[i];
      log(`\n --- Property ${i + 1}/${shuffledProps.length}: ${prop.name.substring(0, 50)} ---`);
      await humanize();
      try {
        // If we used "Next property" button, we're already on the page
        // NOTE(review): in that case the page shows whichever property the site
        // considers "next", which is presumably not the shuffled `prop`; the
        // prop.name-based fallbacks below may then describe a different
        // property — confirm and consider skipping them on this path.
        if (!prop._useCurrentPage) {
          // Click property (retry with fresh snapshot if blocked)
          let clickResult = ab(`click @${prop.ref}`);
          if (!clickResult.success) {
            log(' Retrying click with fresh snapshot...');
            await randomDelay(1000, 2000);
            const freshSnap = ab('snapshot -i');
            // Find this property in fresh snapshot by address pattern.
            // Truncate BEFORE escaping so an escape sequence is never cut in half.
            const escaped = prop.name.substring(0, 30).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
            const freshMatch = freshSnap.output?.match(new RegExp(`(?:button|heading) "${escaped}[^"]*" \\[ref=(e\\d+)\\]`));
            if (freshMatch) {
              ab(`click @${freshMatch[1]}`);
            } else {
              log(' ❌ Property not found in fresh snapshot, skipping');
              dailyStats.propertiesScraped++;
              continue;
            }
          }
          await randomDelay(5000, 8000);
        }

        const propUrl = ab('eval "window.location.href"');
        const propIdMatch = propUrl.output?.match(/property\/([a-f0-9-]+)/);
        const propertyId = propIdMatch ? propIdMatch[1] : 'unknown';

        // Start from the list entry with its size/unit/acre suffix removed, then
        // prefer the first page heading when it looks like a real title.
        let propertyAddress = prop.name.replace(/\s+\d+(\.\d+)?k?\s+SF\s+.*$/, '').replace(/\s+\d+\s+Unit\s+.*$/, '').replace(/\s+\d+(\.\d+)?\s+Acre\s+.*$/, '').trim();
        const titleSnap = ab('snapshot');
        const headingMatch = titleSnap.output?.match(/heading "([^"]+)"/);
        if (headingMatch && headingMatch[1] !== 'Owners' && headingMatch[1] !== 'Owner' && headingMatch[1].length > 5) {
          propertyAddress = headingMatch[1];
        }

        // Extract property info from Building & Lot tab (default tab)
        log(' Extracting property details...');
        const buildingSnap = ab('snapshot -i');
        const buildText = buildingSnap.output || '';

        // Parse property details from the page
        const propertyInfo = {};
        const sfMatch = buildText.match(/(\d[\d,]*(?:\.\d+)?)\s*(?:SF|Sq\.?\s*Ft)/i) || prop.name.match(/([\d.]+k?)\s*SF/i);
        if (sfMatch) propertyInfo.squareFootage = sfMatch[1].replace(/,/g, '');
        const typeMatch = buildText.match(/(?:Property Type|Type)[:\s]*([A-Za-z\s()]+?)(?:\n|$)/i) || prop.name.match(/(?:Multi Family|Retail|Office|Industrial|Garage|Warehouse|Apartment|Mixed Use|Hotel|Restaurant)[^"]*/i);
        // The prop.name fallback has no capture group, hence the [0] alternative.
        if (typeMatch) propertyInfo.propertyType = typeMatch[1]?.trim() || typeMatch[0]?.trim();
        const yearMatch = buildText.match(/(?:Year Built|Built)[:\s]*(\d{4})/i);
        if (yearMatch) propertyInfo.yearBuilt = yearMatch[1];
        const lotMatch = buildText.match(/([\d.]+)\s*Acre/i);
        if (lotMatch) propertyInfo.lotSize = lotMatch[1] + ' Acres';
        const unitsMatch = buildText.match(/(\d+)\s*Unit/i) || prop.name.match(/(\d+)\s*Unit/i);
        if (unitsMatch) propertyInfo.units = unitsMatch[1];

        // Try to get city/state/zip from address
        const addrParts = propertyAddress.match(/,\s*([^,]+),\s*([A-Z]{2})\s*(\d{5})/i);
        if (addrParts) {
          propertyInfo.city = addrParts[1].trim();
          propertyInfo.state = addrParts[2];
          propertyInfo.zip = addrParts[3];
        }
        log(` Property: ${propertyAddress} | ${JSON.stringify(propertyInfo)}`);

        // Click Owner tab
        log(' Clicking Owner tab...');
        await humanize();
        ab('find role tab click --name "Owner"');
        await randomDelay(4000, 6000);

        // NEW: Extract contacts directly from Owner tab (no more View Contacts flow)
        const ownerSnap = ab('snapshot -i');
        const snapText = ownerSnap.output || '';
        // Debug: log first few lines of Owner tab snapshot
        if (i < 3) {
          const snapLines = snapText.split('\n').slice(0, 15).join('\n');
          log(` [DEBUG] Owner tab snapshot (first 15 lines):\n${snapLines}`);
        }
        const owners = extractOwnersFromTab(snapText);
        if (owners.length === 0) {
          log(' ⚠️ No contacts found on Owner tab');
          // Take debug screenshot for first few failures
          if (dailyStats.propertiesScraped < 3) {
            ab(`screenshot /tmp/reonomy-v13-owner-debug-${i}.png`);
          }
          ab(`open "https://app.reonomy.com/#!/search/${CONFIG.searchId}"`);
          await randomDelay(6000, 10000);
          dailyStats.propertiesScraped++;
          continue;
        }

        const totalPhones = owners.reduce((sum, o) => sum + o.phones.length, 0);
        const totalEmails = owners.reduce((sum, o) => sum + o.emails.length, 0);
        log(` 👥 ${owners.length} owners, 📞 ${totalPhones} phones, 📧 ${totalEmails} emails`);
        for (const owner of owners) {
          log(`${owner.name}: ${owner.phones.length}P ${owner.emails.length}E`);
        }
        leads.push({
          scrapeDate: new Date().toISOString(),
          propertyId,
          propertyAddress,
          ...propertyInfo,
          owners: owners.map(o => ({
            name: o.name,
            phones: o.phones,
            emails: o.emails
          }))
        });
        dailyStats.leadsFound++;
        log(' ✅ Lead captured!');
        dailyStats.propertiesScraped++;

        // Use "Next property" button if available (more reliable than navigating back)
        const nextSnap = ab('snapshot -i', { verbose: false });
        const nextMatch = nextSnap.output?.match(/button "Next property" \[ref=(e\d+)\]/);
        if (nextMatch && i < shuffledProps.length - 1) {
          log(' ➡️ Clicking Next property...');
          ab(`click @${nextMatch[1]}`);
          await randomDelay(5000, 8000);
          // Skip the normal click flow for next iteration
          shuffledProps[i + 1]._useCurrentPage = true;
        } else {
          // Return to search
          ab(`open "https://app.reonomy.com/#!/search/${CONFIG.searchId}"`);
          await randomDelay(6000, 10000);
          const postSnap2 = ab('snapshot -i', { verbose: false });
          const postBanner = postSnap2.output?.match(/button "Let's Go" \[ref=(e\d+)\]/);
          if (postBanner) {
            ab(`click @${postBanner[1]}`, { verbose: false });
            await randomDelay(500, 1000);
          }
        }

        // Occasional longer break (anti-detection)
        if (Math.random() < 0.2) {
          log(' ☕ Taking a short break...');
          await randomDelay(8000, 15000);
        }
      } catch (propError) {
        log(` ❌ Error: ${propError.message}`);
        ab(`open "https://app.reonomy.com/#!/search/${CONFIG.searchId}"`);
        await randomDelay(5000, 8000);
        dailyStats.propertiesScraped++;
      } finally {
        // Save progress. Living in `finally` means the counters are also
        // persisted when a property is skipped via `continue` above.
        saveDailyStats(dailyStats);
      }
    }

    // Step 5: Save results
    log('\n📍 Step 5: Saving results...');
    // Append to existing leads if file exists
    let allLeads = [];
    try {
      const existing = JSON.parse(fs.readFileSync(CONFIG.outputPath, 'utf8'));
      allLeads = existing.leads || [];
    } catch (e) {
      // Best-effort: a missing or unreadable output file means we start fresh.
    }
    allLeads = [...allLeads, ...leads];
    const output = {
      lastUpdated: new Date().toISOString(),
      searchId: CONFIG.searchId,
      totalLeads: allLeads.length,
      leads: allLeads
    };
    fs.writeFileSync(CONFIG.outputPath, JSON.stringify(output, null, 2));
    log(`✅ Saved ${leads.length} new leads (${allLeads.length} total)`);
    saveDailyStats(dailyStats);
    log(`📊 Daily total: ${dailyStats.propertiesScraped} properties, ${dailyStats.leadsFound} leads`);
  } catch (error) {
    log(`\n❌ Fatal error: ${error.message}`);
    ab('screenshot /tmp/reonomy-v13-error.png');
    throw error;
  } finally {
    log('\n🧹 Closing browser...');
    ab('close');
  }
  return leads;
}
// Entry point: run one scraping session and exit with 0 on success, 1 on failure.
(async () => {
  try {
    const leads = await scrape();
    log(`\n🎉 Done! Scraped ${leads.length} leads this run.`);
    process.exit(0);
  } catch (err) {
    log(`\n💥 Scraper failed: ${err.message}`);
    process.exit(1);
  }
})();