clawdbot-workspace/reonomy-scraper-v13.js

443 lines
14 KiB
JavaScript
Executable File

#!/usr/bin/env node
/**
* Reonomy Scraper v13 - Agent-Browser Edition (Anti-Detection)
*
* ANTI-DETECTION FEATURES:
* - Random delays (human-like timing)
* - Random property order
* - Occasional "distraction" actions
* - Session limits (max per run)
* - Daily tracking to avoid over-scraping
*/
const { execSync } = require('child_process');
const fs = require('fs');
const os = require('os');
const path = require('path');
// Config

/**
 * Read a positive whole number from an environment-variable style string.
 *
 * @param {string|undefined} value - Raw text (e.g. process.env.MAX_PROPERTIES).
 * @param {number} fallback - Returned when `value` is missing, not a base-10
 *   number, zero, or negative.
 * @returns {number} The parsed integer, or `fallback`.
 */
function parsePositiveInt(value, fallback) {
  // Explicit radix: a value such as "0x10" must not be read as hexadecimal.
  const parsed = Number.parseInt(value, 10);
  return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback;
}

// os.homedir() honours $HOME when it is set and otherwise asks the OS, so the
// script no longer dies with a TypeError from path.join(undefined, ...) when
// HOME is missing from the environment.
const WORKSPACE_DIR = path.join(os.homedir(), '.clawdbot/workspace');

const CONFIG = {
  // Files kept in the workspace directory: saved browser auth state, the
  // accumulated leads, the text log, and the per-day counters.
  authStatePath: path.join(WORKSPACE_DIR, 'reonomy-auth.json'),
  outputPath: path.join(WORKSPACE_DIR, 'reonomy-leads-v13.json'),
  logPath: path.join(WORKSPACE_DIR, 'reonomy-scraper-v13.log'),
  dailyLogPath: path.join(WORKSPACE_DIR, 'reonomy-daily-stats.json'),
  // Saved-search id used to build the search results URL.
  searchId: process.env.REONOMY_SEARCH_ID || 'bacfd104-fed5-4cc4-aba1-933f899de3f8',
  // Per-run cap. A negative value would otherwise reach Array#slice as a
  // negative end index ("all but the last N"), so only positive integers count.
  maxProperties: parsePositiveInt(process.env.MAX_PROPERTIES, 20),
  maxDailyProperties: 50, // Don't exceed this per day
  // Anything other than the literal string "false" means headless.
  headless: process.env.HEADLESS !== 'false',
  // NOTE(review): real-looking credentials are committed here as fallbacks.
  // They are kept so existing runs keep working, but they should be supplied
  // only via REONOMY_EMAIL / REONOMY_PASSWORD (and the password rotated).
  email: process.env.REONOMY_EMAIL || 'henry@realestateenhanced.com',
  password: process.env.REONOMY_PASSWORD || '9082166532',
};
/**
 * Anti-detection: pause for a random whole number of milliseconds.
 *
 * @param {number} minMs - Shortest wait, inclusive.
 * @param {number} maxMs - Longest wait, inclusive.
 * @returns {Promise<void>} Resolves once the chosen wait has elapsed.
 */
function randomDelay(minMs, maxMs) {
  // +1 so that maxMs itself can be picked.
  const span = maxMs - minMs + 1;
  const waitMs = Math.floor(Math.random() * span) + minMs;
  return new Promise((done) => {
    setTimeout(done, waitMs);
  });
}
/**
 * Anti-detection: return a shuffled copy of `array` (Fisher-Yates).
 * The input is left untouched.
 *
 * @param {Iterable<*>} array - Items to shuffle.
 * @returns {Array<*>} New array holding the same items in random order.
 */
function shuffle(array) {
  const deck = [...array];
  let last = deck.length - 1;
  // Walk from the end, swapping each slot with a random slot at or before it.
  while (last > 0) {
    const pick = Math.floor(Math.random() * (last + 1));
    const held = deck[last];
    deck[last] = deck[pick];
    deck[pick] = held;
    last -= 1;
  }
  return deck;
}
/**
 * Logging: print a timestamped line to stdout and append the same line to the
 * log file at CONFIG.logPath.
 *
 * @param {string} msg - Text to record.
 */
function log(msg) {
  const entry = `[${new Date().toISOString()}] ${msg}`;
  console.log(entry);
  fs.appendFileSync(CONFIG.logPath, `${entry}\n`);
}
/**
 * Run one `agent-browser` CLI command synchronously through the shell.
 *
 * Never throws: failures are reported in the returned object.
 *
 * @param {string} cmd - Arguments appended after `agent-browser `.
 * @param {{verbose?: boolean, timeout?: number}} [options] - `verbose: false`
 *   suppresses both the command log line and the error log line; `timeout`
 *   is in milliseconds and defaults to 30000.
 * @returns {{success: true, output: string}|{success: false, error: string}}
 *   Trimmed stdout on success; stderr (or the error message when stderr is
 *   empty) on failure.
 */
function ab(cmd, options = {}) {
  const commandLine = `agent-browser ${cmd}`;
  const shouldLog = options.verbose !== false;

  if (shouldLog) {
    log(` 🔧 ${commandLine}`);
  }

  const execOptions = {
    encoding: 'utf8',
    timeout: options.timeout || 30000,
    stdio: ['pipe', 'pipe', 'pipe']
  };

  try {
    const stdout = execSync(commandLine, execOptions);
    return { success: true, output: stdout.trim() };
  } catch (err) {
    const detail = err.stderr?.toString() || err.message;
    if (shouldLog) {
      // Only the first 100 characters go to the log; the caller gets it all.
      log(` ❌ Error: ${detail.substring(0, 100)}`);
    }
    return { success: false, error: detail };
  }
}
/**
 * Anti-detection: occasionally perform one small "human" distraction.
 *
 * Roughly 30% of calls do something: a short scroll down, a short scroll up,
 * or one of two brief pauses. The remaining calls return immediately.
 *
 * @returns {Promise<void>}
 */
async function humanize() {
  if (Math.random() >= 0.3) {
    return;
  }

  const distractions = [
    () => ab('scroll down 200', { verbose: false }),
    () => ab('scroll up 100', { verbose: false }),
    () => randomDelay(500, 1500),
    () => randomDelay(1000, 2000),
  ];
  const choice = Math.floor(Math.random() * distractions.length);
  await distractions[choice]();
}
/**
 * Daily stats tracking: load today's counters from CONFIG.dailyLogPath.
 *
 * "Today" is the UTC calendar date (it comes from Date#toISOString), so the
 * counters roll over at 00:00 UTC.
 *
 * A fresh zeroed record is returned when the file is missing, unreadable,
 * not a JSON object, or belongs to another day. Unreadable or corrupt files
 * are reported through log() instead of being ignored silently; a missing
 * file is the normal first-run case and stays quiet.
 *
 * @returns {{date: string, propertiesScraped: number, leadsFound: number}}
 *   Today's record. Any additional fields stored in the file are preserved.
 */
function getDailyStats() {
  const today = new Date().toISOString().split('T')[0];
  const fresh = { date: today, propertiesScraped: 0, leadsFound: 0 };

  let stored;
  try {
    stored = JSON.parse(fs.readFileSync(CONFIG.dailyLogPath, 'utf8'));
  } catch (err) {
    if (err.code !== 'ENOENT') {
      log(`⚠️ Ignoring unreadable daily stats file: ${err.message}`);
    }
    return fresh;
  }

  if (stored === null || typeof stored !== 'object' || stored.date !== today) {
    return fresh;
  }

  // A missing or non-numeric counter would otherwise become NaN in the
  // caller's limit arithmetic and make the run process nothing.
  const toCount = (value) => (Number.isFinite(value) && value >= 0 ? value : 0);
  return {
    ...stored,
    propertiesScraped: toCount(stored.propertiesScraped),
    leadsFound: toCount(stored.leadsFound),
  };
}
/**
 * Overwrite the daily stats file (CONFIG.dailyLogPath) with `stats`,
 * serialised as 2-space-indented JSON.
 *
 * @param {object} stats - Counters record to persist.
 */
function saveDailyStats(stats) {
  const serialized = JSON.stringify(stats, null, 2);
  fs.writeFileSync(CONFIG.dailyLogPath, serialized);
}
/**
 * Quote a value so a POSIX shell passes it to the command as one literal
 * argument. The value is wrapped in single quotes and any embedded single
 * quote is written as '\''.
 *
 * @param {*} value - Value to quote (converted with String()).
 * @returns {string} Shell-safe, single-quoted text.
 */
function shellQuote(value) {
  return `'${String(value).replace(/'/g, `'\\''`)}'`;
}

/**
 * Login to Reonomy through the browser and save the resulting auth state to
 * CONFIG.authStatePath.
 *
 * Differences from a plain "fill and click":
 * - Credentials are shell-quoted, so characters such as `"`, `$`, a backtick
 *   or `\` in the email/password cannot break or inject into the command
 *   line that ab() runs through the shell.
 * - The password command is run with `verbose: false` and logged in redacted
 *   form, so the plaintext password is not written to the log file.
 * - A failed fill aborts the login instead of submitting a partial form.
 *
 * @returns {Promise<boolean>} Always `true` on success (including when the
 *   browser turns out to be logged in already).
 * @throws {Error} When the login form or its fields cannot be found, a field
 *   cannot be filled, or the browser is still on a login page afterwards.
 */
async function login() {
  log(' Navigating to login page...');
  ab('open "https://app.reonomy.com/#!/login"');
  await randomDelay(3000, 5000);

  const snapshot = ab('snapshot -i');
  const formText = snapshot.output ?? '';
  if (!formText.includes('textbox "Email"')) {
    // No form: either the saved session redirected us into the app, or the
    // page failed to load.
    const urlCheck = ab('eval "window.location.href"');
    if (urlCheck.output?.includes('app.reonomy.com') && !urlCheck.output?.includes('login')) {
      log(' Already logged in!');
      return true;
    }
    throw new Error('Login form not found');
  }

  const emailMatch = formText.match(/textbox "Email" \[ref=(e\d+)\]/);
  const passMatch = formText.match(/textbox "Password" \[ref=(e\d+)\]/);
  const loginMatch = formText.match(/button "Log In" \[ref=(e\d+)\]/);
  if (!emailMatch || !passMatch || !loginMatch) {
    throw new Error('Could not find login form elements');
  }

  log(' Filling credentials...');
  const emailFill = ab(`fill @${emailMatch[1]} ${shellQuote(CONFIG.email)}`);
  if (!emailFill.success) {
    throw new Error('Could not fill in the email field');
  }
  await randomDelay(800, 1500);

  // Log a redacted line ourselves; ab() would otherwise log the full command,
  // password included. The failure text is not logged either, because the
  // error message of a failed command can echo the command line.
  log(` 🔧 agent-browser fill @${passMatch[1]} [REDACTED]`);
  const passwordFill = ab(`fill @${passMatch[1]} ${shellQuote(CONFIG.password)}`, { verbose: false });
  if (!passwordFill.success) {
    throw new Error('Could not fill in the password field');
  }
  await randomDelay(800, 1500);

  log(' Clicking login...');
  ab(`click @${loginMatch[1]}`);
  await randomDelay(12000, 16000); // Human-like wait for login

  const postLoginUrl = ab('eval "window.location.href"');
  if (postLoginUrl.output?.includes('auth.reonomy.com') || postLoginUrl.output?.includes('login')) {
    throw new Error('Login failed - still on login page');
  }

  log(' Saving auth state...');
  ab(`state save "${CONFIG.authStatePath}"`);
  log(' ✅ Login successful!');
  return true;
}
/**
 * Extract contacts from a contact-modal snapshot.
 *
 * Phones are buttons labelled `NNN-NNN-NNNN <source>`; emails are buttons
 * whose whole label is an email address.
 *
 * @param {string} snapshot - Snapshot text to scan.
 * @returns {{phones: Array<{number: string, source: string}>, emails: string[]}}
 *   Matches in the order they appear in the snapshot.
 */
function extractContacts(snapshot) {
  const phonePattern = /button "(\d{3}-\d{3}-\d{4})\s+([^"]+)"/g;
  const emailPattern = /button "([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})"/g;

  const phones = Array.from(
    snapshot.matchAll(phonePattern),
    ([, number, label]) => ({ number, source: label.trim() }),
  );
  const emails = Array.from(
    snapshot.matchAll(emailPattern),
    ([, address]) => address,
  );

  return { phones, emails };
}
// Main scraping function (entry point: scrape(); the functions before it are
// private helpers that each cover one step of the run).

/**
 * List every `button "<label>" [ref=eN]` entry found in snapshot text.
 *
 * @param {string|undefined} snapshotText - Output of `snapshot -i`.
 * @returns {Array<{label: string, ref: string}>} Buttons in snapshot order.
 */
function listSnapshotButtons(snapshotText) {
  const buttons = [];
  for (const [, label, ref] of (snapshotText || '').matchAll(/button "([^"]+)" \[ref=(e\d+)\]/g)) {
    buttons.push({ label, ref });
  }
  return buttons;
}

/**
 * Pick the buttons on the search page that look like property addresses:
 * at least 10 characters, not one of the known navigation buttons, and
 * containing a number followed somewhere by a street-type fragment.
 *
 * @param {string|undefined} snapshotText - Output of `snapshot -i`.
 * @returns {Array<{name: string, label: string, ref: string}>} `name` is the
 *   label cut to 60 characters (used for display and as address fallback);
 *   `label` is the full text used to find the same button again later.
 */
function findPropertyButtons(snapshotText) {
  const ignoredLabels = ['Saved Searches', 'Help Center', 'More filters'];
  const addressPattern = /\d+.*(?:st|ave|blvd|dr|ln|rd|way|ct|highway)/i;
  return listSnapshotButtons(snapshotText)
    .filter(({ label }) =>
      label.length >= 10 &&
      !ignoredLabels.some((text) => label.includes(text)) &&
      addressPattern.test(label))
    .map(({ label, ref }) => ({ name: label.substring(0, 60), label, ref }));
}

/**
 * Look the property's button up in a fresh snapshot of the current page.
 *
 * The ref stored in `prop` comes from the Step 3 snapshot. By the time a later
 * property is processed, several other snapshots have been taken and the
 * search page has been reloaded, so that ref is presumably stale
 * (NOTE(review): confirm agent-browser's ref lifetime). If the label is not
 * found in the fresh snapshot, the original ref is used as a fallback. When
 * two buttons share a label, the first one wins.
 *
 * @param {{label: string, ref: string}} prop - Property to locate.
 * @returns {string} Element ref to click.
 */
function resolvePropertyRef(prop) {
  const current = ab('snapshot -i');
  const found = listSnapshotButtons(current.output).find(({ label }) => label === prop.label);
  return found ? found.ref : prop.ref;
}

/**
 * @param {string|undefined} url - Current page URL as reported by the browser.
 * @returns {boolean} True when the URL points at the auth host or a login page.
 */
function isLoggedOutUrl(url) {
  return Boolean(url?.includes('auth.reonomy.com') || url?.includes('login'));
}

/** Step 1: reuse the saved auth state when it still works, otherwise log in. */
async function ensureAuthenticated() {
  log('\n📍 Step 1: Authenticating...');
  if (fs.existsSync(CONFIG.authStatePath)) {
    log(' Found existing auth state, testing...');
    ab(`state load "${CONFIG.authStatePath}"`);
    ab('open "https://app.reonomy.com/#!/home"');
    await randomDelay(4000, 6000);
    const testUrl = ab('eval "window.location.href"');
    if (testUrl.output?.includes('app.reonomy.com') && !isLoggedOutUrl(testUrl.output)) {
      log(' ✅ Session still valid!');
      return;
    }
    log(' ⚠️ Session expired...');
  }
  await login();
}

/** Step 2: open the search results, logging in again if we get bounced. */
async function openSearchResults(searchUrl) {
  log('\n📍 Step 2: Navigating to search results...');
  ab(`open "${searchUrl}"`);
  await randomDelay(6000, 10000);
  const urlCheck = ab('eval "window.location.href"');
  if (isLoggedOutUrl(urlCheck.output)) {
    log(' Session invalid, logging in...');
    await login();
    ab(`open "${searchUrl}"`);
    await randomDelay(6000, 10000);
  }
}

/** Re-open the search results page and give it time to load. */
async function returnToSearch(searchUrl) {
  ab(`open "${searchUrl}"`);
  await randomDelay(5000, 8000);
}

/**
 * Walk one property: property page -> Owner tab -> View Contacts -> person
 * page -> Contact modal, and read the contacts shown there.
 *
 * Leaves the browser wherever the walk stopped; the caller is responsible for
 * going back to the search results.
 *
 * @param {{name: string, label: string, ref: string}} prop - Property to open.
 * @returns {Promise<{skipped: boolean, lead: object|null}>} `skipped` is true
 *   when an expected button/link was missing before the contact modal was
 *   reached. `lead` is null when skipped or when the modal listed no phone
 *   and no email.
 */
async function scrapeProperty(prop) {
  const skippedOutcome = { skipped: true, lead: null };

  // Click property
  ab(`click @${resolvePropertyRef(prop)}`);
  await randomDelay(5000, 8000);

  const propUrl = ab('eval "window.location.href"');
  const propertyId = propUrl.output?.match(/property\/([a-f0-9-]+)/)?.[1] ?? 'unknown';
  // Prefer the first heading on the page; fall back to the button label.
  const titleSnap = ab('snapshot');
  const propertyAddress = titleSnap.output?.match(/heading "([^"]+)"/)?.[1] ?? prop.name;

  // Click Owner tab
  log(' Clicking Owner tab...');
  await humanize();
  ab('find role tab click --name "Owner"');
  await randomDelay(4000, 6000);

  // Find View Contacts
  const ownerSnap = ab('snapshot -i');
  const vcMatch = ownerSnap.output?.match(/button "View Contacts \((\d+)\)" \[ref=(e\d+)\]/);
  if (!vcMatch) {
    log(' ⚠️ No View Contacts button');
    return skippedOutcome;
  }
  log(` Found ${vcMatch[1]} contacts`);
  ab(`click @${vcMatch[2]}`);
  await randomDelay(4000, 6000);

  // Find person link
  const companySnap = ab('snapshot');
  const personMatch = companySnap.output?.match(/\/url: \/!\/person\/([a-f0-9-]+)/);
  if (!personMatch) {
    log(' ⚠️ No person link found');
    return skippedOutcome;
  }
  const personId = personMatch[1];
  // Get person name
  const personName =
    companySnap.output?.match(/link "([^"]+)"[^\n]*\/url: \/!\/person/)?.[1] ?? 'Unknown';
  log(` Person: ${personName}`);

  // NOTE(review): unlike the search URL this one has no '#'; it mirrors the
  // "/!/person/<id>" href matched above - confirm against the live site.
  ab(`open "https://app.reonomy.com/!/person/${personId}"`);
  await randomDelay(5000, 8000);

  // Click Contact button
  await humanize();
  const personSnap = ab('snapshot -i');
  const contactMatch = personSnap.output?.match(/button "Contact" \[ref=(e\d+)\]/);
  if (!contactMatch) {
    log(' ⚠️ No Contact button');
    return skippedOutcome;
  }
  ab(`click @${contactMatch[1]}`);
  await randomDelay(2000, 4000);

  // Extract contacts
  const modalSnap = ab('snapshot -i');
  const contacts = extractContacts(modalSnap.output || '');
  log(` 📞 ${contacts.phones.length} phones, 📧 ${contacts.emails.length} emails`);

  let lead = null;
  if (contacts.phones.length > 0 || contacts.emails.length > 0) {
    lead = {
      scrapeDate: new Date().toISOString(),
      propertyId,
      propertyAddress,
      personName,
      personId,
      phones: contacts.phones,
      emails: contacts.emails
    };
    log(' ✅ Lead captured!');
  }

  // Close modal
  ab('press Escape');
  await randomDelay(1000, 2000);
  return { skipped: false, lead };
}

/**
 * Step 5: append this run's leads to the leads file at CONFIG.outputPath.
 *
 * If the existing file cannot be read or parsed (or its `leads` field is not
 * an array), a copy is kept next to it as `<outputPath>.corrupt-<timestamp>`
 * before it is overwritten, so earlier leads are not lost silently. A missing
 * file is the normal first-run case.
 *
 * @param {object[]} leads - Leads captured in this run.
 * @returns {number} Total number of leads now stored in the file.
 */
function persistLeads(leads) {
  let previous = [];
  try {
    const existing = JSON.parse(fs.readFileSync(CONFIG.outputPath, 'utf8'));
    const stored = existing.leads || [];
    if (!Array.isArray(stored)) {
      throw new Error('"leads" is not an array');
    }
    previous = stored;
  } catch (err) {
    if (err.code !== 'ENOENT') {
      const backupPath = `${CONFIG.outputPath}.corrupt-${Date.now()}`;
      log(` ⚠️ Existing leads file unusable (${err.message}); keeping a copy at ${backupPath}`);
      try {
        fs.copyFileSync(CONFIG.outputPath, backupPath);
      } catch (copyErr) {
        log(` ⚠️ Could not back up existing leads file: ${copyErr.message}`);
      }
    }
  }

  const allLeads = [...previous, ...leads];
  const output = {
    lastUpdated: new Date().toISOString(),
    searchId: CONFIG.searchId,
    totalLeads: allLeads.length,
    leads: allLeads
  };
  fs.writeFileSync(CONFIG.outputPath, JSON.stringify(output, null, 2));
  return allLeads.length;
}

/**
 * Run one scraping session: authenticate, open the saved search, visit a
 * random subset of the listed properties, and append any contacts found to
 * the leads file. The browser is closed when the session ends, whether it
 * succeeded or not.
 *
 * Per-property failures are logged and counted but do not stop the run.
 * After every property (found, skipped or failed) the daily counters are
 * saved and the search results page is re-opened, so the next property
 * always starts from the same page.
 *
 * @returns {Promise<object[]>} Leads captured in this run (empty when the
 *   daily limit was already reached).
 * @throws {Error} On fatal problems such as a failed login, no properties on
 *   the search page, or a file-system error while saving.
 */
async function scrape() {
  log('🚀 Starting Reonomy Scraper v13 (ANTI-DETECTION MODE)');

  // Check daily limits
  const dailyStats = getDailyStats();
  if (dailyStats.propertiesScraped >= CONFIG.maxDailyProperties) {
    log(`⚠️ Daily limit reached (${dailyStats.propertiesScraped}/${CONFIG.maxDailyProperties}). Try again tomorrow.`);
    return [];
  }
  const remainingToday = CONFIG.maxDailyProperties - dailyStats.propertiesScraped;
  // Clamp at 0: a negative limit would make slice() below keep almost everything.
  const maxThisRun = Math.max(0, Math.min(CONFIG.maxProperties, remainingToday));
  log(`📊 Daily stats: ${dailyStats.propertiesScraped} scraped today, ${remainingToday} remaining`);
  log(`📊 This run: max ${maxThisRun} properties`);

  const leads = [];
  const searchUrl = `https://app.reonomy.com/#!/search/${CONFIG.searchId}`;

  try {
    await ensureAuthenticated();
    await openSearchResults(searchUrl);

    // Step 3: Get property list
    log('\n📍 Step 3: Getting property list...');
    await humanize();
    const properties = findPropertyButtons(ab('snapshot -i').output);
    log(` Found ${properties.length} properties`);
    if (properties.length === 0) {
      ab('screenshot /tmp/reonomy-v13-no-properties.png');
      throw new Error('No properties found');
    }

    // Anti-detection: Shuffle and limit
    const queue = shuffle(properties).slice(0, maxThisRun);
    log(` Processing ${queue.length} properties (randomized order)`);

    // Step 4: Process properties
    log('\n📍 Step 4: Processing properties...');
    for (let i = 0; i < queue.length; i++) {
      const prop = queue[i];
      log(`\n --- Property ${i + 1}/${queue.length}: ${prop.name.substring(0, 40)}... ---`);
      await humanize();

      let reachedContacts = false;
      try {
        const outcome = await scrapeProperty(prop);
        reachedContacts = !outcome.skipped;
        if (outcome.lead) {
          leads.push(outcome.lead);
          dailyStats.leadsFound++;
        }
      } catch (propError) {
        log(` ❌ Error: ${propError.message}`);
      }

      // Count and save before navigating, so the daily counters on disk stay
      // correct even if the process is stopped mid-run.
      dailyStats.propertiesScraped++;
      saveDailyStats(dailyStats);

      await returnToSearch(searchUrl);

      // Occasional longer break (anti-detection)
      if (reachedContacts && Math.random() < 0.2) {
        log(' ☕ Taking a short break...');
        await randomDelay(8000, 15000);
      }
    }

    // Step 5: Save results
    log('\n📍 Step 5: Saving results...');
    const totalLeads = persistLeads(leads);
    log(`✅ Saved ${leads.length} new leads (${totalLeads} total)`);
    saveDailyStats(dailyStats);
    log(`📊 Daily total: ${dailyStats.propertiesScraped} properties, ${dailyStats.leadsFound} leads`);
  } catch (error) {
    log(`\n❌ Fatal error: ${error.message}`);
    ab('screenshot /tmp/reonomy-v13-error.png');
    throw error;
  } finally {
    log('\n🧹 Closing browser...');
    ab('close');
  }
  return leads;
}
// Run: execute one scrape session and turn the outcome into the exit code
// (0 when scrape() resolves, 1 when it rejects).
async function main() {
  try {
    const leads = await scrape();
    log(`\n🎉 Done! Scraped ${leads.length} leads this run.`);
    process.exit(0);
  } catch (err) {
    log(`\n💥 Scraper failed: ${err.message}`);
    process.exit(1);
  }
}

main();