443 lines
14 KiB
JavaScript
Executable File
443 lines
14 KiB
JavaScript
Executable File
#!/usr/bin/env node
|
|
/**
|
|
* Reonomy Scraper v13 - Agent-Browser Edition (Anti-Detection)
|
|
*
|
|
* ANTI-DETECTION FEATURES:
|
|
* - Random delays (human-like timing)
|
|
* - Random property order
|
|
* - Occasional "distraction" actions
|
|
* - Session limits (max per run)
|
|
* - Daily tracking to avoid over-scraping
|
|
*/
|
|
|
|
const { execSync } = require('child_process');
|
|
const fs = require('fs');
|
|
const path = require('path');
|
|
|
|
// Config

/**
 * Parse a base-10 integer that must be strictly positive.
 *
 * @param {string|undefined} value - Raw text, typically an environment variable.
 * @param {number} fallback - Returned when `value` is missing, not a number, zero or negative.
 * @returns {number} The parsed integer, or `fallback`.
 */
function parsePositiveInt(value, fallback) {
  const parsed = Number.parseInt(value, 10);
  return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback;
}

// Directory holding every file this script reads or writes. HOME is preferred;
// the fallbacks keep path.join() from throwing when HOME is not set.
const WORKSPACE_DIR = path.join(
  process.env.HOME || process.env.USERPROFILE || process.cwd(),
  '.clawdbot/workspace'
);

/**
 * Runtime settings, overridable through environment variables:
 * REONOMY_SEARCH_ID, MAX_PROPERTIES, HEADLESS, REONOMY_EMAIL, REONOMY_PASSWORD.
 *
 * SECURITY: credentials are read from the environment only. Account
 * credentials must never be committed as fallback values; any credential
 * that was previously stored in this file should be rotated.
 */
const CONFIG = {
  authStatePath: path.join(WORKSPACE_DIR, 'reonomy-auth.json'),
  outputPath: path.join(WORKSPACE_DIR, 'reonomy-leads-v13.json'),
  logPath: path.join(WORKSPACE_DIR, 'reonomy-scraper-v13.log'),
  dailyLogPath: path.join(WORKSPACE_DIR, 'reonomy-daily-stats.json'),
  searchId: process.env.REONOMY_SEARCH_ID || 'bacfd104-fed5-4cc4-aba1-933f899de3f8',
  // Per-run cap; invalid, zero or negative MAX_PROPERTIES falls back to 20.
  maxProperties: parsePositiveInt(process.env.MAX_PROPERTIES, 20),
  maxDailyProperties: 50, // Don't exceed this per day
  // NOTE(review): not referenced by the code visible in this file - confirm it is still needed.
  headless: process.env.HEADLESS !== 'false',
  // Empty string means "not configured".
  email: process.env.REONOMY_EMAIL || '',
  password: process.env.REONOMY_PASSWORD || '',
};
|
|
|
|
/**
 * Wait for a random whole number of milliseconds in [minMs, maxMs].
 *
 * @param {number} minMs - Shortest wait, inclusive.
 * @param {number} maxMs - Longest wait, inclusive.
 * @returns {Promise<void>} Resolves once the chosen time has elapsed.
 */
function randomDelay(minMs, maxMs) {
  const span = maxMs - minMs + 1;
  const offset = Math.floor(Math.random() * span);
  const waitMs = offset + minMs;
  return new Promise((done) => {
    setTimeout(done, waitMs);
  });
}
|
|
|
|
/**
 * Return a shuffled copy of `array` (Fisher-Yates); the input is not modified.
 *
 * @param {Array} array - Items to shuffle.
 * @returns {Array} New array holding the same items in random order.
 */
function shuffle(array) {
  const result = [...array];
  let last = result.length - 1;
  // Walk from the end, swapping each slot with a random slot at or before it.
  while (last > 0) {
    const pick = Math.floor(Math.random() * (last + 1));
    const held = result[last];
    result[last] = result[pick];
    result[pick] = held;
    last -= 1;
  }
  return result;
}
|
|
|
|
/**
 * Print a timestamped line to the console and append it to CONFIG.logPath.
 *
 * The log directory is created on demand. A failure to write the log file is
 * reported on stderr and otherwise ignored, so that logging can never abort
 * the run or hide the error that was being reported.
 *
 * @param {string} msg - Message text.
 */
function log(msg) {
  const timestamp = new Date().toISOString();
  const line = `[${timestamp}] ${msg}`;
  console.log(line);
  try {
    fs.mkdirSync(path.dirname(CONFIG.logPath), { recursive: true });
    fs.appendFileSync(CONFIG.logPath, `${line}\n`);
  } catch (err) {
    console.error(`[${timestamp}] Could not write to log file: ${err.message}`);
  }
}
|
|
|
|
/**
 * Run one `agent-browser` CLI command synchronously through the shell.
 *
 * The command text is passed to the shell as-is: callers are responsible for
 * quoting any value they interpolate into `cmd`.
 *
 * @param {string} cmd - Arguments appended after `agent-browser`.
 * @param {object} [options]
 * @param {boolean} [options.verbose] - Pass `false` to keep the command and any
 *   error out of the log (use this for commands that contain secrets).
 * @param {number} [options.timeout] - Milliseconds before the command is
 *   killed; defaults to 30000. An explicit 0 is passed through to execSync.
 * @returns {{success: true, output: string} | {success: false, error: string}}
 *   Never throws: a failed command is reported through `success: false`.
 */
function ab(cmd, options = {}) {
  const fullCmd = `agent-browser ${cmd}`;
  const shouldLog = options.verbose !== false;
  if (shouldLog) {
    log(` 🔧 ${fullCmd}`);
  }
  try {
    const result = execSync(fullCmd, {
      encoding: 'utf8',
      timeout: options.timeout ?? 30000,
      // Page snapshots can be large; raise the output cap so a big page does
      // not make the command fail for exceeding execSync's default limit.
      maxBuffer: 16 * 1024 * 1024,
      stdio: ['pipe', 'pipe', 'pipe'],
    });
    return { success: true, output: result.trim() };
  } catch (err) {
    const stderr = err.stderr?.toString() || err.message;
    if (shouldLog) {
      log(` ❌ Error: ${stderr.substring(0, 100)}`);
    }
    return { success: false, error: stderr };
  }
}
|
|
|
|
/**
 * Occasionally perform one small filler action between real steps.
 *
 * Roughly 30% of calls run exactly one of four equally likely actions: a
 * short scroll down, a short scroll up, or one of two brief pauses. The
 * remaining calls do nothing.
 *
 * @returns {Promise<void>}
 */
async function humanize() {
  if (!(Math.random() < 0.3)) {
    return;
  }

  const choice = Math.floor(Math.random() * 4);
  switch (choice) {
    case 0:
      await ab('scroll down 200', { verbose: false });
      break;
    case 1:
      await ab('scroll up 100', { verbose: false });
      break;
    case 2:
      await randomDelay(500, 1500);
      break;
    case 3:
      await randomDelay(1000, 2000);
      break;
  }
}
|
|
|
|
/**
 * Load today's counters from CONFIG.dailyLogPath.
 *
 * "Today" is the UTC calendar date (taken from `toISOString()`). Counters
 * stored for any other date are ignored. Stored counters that are missing or
 * are not non-negative integers are reset to 0 so callers can always do
 * arithmetic on them.
 *
 * @returns {{date: string, propertiesScraped: number, leadsFound: number}}
 *   Today's stats; a fresh zeroed record when nothing usable is stored.
 */
function getDailyStats() {
  const today = new Date().toISOString().split('T')[0];
  const toCount = (value) => (Number.isInteger(value) && value >= 0 ? value : 0);

  try {
    const data = JSON.parse(fs.readFileSync(CONFIG.dailyLogPath, 'utf8'));
    if (data?.date === today) {
      return {
        ...data,
        propertiesScraped: toCount(data.propertiesScraped),
        leadsFound: toCount(data.leadsFound),
      };
    }
  } catch (err) {
    // A missing file is the normal first-run case; anything else is worth a warning.
    if (err.code !== 'ENOENT') {
      console.warn(`Ignoring unreadable daily stats file: ${err.message}`);
    }
  }
  return { date: today, propertiesScraped: 0, leadsFound: 0 };
}
|
|
|
|
/**
 * Persist the daily counters to CONFIG.dailyLogPath as pretty-printed JSON.
 *
 * The directory is created when missing. The data is written to a temporary
 * file in the same directory and then renamed over the target, so an
 * interrupted run cannot leave a half-written stats file behind.
 *
 * @param {object} stats - Counters to store.
 * @throws {Error} When the file cannot be written.
 */
function saveDailyStats(stats) {
  const targetPath = CONFIG.dailyLogPath;
  const tempPath = `${targetPath}.${process.pid}.tmp`;
  const payload = JSON.stringify(stats, null, 2);

  fs.mkdirSync(path.dirname(targetPath), { recursive: true });
  try {
    fs.writeFileSync(tempPath, payload);
    fs.renameSync(tempPath, targetPath);
  } catch (err) {
    // Do not leave the temporary file behind when the write or rename fails.
    fs.rmSync(tempPath, { force: true });
    throw err;
  }
}
|
|
|
|
/**
 * Log in to Reonomy through the browser and save the resulting auth state.
 *
 * Opens the login page, locates the e-mail, password and submit controls in
 * an interactive snapshot, fills in CONFIG.email / CONFIG.password, submits,
 * and then checks that the browser has left the login page. When no login
 * form is shown and the browser is already on app.reonomy.com, the session
 * is treated as logged in and nothing is typed.
 *
 * @returns {Promise<boolean>} Always `true` on success.
 * @throws {Error} When the form cannot be found, credentials are not
 *   configured, the form cannot be filled, or the login cannot be confirmed.
 */
async function login() {
  // Quote a value as ONE literal shell word. Inside single quotes the shell
  // expands nothing, so `$`, backticks and double quotes in a password are
  // passed through untouched; a single quote itself is written as '\''.
  const quoteArg = (value) => `'${String(value).replace(/'/g, `'\\''`)}'`;

  log(' Navigating to login page...');
  ab('open "https://app.reonomy.com/#!/login"');
  await randomDelay(3000, 5000);

  const snapshot = ab('snapshot -i');
  const formText = snapshot.output ?? '';
  if (!formText.includes('textbox "Email"')) {
    const urlCheck = ab('eval "window.location.href"');
    if (urlCheck.output?.includes('app.reonomy.com') && !urlCheck.output?.includes('login')) {
      log(' Already logged in!');
      return true;
    }
    throw new Error('Login form not found');
  }

  const emailMatch = formText.match(/textbox "Email" \[ref=(e\d+)\]/);
  const passMatch = formText.match(/textbox "Password" \[ref=(e\d+)\]/);
  const loginMatch = formText.match(/button "Log In" \[ref=(e\d+)\]/);

  if (!emailMatch || !passMatch || !loginMatch) {
    throw new Error('Could not find login form elements');
  }

  // Checked only once a login is really required, so a run with a valid
  // saved session still works without credentials.
  if (!CONFIG.email || !CONFIG.password) {
    throw new Error('Missing credentials: set REONOMY_EMAIL and REONOMY_PASSWORD');
  }

  log(' Filling credentials...');
  const emailFill = ab(`fill @${emailMatch[1]} ${quoteArg(CONFIG.email)}`);
  await randomDelay(800, 1500);
  // verbose: false keeps the password out of the console and the log file.
  const passwordFill = ab(`fill @${passMatch[1]} ${quoteArg(CONFIG.password)}`, { verbose: false });
  await randomDelay(800, 1500);

  if (!emailFill.success || !passwordFill.success) {
    // The underlying error text is deliberately left out: it may echo the command line.
    throw new Error('Could not fill in the login form');
  }

  log(' Clicking login...');
  ab(`click @${loginMatch[1]}`);
  await randomDelay(12000, 16000); // Human-like wait for login

  const postLoginUrl = ab('eval "window.location.href"');
  if (!postLoginUrl.success) {
    // An unreadable URL is not proof of success; do not save auth state on a guess.
    throw new Error('Login could not be verified - unable to read the current URL');
  }
  if (postLoginUrl.output?.includes('auth.reonomy.com') || postLoginUrl.output?.includes('login')) {
    throw new Error('Login failed - still on login page');
  }

  log(' Saving auth state...');
  ab(`state save ${quoteArg(CONFIG.authStatePath)}`);

  log(' ✅ Login successful!');
  return true;
}
|
|
|
|
/**
 * Pull phone numbers and e-mail addresses out of a contact-modal snapshot.
 *
 * Phones are read from buttons labelled `NNN-NNN-NNNN <text>`; the trailing
 * text is kept, trimmed, as the phone's `source`. E-mails are read from
 * buttons whose whole label is an e-mail address.
 *
 * @param {string} snapshot - Snapshot text to scan.
 * @returns {{phones: Array<{number: string, source: string}>, emails: string[]}}
 */
function extractContacts(snapshot) {
  const phonePattern = /button "(\d{3}-\d{3}-\d{4})\s+([^"]+)"/g;
  const emailPattern = /button "([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})"/g;

  const phones = Array.from(snapshot.matchAll(phonePattern), ([, number, label]) => ({
    number,
    source: label.trim(),
  }));
  const emails = Array.from(snapshot.matchAll(emailPattern), ([, address]) => address);

  return { phones, emails };
}
|
|
|
|
/**
 * Restore the saved browser session when it is still valid; otherwise log in.
 */
async function authenticate() {
  if (fs.existsSync(CONFIG.authStatePath)) {
    log(' Found existing auth state, testing...');
    ab(`state load "${CONFIG.authStatePath}"`);
    ab('open "https://app.reonomy.com/#!/home"');
    await randomDelay(4000, 6000);

    const currentUrl = ab('eval "window.location.href"').output ?? '';
    const insideApp =
      currentUrl.includes('app.reonomy.com') &&
      !currentUrl.includes('auth.reonomy.com') &&
      !currentUrl.includes('login');
    if (insideApp) {
      log(' ✅ Session still valid!');
      return;
    }
    log(' ⚠️ Session expired...');
  }

  await login();
}

/**
 * List the buttons in a search-results snapshot that look like street addresses.
 *
 * @param {string} snapshotText - Interactive snapshot of the search page.
 * @returns {Array<{name: string, ref: string}>} Button label (cut to 60 chars) and element ref.
 */
function findPropertyButtons(snapshotText) {
  const ignoredLabels = ['Saved Searches', 'Help Center', 'More filters'];
  const addressPattern = /\d+.*(?:st|ave|blvd|dr|ln|rd|way|ct|highway)/i;

  const properties = [];
  for (const [, label, ref] of snapshotText.matchAll(/button "([^"]+)" \[ref=(e\d+)\]/g)) {
    const isPageControl = ignoredLabels.some((ignored) => label.includes(ignored));
    if (isPageControl || label.length < 10) {
      continue;
    }
    if (addressPattern.test(label)) {
      properties.push({ name: label.substring(0, 60), ref });
    }
  }
  return properties;
}

/**
 * Walk one property: property page -> Owner tab -> View Contacts -> person page -> Contact modal.
 *
 * The browser is left wherever the walk stopped; the caller is responsible
 * for navigating back to the search results.
 *
 * @param {{name: string, ref: string}} prop - Entry from the search-results list.
 * @returns {Promise<{reachedModal: boolean, lead: object|null}>} `reachedModal`
 *   is false when a required control was missing and the property was
 *   skipped; `lead` is null when no phone or e-mail was found.
 */
async function scrapePropertyContacts(prop) {
  const skipped = { reachedModal: false, lead: null };

  ab(`click @${prop.ref}`);
  await randomDelay(5000, 8000);

  const propUrl = ab('eval "window.location.href"');
  const propertyId = propUrl.output?.match(/property\/([a-f0-9-]+)/)?.[1] ?? 'unknown';

  // Prefer the page heading; fall back to the label from the results list.
  const titleSnap = ab('snapshot');
  const propertyAddress = titleSnap.output?.match(/heading "([^"]+)"/)?.[1] ?? prop.name;

  log(' Clicking Owner tab...');
  await humanize();
  ab('find role tab click --name "Owner"');
  await randomDelay(4000, 6000);

  const ownerSnap = ab('snapshot -i');
  const vcMatch = ownerSnap.output?.match(/button "View Contacts \((\d+)\)" \[ref=(e\d+)\]/);
  if (!vcMatch) {
    log(' ⚠️ No View Contacts button');
    return skipped;
  }

  log(` Found ${vcMatch[1]} contacts`);
  ab(`click @${vcMatch[2]}`);
  await randomDelay(4000, 6000);

  const companySnap = ab('snapshot');
  const personId = companySnap.output?.match(/\/url: \/!\/person\/([a-f0-9-]+)/)?.[1];
  if (!personId) {
    log(' ⚠️ No person link found');
    return skipped;
  }

  const personName =
    companySnap.output.match(/link "([^"]+)"[^\n]*\/url: \/!\/person/)?.[1] ?? 'Unknown';

  log(` Person: ${personName}`);
  ab(`open "https://app.reonomy.com/!/person/${personId}"`);
  await randomDelay(5000, 8000);

  await humanize();
  const personSnap = ab('snapshot -i');
  const contactMatch = personSnap.output?.match(/button "Contact" \[ref=(e\d+)\]/);
  if (!contactMatch) {
    log(' ⚠️ No Contact button');
    return skipped;
  }

  ab(`click @${contactMatch[1]}`);
  await randomDelay(2000, 4000);

  const modalSnap = ab('snapshot -i');
  const contacts = extractContacts(modalSnap.output || '');
  log(` 📞 ${contacts.phones.length} phones, 📧 ${contacts.emails.length} emails`);

  // Close the modal before the caller navigates away.
  ab('press Escape');
  await randomDelay(1000, 2000);

  if (contacts.phones.length === 0 && contacts.emails.length === 0) {
    return { reachedModal: true, lead: null };
  }
  return {
    reachedModal: true,
    lead: {
      scrapeDate: new Date().toISOString(),
      propertyId,
      propertyAddress,
      personName,
      personId,
      phones: contacts.phones,
      emails: contacts.emails,
    },
  };
}

/**
 * Append `newLeads` to the leads already stored in CONFIG.outputPath and rewrite the file.
 *
 * A missing file simply starts a new list. A file that exists but cannot be
 * parsed is copied aside before being replaced, so earlier leads are never
 * silently destroyed.
 *
 * @param {object[]} newLeads - Leads captured in this run.
 * @returns {number} Total number of leads now stored.
 */
function saveLeads(newLeads) {
  let previousLeads = [];
  try {
    const existing = JSON.parse(fs.readFileSync(CONFIG.outputPath, 'utf8'));
    if (Array.isArray(existing?.leads)) {
      previousLeads = existing.leads;
    }
  } catch (err) {
    if (err.code !== 'ENOENT') {
      const backupPath = `${CONFIG.outputPath}.unreadable-${Date.now()}`;
      fs.copyFileSync(CONFIG.outputPath, backupPath);
      log(` ⚠️ Existing leads file could not be read (${err.message}); copy kept at ${backupPath}`);
    }
  }

  const allLeads = [...previousLeads, ...newLeads];
  const output = {
    lastUpdated: new Date().toISOString(),
    searchId: CONFIG.searchId,
    totalLeads: allLeads.length,
    leads: allLeads,
  };
  fs.writeFileSync(CONFIG.outputPath, JSON.stringify(output, null, 2));
  return allLeads.length;
}

/**
 * Main scraping function: run one session within the per-run and per-day limits.
 *
 * Steps: authenticate, open the saved search, list the properties on the
 * results page, visit a shuffled subset of them, and append every captured
 * lead to CONFIG.outputPath. After every property - whether it produced a
 * lead, was skipped or failed - the search page is re-opened and the daily
 * counters are saved. Leads captured before a fatal error are still written
 * to disk. The browser is always closed at the end.
 *
 * @returns {Promise<object[]>} Leads captured in this run (empty when the daily limit was already reached).
 * @throws {Error} On a fatal error, e.g. no properties found or a failed login.
 */
async function scrape() {
  log('🚀 Starting Reonomy Scraper v13 (ANTI-DETECTION MODE)');

  // Check daily limits
  const dailyStats = getDailyStats();
  if (dailyStats.propertiesScraped >= CONFIG.maxDailyProperties) {
    log(`⚠️ Daily limit reached (${dailyStats.propertiesScraped}/${CONFIG.maxDailyProperties}). Try again tomorrow.`);
    return [];
  }

  const remainingToday = CONFIG.maxDailyProperties - dailyStats.propertiesScraped;
  const maxThisRun = Math.min(CONFIG.maxProperties, remainingToday);
  log(`📊 Daily stats: ${dailyStats.propertiesScraped} scraped today, ${remainingToday} remaining`);
  log(`📊 This run: max ${maxThisRun} properties`);

  const leads = [];
  const searchUrl = `https://app.reonomy.com/#!/search/${CONFIG.searchId}`;
  let leadsSaved = false;

  try {
    // Step 1: Auth
    log('\n📍 Step 1: Authenticating...');
    await authenticate();

    // Step 2: Navigate to search
    log('\n📍 Step 2: Navigating to search results...');
    ab(`open "${searchUrl}"`);
    await randomDelay(6000, 10000);

    const urlCheck = ab('eval "window.location.href"');
    if (urlCheck.output?.includes('auth.reonomy.com') || urlCheck.output?.includes('login')) {
      log(' Session invalid, logging in...');
      await login();
      ab(`open "${searchUrl}"`);
      await randomDelay(6000, 10000);
    }

    // Step 3: Get property list
    log('\n📍 Step 3: Getting property list...');
    await humanize();

    const properties = findPropertyButtons(ab('snapshot -i').output ?? '');
    log(` Found ${properties.length} properties`);

    if (properties.length === 0) {
      ab('screenshot /tmp/reonomy-v13-no-properties.png');
      throw new Error('No properties found');
    }

    // Anti-detection: Shuffle and limit
    const shuffledProps = shuffle(properties).slice(0, maxThisRun);
    log(` Processing ${shuffledProps.length} properties (randomized order)`);

    // Step 4: Process properties
    // NOTE(review): the element refs come from the single snapshot taken in
    // Step 3 and are reused after the search page is re-opened - confirm that
    // agent-browser refs stay valid across navigation.
    log('\n📍 Step 4: Processing properties...');

    for (const [index, prop] of shuffledProps.entries()) {
      log(`\n --- Property ${index + 1}/${shuffledProps.length}: ${prop.name.substring(0, 40)}... ---`);

      await humanize();

      let reachedModal = false;
      try {
        const outcome = await scrapePropertyContacts(prop);
        reachedModal = outcome.reachedModal;
        if (outcome.lead) {
          leads.push(outcome.lead);
          dailyStats.leadsFound++;
          log(' ✅ Lead captured!');
        }
      } catch (propError) {
        log(` ❌ Error: ${propError.message}`);
      }
      dailyStats.propertiesScraped++;

      // Every outcome returns to the results page the same way, so the next
      // click never happens on a leftover property, company or person page.
      ab(`open "${searchUrl}"`);
      await randomDelay(5000, 8000);

      // Occasional longer break (anti-detection)
      if (reachedModal && Math.random() < 0.2) {
        log(' ☕ Taking a short break...');
        await randomDelay(8000, 15000);
      }

      // Save progress
      saveDailyStats(dailyStats);
    }

    // Step 5: Save results
    log('\n📍 Step 5: Saving results...');
    const totalLeads = saveLeads(leads);
    leadsSaved = true;
    log(`✅ Saved ${leads.length} new leads (${totalLeads} total)`);

    saveDailyStats(dailyStats);
    log(`📊 Daily total: ${dailyStats.propertiesScraped} properties, ${dailyStats.leadsFound} leads`);
  } catch (error) {
    log(`\n❌ Fatal error: ${error.message}`);
    ab('screenshot /tmp/reonomy-v13-error.png');

    // Do not lose what was captured before the failure.
    if (!leadsSaved && leads.length > 0) {
      try {
        const totalLeads = saveLeads(leads);
        log(`✅ Saved ${leads.length} new leads (${totalLeads} total)`);
      } catch (saveError) {
        log(` ❌ Error: could not save captured leads: ${saveError.message}`);
      }
    }
    throw error;
  } finally {
    log('\n🧹 Closing browser...');
    ab('close');
  }

  return leads;
}
|
|
|
|
/**
 * Script entry point: run one scrape and turn the outcome into an exit code
 * (0 when the run completes, 1 when it fails).
 */
async function main() {
  try {
    const leads = await scrape();
    log(`\n🎉 Done! Scraped ${leads.length} leads this run.`);
    process.exit(0);
  } catch (err) {
    log(`\n💥 Scraper failed: ${err.message}`);
    process.exit(1);
  }
}

main();
|