581 lines
21 KiB
JavaScript
Executable File
581 lines
21 KiB
JavaScript
Executable File
#!/usr/bin/env node
|
|
/**
|
|
* Reonomy Scraper v13.1 - Agent-Browser Edition (Anti-Detection)
|
|
*
|
|
* PATCHED 2026-02-03: Reonomy now shows contacts directly on Owner tab
|
|
* (no more View Contacts → person page → modal flow)
|
|
*
|
|
* ANTI-DETECTION FEATURES:
|
|
* - Random delays (human-like timing)
|
|
* - Random property order
|
|
* - Occasional "distraction" actions
|
|
* - Session limits (max per run)
|
|
* - Daily tracking to avoid over-scraping
|
|
*/
|
|
|
|
const { execSync } = require('child_process');
const fs = require('fs');
const os = require('os');
const path = require('path');
|
|
|
|
// Config
|
|
const CONFIG = {
|
|
authStatePath: path.join(process.env.HOME, '.clawdbot/workspace/reonomy-auth.json'),
|
|
outputPath: path.join(process.env.HOME, '.clawdbot/workspace/reonomy-leads-v13.json'),
|
|
logPath: path.join(process.env.HOME, '.clawdbot/workspace/reonomy-scraper-v13.log'),
|
|
dailyLogPath: path.join(process.env.HOME, '.clawdbot/workspace/reonomy-daily-stats.json'),
|
|
searchId: process.env.REONOMY_SEARCH_ID || '504a2d13-d88f-4213-9ac6-a7c8bc7c20c6',
|
|
maxProperties: parseInt(process.env.MAX_PROPERTIES) || 20,
|
|
maxDailyProperties: 50, // Don't exceed this per day
|
|
headless: process.env.HEADLESS !== 'false',
|
|
email: process.env.REONOMY_EMAIL || 'henry@realestateenhanced.com',
|
|
password: process.env.REONOMY_PASSWORD || '9082166532',
|
|
};
|
|
|
|
// Anti-detection: Random delay between min and max ms
|
|
function randomDelay(minMs, maxMs) {
|
|
const delay = Math.floor(Math.random() * (maxMs - minMs + 1)) + minMs;
|
|
return new Promise(resolve => setTimeout(resolve, delay));
|
|
}
|
|
|
|
// Anti-detection: Shuffle array (Fisher-Yates)
|
|
function shuffle(array) {
|
|
const arr = [...array];
|
|
for (let i = arr.length - 1; i > 0; i--) {
|
|
const j = Math.floor(Math.random() * (i + 1));
|
|
[arr[i], arr[j]] = [arr[j], arr[i]];
|
|
}
|
|
return arr;
|
|
}
|
|
|
|
// Logging
|
|
function log(msg) {
|
|
const timestamp = new Date().toISOString();
|
|
const line = `[${timestamp}] ${msg}`;
|
|
console.log(line);
|
|
fs.appendFileSync(CONFIG.logPath, line + '\n');
|
|
}
|
|
|
|
// Run agent-browser command
|
|
function ab(cmd, options = {}) {
|
|
const fullCmd = `agent-browser ${cmd}`;
|
|
if (options.verbose !== false) {
|
|
log(` 🔧 ${fullCmd}`);
|
|
}
|
|
try {
|
|
const result = execSync(fullCmd, {
|
|
encoding: 'utf8',
|
|
timeout: options.timeout || 30000,
|
|
stdio: ['pipe', 'pipe', 'pipe']
|
|
});
|
|
return { success: true, output: result.trim() };
|
|
} catch (err) {
|
|
const stderr = err.stderr?.toString() || err.message;
|
|
if (options.verbose !== false) {
|
|
log(` ❌ Error: ${stderr.substring(0, 100)}`);
|
|
}
|
|
return { success: false, error: stderr };
|
|
}
|
|
}
|
|
|
|
// Anti-detection: Random "human" actions
|
|
async function humanize() {
|
|
const actions = [
|
|
() => ab('scroll down 200', { verbose: false }),
|
|
() => ab('scroll up 100', { verbose: false }),
|
|
() => randomDelay(500, 1500),
|
|
() => randomDelay(1000, 2000),
|
|
];
|
|
|
|
// 30% chance to do a random action
|
|
if (Math.random() < 0.3) {
|
|
const action = actions[Math.floor(Math.random() * actions.length)];
|
|
await action();
|
|
}
|
|
}
|
|
|
|
// Daily stats tracking
|
|
function getDailyStats() {
|
|
const today = new Date().toISOString().split('T')[0];
|
|
try {
|
|
const data = JSON.parse(fs.readFileSync(CONFIG.dailyLogPath, 'utf8'));
|
|
if (data.date === today) {
|
|
return data;
|
|
}
|
|
} catch (e) {}
|
|
return { date: today, propertiesScraped: 0, leadsFound: 0 };
|
|
}
|
|
|
|
function saveDailyStats(stats) {
|
|
fs.writeFileSync(CONFIG.dailyLogPath, JSON.stringify(stats, null, 2));
|
|
}
|
|
|
|
// Login to Reonomy
|
|
async function login() {
|
|
log(' Navigating to login page...');
|
|
ab('open "https://app.reonomy.com/#!/login"');
|
|
await randomDelay(3000, 5000);
|
|
|
|
const snapshot = ab('snapshot -i');
|
|
if (!snapshot.output?.includes('textbox "Email"')) {
|
|
const urlCheck = ab('eval "window.location.href"');
|
|
if (urlCheck.output?.includes('app.reonomy.com') && !urlCheck.output?.includes('login')) {
|
|
log(' Already logged in!');
|
|
return true;
|
|
}
|
|
throw new Error('Login form not found');
|
|
}
|
|
|
|
const emailMatch = snapshot.output.match(/textbox "Email" \[ref=(e\d+)\]/);
|
|
const passMatch = snapshot.output.match(/textbox "Password" \[ref=(e\d+)\]/);
|
|
const loginMatch = snapshot.output.match(/button "Log In" \[ref=(e\d+)\]/);
|
|
|
|
if (!emailMatch || !passMatch || !loginMatch) {
|
|
throw new Error('Could not find login form elements');
|
|
}
|
|
|
|
log(' Filling credentials...');
|
|
ab(`fill @${emailMatch[1]} "${CONFIG.email}"`);
|
|
await randomDelay(800, 1500);
|
|
ab(`fill @${passMatch[1]} "${CONFIG.password}"`);
|
|
await randomDelay(800, 1500);
|
|
|
|
log(' Clicking login...');
|
|
ab(`click @${loginMatch[1]}`);
|
|
await randomDelay(12000, 16000); // Human-like wait for login
|
|
|
|
const postLoginUrl = ab('eval "window.location.href"');
|
|
if (postLoginUrl.output?.includes('auth.reonomy.com') || postLoginUrl.output?.includes('login')) {
|
|
throw new Error('Login failed - still on login page');
|
|
}
|
|
|
|
log(' Saving auth state...');
|
|
ab(`state save "${CONFIG.authStatePath}"`);
|
|
|
|
log(' ✅ Login successful!');
|
|
return true;
|
|
}
|
|
|
|
// Extract owners + contacts directly from Owner tab snapshot (new Reonomy UI)
|
|
function extractOwnersFromTab(snapshot) {
|
|
if (!snapshot) return [];
|
|
|
|
const owners = [];
|
|
const lines = snapshot.split('\n');
|
|
|
|
let currentOwner = null;
|
|
|
|
for (const line of lines) {
|
|
// New owner starts with a link (person name) or "Show Portfolio Preview"
|
|
const ownerMatch = line.match(/link "([^"]+)" \[ref=e\d+\]/);
|
|
if (ownerMatch && !ownerMatch[1].includes('Call') && !ownerMatch[1].includes('Send')
|
|
&& !ownerMatch[1].includes('Sign') && !ownerMatch[1].includes('Advanced')
|
|
&& !ownerMatch[1].includes('http') && !ownerMatch[1].includes('Don\'t')) {
|
|
// Check if it looks like a person name (not a nav link)
|
|
const name = ownerMatch[1];
|
|
if (name.length > 2 && name.length < 80 && /[A-Z]/.test(name)) {
|
|
// Save previous owner
|
|
if (currentOwner && (currentOwner.phones.length > 0 || currentOwner.emails.length > 0)) {
|
|
owners.push(currentOwner);
|
|
}
|
|
// Strip titles from owner name
|
|
const cleanName = name.replace(/\s+(President|CEO|Manager|Member|Director|Officer|Secretary|Treasurer|VP|Vice President|Partner|Owner|Agent|Trustee|Chairman|Principal)$/i, '').trim();
|
|
currentOwner = { name: cleanName, phones: [], emails: [] };
|
|
}
|
|
}
|
|
|
|
// Phone: button "1-330-966-4686" or "718-652-3191 Residential" or "201-741-9321 Mobile"
|
|
const phoneMatch = line.match(/button "(?:1-)?(\d{3}-\d{3}-\d{4})(?:\s+(\w+))?" \[ref=e\d+\]/);
|
|
if (phoneMatch && currentOwner) {
|
|
const phone = phoneMatch[0].includes('1-') ? `1-${phoneMatch[1]}` : phoneMatch[1];
|
|
currentOwner.phones.push({
|
|
number: phone,
|
|
type: phoneMatch[2] || 'Unknown'
|
|
});
|
|
}
|
|
|
|
// Also match longer number strings like "12076267058202"
|
|
const longPhoneMatch = line.match(/button "(\d{10,14})" \[ref=e\d+\]/);
|
|
if (longPhoneMatch && currentOwner) {
|
|
currentOwner.phones.push({
|
|
number: longPhoneMatch[1],
|
|
type: 'Unknown'
|
|
});
|
|
}
|
|
|
|
// Email: button "arsenal8424@gmail.com"
|
|
const emailMatch = line.match(/button "([a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,})" \[ref=e\d+\]/);
|
|
if (emailMatch && currentOwner) {
|
|
currentOwner.emails.push(emailMatch[1]);
|
|
}
|
|
}
|
|
|
|
// Don't forget the last owner
|
|
if (currentOwner && (currentOwner.phones.length > 0 || currentOwner.emails.length > 0)) {
|
|
owners.push(currentOwner);
|
|
}
|
|
|
|
// Deduplicate owners by name (merge contacts if same person appears twice)
|
|
const deduped = [];
|
|
const seen = new Map();
|
|
for (const owner of owners) {
|
|
const key = owner.name.toLowerCase();
|
|
if (seen.has(key)) {
|
|
const existing = seen.get(key);
|
|
// Merge phones/emails
|
|
for (const p of owner.phones) {
|
|
if (!existing.phones.find(ep => ep.number === p.number)) existing.phones.push(p);
|
|
}
|
|
for (const e of owner.emails) {
|
|
if (!existing.emails.includes(e)) existing.emails.push(e);
|
|
}
|
|
} else {
|
|
const entry = { ...owner };
|
|
seen.set(key, entry);
|
|
deduped.push(entry);
|
|
}
|
|
}
|
|
|
|
return deduped;
|
|
}
|
|
|
|
// Main scraping function
|
|
async function scrape() {
|
|
log('🚀 Starting Reonomy Scraper v13 (ANTI-DETECTION MODE)');
|
|
|
|
// Check daily limits
|
|
const dailyStats = getDailyStats();
|
|
if (dailyStats.propertiesScraped >= CONFIG.maxDailyProperties) {
|
|
log(`⚠️ Daily limit reached (${dailyStats.propertiesScraped}/${CONFIG.maxDailyProperties}). Try again tomorrow.`);
|
|
return [];
|
|
}
|
|
|
|
const remainingToday = CONFIG.maxDailyProperties - dailyStats.propertiesScraped;
|
|
const maxThisRun = Math.min(CONFIG.maxProperties, remainingToday);
|
|
log(`📊 Daily stats: ${dailyStats.propertiesScraped} scraped today, ${remainingToday} remaining`);
|
|
log(`📊 This run: max ${maxThisRun} properties`);
|
|
|
|
const leads = [];
|
|
|
|
try {
|
|
// Step 1: Auth
|
|
log('\n📍 Step 1: Authenticating...');
|
|
|
|
let needsLogin = true;
|
|
if (fs.existsSync(CONFIG.authStatePath)) {
|
|
log(' Found existing auth state, testing...');
|
|
ab(`state load "${CONFIG.authStatePath}"`);
|
|
ab('open "https://app.reonomy.com/#!/home"');
|
|
await randomDelay(4000, 6000);
|
|
|
|
const testUrl = ab('eval "window.location.href"');
|
|
if (testUrl.output?.includes('app.reonomy.com') &&
|
|
!testUrl.output?.includes('auth.reonomy.com') &&
|
|
!testUrl.output?.includes('login')) {
|
|
log(' ✅ Session still valid!');
|
|
needsLogin = false;
|
|
} else {
|
|
log(' ⚠️ Session expired...');
|
|
}
|
|
}
|
|
|
|
if (needsLogin) {
|
|
await login();
|
|
}
|
|
|
|
// Step 2: Navigate to search
|
|
log('\n📍 Step 2: Navigating to search results...');
|
|
const searchUrl = `https://app.reonomy.com/#!/search/${CONFIG.searchId}`;
|
|
ab(`open "${searchUrl}"`);
|
|
await randomDelay(6000, 10000);
|
|
|
|
let urlCheck = ab('eval "window.location.href"');
|
|
if (urlCheck.output?.includes('auth.reonomy.com') || urlCheck.output?.includes('login')) {
|
|
log(' Session invalid, logging in...');
|
|
await login();
|
|
ab(`open "${searchUrl}"`);
|
|
await randomDelay(6000, 10000);
|
|
}
|
|
|
|
// Step 3: Get property list
|
|
log('\n📍 Step 3: Getting property list...');
|
|
await humanize();
|
|
|
|
const iSnapshot = ab('snapshot -i');
|
|
const properties = [];
|
|
|
|
// Find properties — can be buttons OR headings depending on search view
|
|
const allMatches = iSnapshot.output?.matchAll(/(?:button|heading) "([^"]+)" \[ref=(e\d+)\]/g) || [];
|
|
for (const match of allMatches) {
|
|
const text = match[1];
|
|
if (text.includes('Saved Searches') || text.includes('Help Center') ||
|
|
text.includes('More filters') || text.includes('View More') ||
|
|
text.includes('Let\'s Go') || text.includes('Learn about') ||
|
|
text.includes('properties') || text.length < 10) {
|
|
continue;
|
|
}
|
|
if (/\d+.*(?:st|ave|blvd|dr|ln|rd|way|ct|highway|pl|cir)/i.test(text)) {
|
|
properties.push({
|
|
name: text.substring(0, 60),
|
|
ref: match[2]
|
|
});
|
|
}
|
|
}
|
|
|
|
// Click "View More" to load additional properties if available
|
|
const viewMoreMatch = iSnapshot.output?.match(/button "View More" \[ref=(e\d+)\]/);
|
|
if (viewMoreMatch && properties.length < CONFIG.maxProperties) {
|
|
log(` Found ${properties.length} properties, clicking View More...`);
|
|
ab(`click @${viewMoreMatch[1]}`);
|
|
await randomDelay(3000, 5000);
|
|
const moreSnap = ab('snapshot -i');
|
|
const moreMatches = moreSnap.output?.matchAll(/(?:button|heading) "([^"]+)" \[ref=(e\d+)\]/g) || [];
|
|
for (const match of moreMatches) {
|
|
if (match[1].includes('Saved Searches') || match[1].includes('Help Center') ||
|
|
match[1].includes('More filters') || match[1].includes('View More') ||
|
|
match[1].includes('Let\'s Go') || match[1].includes('Learn about') ||
|
|
match[1].includes('properties') || match[1].length < 10) continue;
|
|
if (/\d+.*(?:st|ave|blvd|dr|ln|rd|way|ct|highway|pl|cir)/i.test(match[1])) {
|
|
const existing = properties.find(p => p.name === match[1].substring(0, 60));
|
|
if (!existing) {
|
|
properties.push({ name: match[1].substring(0, 60), ref: match[2] });
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
log(` Found ${properties.length} total properties`);
|
|
|
|
if (properties.length === 0) {
|
|
ab('screenshot /tmp/reonomy-v13-no-properties.png');
|
|
throw new Error('No properties found');
|
|
}
|
|
|
|
// Anti-detection: Shuffle and limit
|
|
const shuffledProps = shuffle(properties).slice(0, maxThisRun);
|
|
log(` Processing ${shuffledProps.length} properties (randomized order)`);
|
|
|
|
// Dismiss ALL notification banners (Reonomy shows multiple "Learn about..." popups)
|
|
for (let attempt = 0; attempt < 5; attempt++) {
|
|
const bannerSnap = ab('snapshot -i', { verbose: false });
|
|
const letsGoMatch = bannerSnap.output?.match(/button "Let's Go" \[ref=(e\d+)\]/);
|
|
if (!letsGoMatch) break;
|
|
log(` Dismissing notification banner (${attempt + 1})...`);
|
|
ab(`click @${letsGoMatch[1]}`);
|
|
await randomDelay(800, 1500);
|
|
}
|
|
|
|
// Step 4: Process properties
|
|
log('\n📍 Step 4: Processing properties...');
|
|
|
|
for (let i = 0; i < shuffledProps.length; i++) {
|
|
const prop = shuffledProps[i];
|
|
log(`\n --- Property ${i + 1}/${shuffledProps.length}: ${prop.name.substring(0, 50)} ---`);
|
|
|
|
await humanize();
|
|
|
|
try {
|
|
// If we used "Next property" button, we're already on the page
|
|
if (!prop._useCurrentPage) {
|
|
// Click property (retry with fresh snapshot if blocked)
|
|
let clickResult = ab(`click @${prop.ref}`);
|
|
if (!clickResult.success) {
|
|
log(' Retrying click with fresh snapshot...');
|
|
await randomDelay(1000, 2000);
|
|
const freshSnap = ab('snapshot -i');
|
|
// Find this property in fresh snapshot by address pattern
|
|
const escaped = prop.name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&').substring(0, 30);
|
|
const freshMatch = freshSnap.output?.match(new RegExp(`(?:button|heading) "${escaped}[^"]*" \\[ref=(e\\d+)\\]`));
|
|
if (freshMatch) {
|
|
ab(`click @${freshMatch[1]}`);
|
|
} else {
|
|
log(' ❌ Property not found in fresh snapshot, skipping');
|
|
dailyStats.propertiesScraped++;
|
|
continue;
|
|
}
|
|
}
|
|
await randomDelay(5000, 8000);
|
|
}
|
|
|
|
const propUrl = ab('eval "window.location.href"');
|
|
const propIdMatch = propUrl.output?.match(/property\/([a-f0-9-]+)/);
|
|
const propertyId = propIdMatch ? propIdMatch[1] : 'unknown';
|
|
|
|
let propertyAddress = prop.name.replace(/\s+\d+(\.\d+)?k?\s+SF\s+.*$/, '').replace(/\s+\d+\s+Unit\s+.*$/, '').replace(/\s+\d+(\.\d+)?\s+Acre\s+.*$/, '').trim();
|
|
const titleSnap = ab('snapshot');
|
|
const headingMatch = titleSnap.output?.match(/heading "([^"]+)"/);
|
|
if (headingMatch && headingMatch[1] !== 'Owners' && headingMatch[1] !== 'Owner' && headingMatch[1].length > 5) {
|
|
propertyAddress = headingMatch[1];
|
|
}
|
|
|
|
// Extract property info from Building & Lot tab (default tab)
|
|
log(' Extracting property details...');
|
|
const buildingSnap = ab('snapshot -i');
|
|
const buildText = buildingSnap.output || '';
|
|
|
|
// Parse property details from the page
|
|
const propertyInfo = {};
|
|
const sfMatch = buildText.match(/(\d[\d,]*(?:\.\d+)?)\s*(?:SF|Sq\.?\s*Ft)/i) || prop.name.match(/([\d.]+k?)\s*SF/i);
|
|
if (sfMatch) propertyInfo.squareFootage = sfMatch[1].replace(/,/g, '');
|
|
|
|
const typeMatch = buildText.match(/(?:Property Type|Type)[:\s]*([A-Za-z\s()]+?)(?:\n|$)/i) || prop.name.match(/(?:Multi Family|Retail|Office|Industrial|Garage|Warehouse|Apartment|Mixed Use|Hotel|Restaurant)[^"]*/i);
|
|
if (typeMatch) propertyInfo.propertyType = typeMatch[1]?.trim() || typeMatch[0]?.trim();
|
|
|
|
const yearMatch = buildText.match(/(?:Year Built|Built)[:\s]*(\d{4})/i);
|
|
if (yearMatch) propertyInfo.yearBuilt = yearMatch[1];
|
|
|
|
const lotMatch = buildText.match(/([\d.]+)\s*Acre/i);
|
|
if (lotMatch) propertyInfo.lotSize = lotMatch[1] + ' Acres';
|
|
|
|
const unitsMatch = buildText.match(/(\d+)\s*Unit/i) || prop.name.match(/(\d+)\s*Unit/i);
|
|
if (unitsMatch) propertyInfo.units = unitsMatch[1];
|
|
|
|
// Try to get city/state/zip from address
|
|
const addrParts = propertyAddress.match(/,\s*([^,]+),\s*([A-Z]{2})\s*(\d{5})/i);
|
|
if (addrParts) {
|
|
propertyInfo.city = addrParts[1].trim();
|
|
propertyInfo.state = addrParts[2];
|
|
propertyInfo.zip = addrParts[3];
|
|
}
|
|
|
|
log(` Property: ${propertyAddress} | ${JSON.stringify(propertyInfo)}`);
|
|
|
|
// Click Owner tab
|
|
log(' Clicking Owner tab...');
|
|
await humanize();
|
|
ab('find role tab click --name "Owner"');
|
|
await randomDelay(4000, 6000);
|
|
|
|
// NEW: Extract contacts directly from Owner tab (no more View Contacts flow)
|
|
const ownerSnap = ab('snapshot -i');
|
|
const snapText = ownerSnap.output || '';
|
|
|
|
// Debug: log first few lines of Owner tab snapshot
|
|
if (i < 3) {
|
|
const snapLines = snapText.split('\n').slice(0, 15).join('\n');
|
|
log(` [DEBUG] Owner tab snapshot (first 15 lines):\n${snapLines}`);
|
|
}
|
|
|
|
const owners = extractOwnersFromTab(snapText);
|
|
|
|
if (owners.length === 0) {
|
|
log(' ⚠️ No contacts found on Owner tab');
|
|
// Take debug screenshot for first few failures
|
|
if (dailyStats.propertiesScraped < 3) {
|
|
ab(`screenshot /tmp/reonomy-v13-owner-debug-${i}.png`);
|
|
}
|
|
ab(`open "https://app.reonomy.com/#!/search/${CONFIG.searchId}"`);
|
|
await randomDelay(6000, 10000);
|
|
dailyStats.propertiesScraped++;
|
|
continue;
|
|
}
|
|
|
|
const totalPhones = owners.reduce((sum, o) => sum + o.phones.length, 0);
|
|
const totalEmails = owners.reduce((sum, o) => sum + o.emails.length, 0);
|
|
log(` 👥 ${owners.length} owners, 📞 ${totalPhones} phones, 📧 ${totalEmails} emails`);
|
|
|
|
for (const owner of owners) {
|
|
log(` → ${owner.name}: ${owner.phones.length}P ${owner.emails.length}E`);
|
|
}
|
|
|
|
leads.push({
|
|
scrapeDate: new Date().toISOString(),
|
|
propertyId,
|
|
propertyAddress,
|
|
...propertyInfo,
|
|
owners: owners.map(o => ({
|
|
name: o.name,
|
|
phones: o.phones,
|
|
emails: o.emails
|
|
}))
|
|
});
|
|
dailyStats.leadsFound++;
|
|
log(' ✅ Lead captured!');
|
|
|
|
dailyStats.propertiesScraped++;
|
|
|
|
// Use "Next property" button if available (more reliable than navigating back)
|
|
const nextSnap = ab('snapshot -i', { verbose: false });
|
|
const nextMatch = nextSnap.output?.match(/button "Next property" \[ref=(e\d+)\]/);
|
|
if (nextMatch && i < shuffledProps.length - 1) {
|
|
log(' ➡️ Clicking Next property...');
|
|
ab(`click @${nextMatch[1]}`);
|
|
await randomDelay(5000, 8000);
|
|
// Skip the normal click flow for next iteration
|
|
shuffledProps[i + 1]._useCurrentPage = true;
|
|
} else {
|
|
// Return to search
|
|
ab(`open "https://app.reonomy.com/#!/search/${CONFIG.searchId}"`);
|
|
await randomDelay(6000, 10000);
|
|
const postSnap2 = ab('snapshot -i', { verbose: false });
|
|
const postBanner = postSnap2.output?.match(/button "Let's Go" \[ref=(e\d+)\]/);
|
|
if (postBanner) {
|
|
ab(`click @${postBanner[1]}`, { verbose: false });
|
|
await randomDelay(500, 1000);
|
|
}
|
|
}
|
|
|
|
// Occasional longer break (anti-detection)
|
|
if (Math.random() < 0.2) {
|
|
log(' ☕ Taking a short break...');
|
|
await randomDelay(8000, 15000);
|
|
}
|
|
|
|
} catch (propError) {
|
|
log(` ❌ Error: ${propError.message}`);
|
|
ab(`open "https://app.reonomy.com/#!/search/${CONFIG.searchId}"`);
|
|
await randomDelay(5000, 8000);
|
|
dailyStats.propertiesScraped++;
|
|
}
|
|
|
|
// Save progress
|
|
saveDailyStats(dailyStats);
|
|
}
|
|
|
|
// Step 5: Save results
|
|
log('\n📍 Step 5: Saving results...');
|
|
|
|
// Append to existing leads if file exists
|
|
let allLeads = [];
|
|
try {
|
|
const existing = JSON.parse(fs.readFileSync(CONFIG.outputPath, 'utf8'));
|
|
allLeads = existing.leads || [];
|
|
} catch (e) {}
|
|
|
|
allLeads = [...allLeads, ...leads];
|
|
|
|
const output = {
|
|
lastUpdated: new Date().toISOString(),
|
|
searchId: CONFIG.searchId,
|
|
totalLeads: allLeads.length,
|
|
leads: allLeads
|
|
};
|
|
|
|
fs.writeFileSync(CONFIG.outputPath, JSON.stringify(output, null, 2));
|
|
log(`✅ Saved ${leads.length} new leads (${allLeads.length} total)`);
|
|
|
|
saveDailyStats(dailyStats);
|
|
log(`📊 Daily total: ${dailyStats.propertiesScraped} properties, ${dailyStats.leadsFound} leads`);
|
|
|
|
} catch (error) {
|
|
log(`\n❌ Fatal error: ${error.message}`);
|
|
ab('screenshot /tmp/reonomy-v13-error.png');
|
|
throw error;
|
|
} finally {
|
|
log('\n🧹 Closing browser...');
|
|
ab('close');
|
|
}
|
|
|
|
return leads;
|
|
}
|
|
|
|
// Run
|
|
scrape()
|
|
.then(leads => {
|
|
log(`\n🎉 Done! Scraped ${leads.length} leads this run.`);
|
|
process.exit(0);
|
|
})
|
|
.catch(err => {
|
|
log(`\n💥 Scraper failed: ${err.message}`);
|
|
process.exit(1);
|
|
});
|