#!/usr/bin/env node
/**
 * Reonomy Scraper v7 - FIXED CLICK-THROUGH
 *
 * Key changes:
 * 1. Removed invalid await inside page.evaluate()
 * 2. Fixed page.evaluateHandle() usage
 * 3. Better error handling
 *
 * Configuration (environment variables):
 *   REONOMY_EMAIL     account e-mail (required)
 *   REONOMY_PASSWORD  account password (required)
 *   REONOMY_LOCATION  search location (default: "Eatontown, NJ")
 *   HEADLESS          set to "true" to run the browser headless
 */

const puppeteer = require('puppeteer');
const fs = require('fs');
const path = require('path');

// Configuration
// Credentials come from the environment only: secrets must not live in source
// control. scrapeLeads() rejects with a clear error when either is missing.
const REONOMY_EMAIL = process.env.REONOMY_EMAIL;
const REONOMY_PASSWORD = process.env.REONOMY_PASSWORD;
const SEARCH_LOCATION = process.env.REONOMY_LOCATION || 'Eatontown, NJ';
const HEADLESS = process.env.HEADLESS === 'true';
const MAX_PROPERTIES = 20;
const PAGE_LOAD_DELAY_MS = 5000;
const CLICK_SETTLE_DELAY_MS = 3000;
const RATE_LIMIT_DELAY_MS = 2000;
// How many ancestors of a matching filter label are searched for its checkbox.
const FILTER_ANCESTOR_SEARCH_DEPTH = 3;
const APP_BASE_URL = 'https://app.reonomy.com/#!';
const ERROR_SCREENSHOT_FILE = '/tmp/reonomy-v7-error.png';

// Output files
const OUTPUT_FILE = path.join(__dirname, 'reonomy-leads-v7-fixed.json');
const LOG_FILE = path.join(__dirname, 'reonomy-scraper-v7-fixed.log');

/**
 * Print a message to stdout and append it, with an ISO timestamp, to LOG_FILE.
 *
 * @param {string} message - Text to log.
 */
function log(message) {
  const timestamp = new Date().toISOString();
  const logMessage = `[${timestamp}] ${message}\n`;
  console.log(message);
  fs.appendFileSync(LOG_FILE, logMessage);
}

/**
 * Resolve after the given number of milliseconds.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Find and click the button that opens the filter panel.
 *
 * The lookup is done by text / aria-label inside the page because
 * `:has-text()` is not valid CSS and is rejected by Puppeteer's
 * waitForSelector (which previously meant the panel was never opened).
 *
 * @param {import('puppeteer').Page} page - Page showing the search view.
 * @returns {Promise<boolean>} true when a button was found and clicked.
 */
async function openFilterPanel(page) {
  const handle = await page
    .waitForFunction(
      () => {
        const buttons = Array.from(document.querySelectorAll('button'));
        const textOf = (b) => b.textContent || '';
        const ariaOf = (b) => b.getAttribute('aria-label') || '';
        // Same preference order as the original selector list.
        return (
          buttons.find((b) => textOf(b).includes('More Filters')) ||
          buttons.find((b) => ariaOf(b).includes('Filters')) ||
          buttons.find((b) => textOf(b).includes('Filters')) ||
          null
        );
      },
      { timeout: 15000 }
    )
    .catch(() => null); // Timeout: the button is optional, carry on without it.

  const button = handle ? handle.asElement() : null;
  if (!button) {
    log('   WARN: Filters button not found; continuing with the controls currently visible.');
    return false;
  }

  try {
    await button.click();
    await sleep(2000);
    return true;
  } catch (err) {
    log(`   WARN: Could not click the Filters button: ${err.message}`);
    return false;
  } finally {
    await handle.dispose().catch(() => {});
  }
}

/**
 * Locate the checkbox (or radio) that belongs to a text label and enable it.
 *
 * Search terms are tried in order. For each term only the most specific
 * matching elements are used (elements that contain no other matching
 * element), and the checkbox is looked for in that element and its nearest
 * ancestors. This avoids matching an outer container whose text merely
 * includes the term, which would select the first checkbox on the page.
 * NOTE(review): the nearest checkbox is assumed to belong to the label;
 * verify against the live DOM if the filter markup changes.
 *
 * @param {import('puppeteer').Page} page - Page showing the filter controls.
 * @param {string} displayName - Human-readable filter name used in logs.
 * @param {string[]} searchTerms - Lower-case terms, most specific first.
 * @returns {Promise<boolean>} true when the filter is enabled afterwards.
 */
async function enableCheckboxFilter(page, displayName, searchTerms) {
  const outcome = await page
    .evaluate(
      (terms, maxAncestorDepth) => {
        const INPUT_SELECTOR = 'input[type="checkbox"], input[type="radio"]';
        const elements = Array.from(document.querySelectorAll('label, span, div'));
        let sawText = false;

        for (const term of terms) {
          const matches = elements.filter((el) =>
            (el.textContent || '').toLowerCase().includes(term)
          );
          if (matches.length === 0) continue;
          sawText = true;

          const mostSpecific = matches.filter(
            (el) => !matches.some((other) => other !== el && el.contains(other))
          );

          for (const labelEl of mostSpecific) {
            let scope = labelEl;
            for (let depth = 0; scope && depth <= maxAncestorDepth; depth += 1) {
              const input = scope.querySelector(INPUT_SELECTOR);
              if (input) {
                if (input.checked) {
                  return { status: 'already-enabled', id: input.id };
                }
                // Click the element itself so inputs without an id work too.
                input.click();
                return { status: 'enabled', id: input.id };
              }
              scope = scope.parentElement;
            }
          }
        }

        return { status: sawText ? 'no-input' : 'not-found', id: '' };
      },
      searchTerms,
      FILTER_ANCESTOR_SEARCH_DEPTH
    )
    .catch((err) => ({ status: 'error', id: '', message: err.message }));

  switch (outcome.status) {
    case 'enabled':
      log(` āœ… Found ${displayName} checkbox: ${outcome.id}`);
      return true;
    case 'already-enabled':
      log(` āœ… Found ${displayName} checkbox: ${outcome.id} (already enabled)`);
      return true;
    case 'no-input':
      log(`   WARN: "${displayName}" text found but no checkbox near it; filter not applied.`);
      return false;
    case 'not-found':
      log(`   WARN: "${displayName}" filter not found; filter not applied.`);
      return false;
    default:
      log(`   WARN: Could not apply "${displayName}" filter: ${outcome.message}`);
      return false;
  }
}

/**
 * Apply advanced filters
 *
 * Opens the filter panel (when a Filters button exists) and enables the
 * "Has Phone" and "Has Email" filters. Best effort: a missing control is
 * logged and skipped rather than treated as fatal.
 *
 * @param {import('puppeteer').Page} page - Page showing the search view.
 * @returns {Promise<void>}
 */
async function applyAdvancedFilters(page) {
  log('šŸ” Applying advanced filters: Has Phone + Has Email...');

  await openFilterPanel(page);
  await enableCheckboxFilter(page, 'Has Phone', ['has phone', 'phone']);
  await enableCheckboxFilter(page, 'Has Email', ['has email', 'email']);

  await sleep(2000);
}

/**
 * Extract contact info from property page
 *
 * Reads the currently loaded document: e-mails from `mailto:` links, phone
 * numbers from `tel:` links, the first line that looks like a US street
 * address, and an owner name following an "Owns N properties" phrase.
 *
 * @param {import('puppeteer').Page} page - Page showing a property.
 * @returns {Promise<{emails: string[], phones: string[], address: string,
 *   owners: string[], pageTitle: string}>}
 */
async function extractContactInfoFromProperty(page) {
  return page.evaluate(() => {
    const safeDecode = (value) => {
      try {
        return decodeURIComponent(value);
      } catch {
        // Malformed escape sequence: keep the raw value instead of dropping it.
        return value;
      }
    };

    // Collect de-duplicated link targets for a URI scheme, without the
    // scheme prefix and without any "?subject=..." style query.
    const collectSchemeLinks = (scheme, minLength) => {
      const prefix = `${scheme}:`;
      const values = Array.from(document.querySelectorAll(`a[href^="${prefix}"]`))
        .map((a) => safeDecode(a.href.slice(prefix.length).split('?')[0]).trim())
        .filter((value) => value.length > minLength);
      return [...new Set(values)];
    };

    const info = {
      emails: collectSchemeLinks('mailto', 5),
      phones: collectSchemeLinks('tel', 7),
      address: '',
      owners: [],
      pageTitle: document.title,
    };

    const pageText = document.body.innerText;

    // Extract property address. The `m` flag lets `^` match at the start of
    // any line (not only the start of the page text); newlines are excluded
    // so a match cannot span several lines.
    const addressMatch = pageText.match(
      /^\d+[^,\n]+,[ \t]*[A-Za-z .'-]+,[ \t]*[A-Z]{2}[ \t]*\d{5}/m
    );
    if (addressMatch) {
      info.address = addressMatch[0];
    }

    // Look for owner names
    const ownerMatch = pageText.match(/Owns\s+(\d+)\s+properties?\s+([A-Za-z\s,]+)/i);
    const ownerName = ownerMatch ? ownerMatch[2].trim() : '';
    if (ownerName) {
      info.owners.push(ownerName);
    }

    return info;
  });
}

/**
 * Log into Reonomy with the configured credentials.
 *
 * @param {import('puppeteer').Page} page - Fresh page.
 * @returns {Promise<void>}
 * @throws {Error} When the URL still looks like a login page afterwards.
 */
async function login(page) {
  log('šŸ“ Step 1: Logging into Reonomy...');

  await page.goto(`${APP_BASE_URL}/account`, {
    waitUntil: 'domcontentloaded',
    timeout: 60000,
  });
  await sleep(2000);
  // Wait for the form instead of assuming it rendered within the fixed delay.
  await page.waitForSelector('input[type="email"]', { timeout: 30000 });

  await page.type('input[type="email"]', REONOMY_EMAIL, { delay: 100 });
  await page.type('input[type="password"]', REONOMY_PASSWORD, { delay: 100 });
  await page.click('button[type="submit"]');

  log('ā³ Waiting for login...');
  await sleep(10000);

  // Check if logged in
  const url = page.url();
  if (url.includes('login') || url.includes('auth')) {
    throw new Error('Login failed. Please check credentials.');
  }

  log('āœ… Successfully logged in!');
}

/**
 * Type SEARCH_LOCATION into the search box, submit, and read the search ID
 * from the resulting URL.
 *
 * @param {import('puppeteer').Page} page - Page showing the search view.
 * @returns {Promise<string>} The search ID.
 * @throws {Error} When no search input appears or the URL has no search ID.
 */
async function runSearch(page) {
  log(`šŸ“ Step 4: Searching for: ${SEARCH_LOCATION}...`);

  const searchInput = await page
    .waitForSelector('input[placeholder*="address"], input[placeholder*="Search"]', {
      timeout: 10000,
    })
    .catch(() => page.waitForSelector('input[type="text"]', { timeout: 5000 }));

  if (searchInput) {
    // Triple click selects any existing text so typing replaces it.
    await searchInput.click({ clickCount: 3 });
    await searchInput.type(SEARCH_LOCATION, { delay: 100 });
    await sleep(1000);
    await page.keyboard.press('Enter');
    log('ā³ Searching...');
    await sleep(5000);
  }

  // Extract search ID
  const urlMatch = page.url().match(/search\/([a-f0-9-]+)/);
  if (!urlMatch) {
    throw new Error('Could not extract search ID from URL');
  }
  return urlMatch[1];
}

/**
 * Collect the property links on the current results page, one per property.
 *
 * @param {import('puppeteer').Page} page - Page showing search results.
 * @returns {Promise<Array<{id: string, url: string}>>} Links in page order,
 *   de-duplicated by property ID.
 */
async function collectPropertyLinks(page) {
  const links = await page.evaluate(() =>
    Array.from(document.querySelectorAll('a[href*="/property/"]'))
      .map((link) => {
        const match = link.href.match(/property\/([a-f0-9-]+)/);
        return match ? { id: match[1], url: link.href } : null;
      })
      .filter(Boolean)
  );

  // A result card can hold several links to the same property; keep the first.
  const seen = new Set();
  return links.filter(({ id }) => {
    if (seen.has(id)) return false;
    seen.add(id);
    return true;
  });
}

/**
 * Open a property from the results page.
 *
 * Tries to click the matching result (button containing the link, button
 * containing the ID text, then the link itself). When the click does not lead
 * to a URL containing the property ID, navigates to the link URL directly.
 *
 * @param {import('puppeteer').Page} page - Page showing search results.
 * @param {{id: string, url: string}} prop - Property to open.
 * @returns {Promise<boolean>} true when the page URL contains the property ID.
 */
async function openProperty(page, prop) {
  log(' šŸ”— Clicking property button...');

  let clicked = false;
  try {
    clicked = await page.evaluate((propertyId) => {
      const buttons = Array.from(document.querySelectorAll('button'));
      const wrapsPropertyLink = (button) => {
        const link = button.querySelector('a[href*="/property/"]');
        return Boolean(link) && link.href.includes(propertyId);
      };

      const target =
        buttons.find(wrapsPropertyLink) ||
        buttons.find((button) => button.textContent.includes(propertyId)) ||
        Array.from(document.querySelectorAll('a[href*="/property/"]')).find((a) =>
          a.href.includes(propertyId)
        );

      if (!target) return false;
      target.scrollIntoView({ behavior: 'smooth', block: 'center' });
      target.click();
      return true;
    }, prop.id);
  } catch (e) {
    log(` āš ļø Could not click property: ${e.message}`);
  }

  if (clicked) {
    await sleep(CLICK_SETTLE_DELAY_MS);
  }

  if (!page.url().includes(prop.id)) {
    log('   Click did not open the property page; navigating to it directly...');
    await page.goto(prop.url, { waitUntil: 'networkidle2', timeout: 30000 });
  }

  // Wait for property page to load
  log(' ā³ Waiting for property page to load...');
  await sleep(PAGE_LOAD_DELAY_MS);

  return page.url().includes(prop.id);
}

/**
 * Write the collected leads to OUTPUT_FILE as pretty-printed JSON.
 *
 * @param {object[]} leads - Lead records.
 * @param {string} searchId - ID of the search the leads came from.
 */
function saveLeads(leads, searchId) {
  const outputData = {
    scrapeDate: new Date().toISOString(),
    location: SEARCH_LOCATION,
    searchId: searchId,
    leadCount: leads.length,
    leads: leads,
  };
  fs.writeFileSync(OUTPUT_FILE, JSON.stringify(outputData, null, 2));
}

/**
 * Main scraper
 *
 * Logs in, runs the location search with the Has Phone / Has Email filters,
 * visits up to MAX_PROPERTIES results and saves their contact details.
 * A failure on one property is logged and skipped so that leads already
 * collected are still saved.
 *
 * @returns {Promise<{leadCount: number, outputFile: string}>}
 * @throws {Error} On missing credentials, failed login, or when the search
 *   yields no search ID or no properties.
 */
async function scrapeLeads() {
  log('šŸš€ Starting Reonomy Scraper v7 (FIXED)...\n');

  if (!REONOMY_EMAIL || !REONOMY_PASSWORD) {
    throw new Error(
      'Missing credentials: set the REONOMY_EMAIL and REONOMY_PASSWORD environment variables.'
    );
  }

  const browser = await puppeteer.launch({
    headless: HEADLESS ? 'new' : false,
    args: ['--no-sandbox', '--disable-setuid-sandbox', '--window-size=1920,1080'],
  });

  const page = await browser.newPage();
  await page.setViewport({ width: 1920, height: 1080 });

  const leads = [];

  try {
    // Login
    await login(page);

    // Navigate to search
    log('\nšŸ“ Step 2: Navigating to search...');
    await page.goto(`${APP_BASE_URL}/search`, {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });
    await sleep(3000);

    // Apply advanced filters
    log('\nšŸ“ Step 3: Applying advanced filters...');
    await applyAdvancedFilters(page);

    // Perform search
    const searchId = await runSearch(page);
    log(`āœ… Search ID: ${searchId}`);

    // Extract property IDs
    log('\nšŸ“ Step 5: Extracting property IDs...');
    const propertyIds = await collectPropertyLinks(page);
    log(`āœ… Found ${propertyIds.length} property IDs`);

    if (propertyIds.length === 0) {
      log('āš ļø No property IDs found. The page structure may have changed.');
      throw new Error('No properties found on search page.');
    }

    // Limit properties
    const propertiesToScrape = propertyIds.slice(0, MAX_PROPERTIES);
    const searchUrl = `${APP_BASE_URL}/search/${searchId}`;

    log(`\nšŸ“ Step 6: Clicking through ${propertiesToScrape.length} properties...`);

    for (const [index, prop] of propertiesToScrape.entries()) {
      if (page.isClosed()) {
        log('   WARN: Page was closed; stopping early.');
        break;
      }

      log(`\n[${index + 1}/${propertiesToScrape.length}] Property ID: ${prop.id}`);

      try {
        const opened = await openProperty(page, prop);
        if (!opened) {
          // Never record data scraped from a page that is not this property.
          log(`   WARN: Property page did not open; skipping ${prop.id}.`);
        } else {
          // Extract contact info from property page
          log(' šŸ“Š Extracting contact info...');
          const contactInfo = await extractContactInfoFromProperty(page);

          log(` šŸ“§ Emails: ${contactInfo.emails.length} found: ${contactInfo.emails.join(', ') || 'none'}`);
          log(` šŸ“ž Phones: ${contactInfo.phones.length} found: ${contactInfo.phones.join(', ') || 'none'}`);

          leads.push({
            scrapeDate: new Date().toISOString().split('T')[0],
            propertyId: prop.id,
            propertyUrl: page.url(),
            address: contactInfo.address || '',
            emails: contactInfo.emails,
            phones: contactInfo.phones,
            owners: contactInfo.owners,
            pageTitle: contactInfo.pageTitle,
            searchLocation: SEARCH_LOCATION,
            searchId: searchId,
          });
        }
      } catch (err) {
        log(`   WARN: Skipping property ${prop.id}: ${err.message}`);
      }

      // Go back to search results
      log(' šŸ”™ Going back to search results...');
      try {
        await page.goto(searchUrl, { waitUntil: 'networkidle2', timeout: 30000 });
        await sleep(2000);
      } catch (err) {
        // Not fatal: openProperty falls back to direct navigation.
        log(`   WARN: Could not return to search results: ${err.message}`);
      }

      // Rate limiting
      log(` āø Rate limit delay: ${RATE_LIMIT_DELAY_MS}ms...`);
      await sleep(RATE_LIMIT_DELAY_MS);
    }

    // Save results
    if (leads.length > 0) {
      log(`\nāœ… Total leads scraped: ${leads.length}`);
      saveLeads(leads, searchId);
      log(`šŸ’¾ Saved to: ${OUTPUT_FILE}`);
    } else {
      log('\nāš ļø No leads scraped.');
    }

    log('\nāœ… Scraping complete!');

    return { leadCount: leads.length, outputFile: OUTPUT_FILE };
  } catch (error) {
    log(`\nāŒ Error: ${error.message}`);
    log(error.stack);

    try {
      await page.screenshot({ path: ERROR_SCREENSHOT_FILE, fullPage: true });
      log('šŸ“ø Error screenshot saved: /tmp/reonomy-v7-error.png');
    } catch (screenshotError) {
      log(`   WARN: Could not save error screenshot: ${screenshotError.message}`);
    }

    throw error;
  } finally {
    // A failing close must not replace the error being propagated.
    await browser.close().catch((closeError) => {
      log(`   WARN: Browser did not close cleanly: ${closeError.message}`);
    });
    log('\nšŸ”š Browser closed');
  }
}

// Run
scrapeLeads()
  .then((result) => {
    log(`\nšŸŽ‰ Success! ${result.leadCount} leads scraped.`);
    console.log(`\nšŸ’¾ View your leads at: ${result.outputFile}`);
    process.exit(0);
  })
  .catch((error) => {
    log(`\nšŸ’„ Scraper failed: ${error.message}`);
    process.exit(1);
  });