#!/usr/bin/env node
/**
 * Reonomy Scraper v3 - Corrected URL Pattern & Selectors
 *
 * Based on DOM analysis:
 * - Correct URL: /search/{search-id}/property/{property-id}/ownership
 * - Email selector: a[href^="mailto:"]
 * - Phone selector: a[href^="tel:"]
 *
 * Required environment variables: REONOMY_EMAIL, REONOMY_PASSWORD.
 * Optional: REONOMY_LOCATION (search text), HEADLESS ("true" to run headless).
 */

const puppeteer = require('puppeteer');
const fs = require('fs');
const path = require('path');

// Configuration
// Credentials are read from the environment only; they must never be
// committed to source control as fallback values.
const REONOMY_EMAIL = process.env.REONOMY_EMAIL;
const REONOMY_PASSWORD = process.env.REONOMY_PASSWORD;
const SEARCH_LOCATION = process.env.REONOMY_LOCATION || 'Eatontown, NJ';
const HEADLESS = process.env.HEADLESS === 'true';
const MAX_PROPERTIES = 10; // Number of properties to scrape
const PAGE_DELAY_MS = 3000; // Rate limiting delay

// Output files
const OUTPUT_FILE = path.join(__dirname, 'reonomy-leads-v3.json');
const LOG_FILE = path.join(__dirname, 'reonomy-scraper-v3.log');
const ERROR_SCREENSHOT = '/tmp/reonomy-v3-error.png';

const APP_BASE_URL = 'https://app.reonomy.com/#!';

/**
 * Print a message to stdout and append it, timestamped, to LOG_FILE.
 *
 * @param {string} message - Text to log.
 */
function log(message) {
  const timestamp = new Date().toISOString();
  const logMessage = `[${timestamp}] ${message}\n`;
  console.log(message);
  fs.appendFileSync(LOG_FILE, logMessage);
}

/**
 * Resolve after the given number of milliseconds.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Extract contact info from the ownership page currently loaded in `page`.
 *
 * The callback runs inside the browser, so it must be self-contained: it
 * cannot see any Node-side variables or helpers.
 *
 * @param {object} page - Puppeteer page showing an ownership view.
 * @returns {Promise<{emails: string[], phones: string[], owners: string[],
 *   address: string, propertyDetails: object}>} De-duplicated contact data;
 *   `address` is '' and the arrays are empty when nothing is found.
 */
async function extractContactInfo(page) {
  return page.evaluate(() => {
    // Collect the unique targets of all links using the given URI scheme,
    // dropping any "?subject=..." style query and percent-encoding.
    const collectLinkTargets = (scheme, minLength) => {
      const unique = new Set();
      document.querySelectorAll(`a[href^="${scheme}"]`).forEach((a) => {
        let value = a.href.replace(scheme, '').split('?')[0];
        try {
          value = decodeURIComponent(value);
        } catch {
          // Malformed percent-encoding: keep the raw value.
        }
        value = value.trim();
        if (value.length > minLength) {
          unique.add(value);
        }
      });
      return [...unique];
    };

    const info = {
      emails: collectLinkTargets('mailto:', 5),
      phones: collectLinkTargets('tel:', 7),
      owners: [],
      address: '',
      propertyDetails: {},
    };

    const pageText = document.body.innerText;

    // Extract property address. The `m` flag lets `^` match at the start of
    // any line; without it the pattern could only match at the very first
    // character of the page text.
    const addressMatch = pageText.match(
      /^(\d+[^,\n]+),\s*([A-Za-z\s]+),\s*([A-Z]{2})\s*(\d{5})/m
    );
    if (addressMatch) {
      info.address = addressMatch[0];
    }

    // Look for owner names (from page structure discovered)
    const ownerMatch = pageText.match(/Owns\s+(\d+)\s+properties?\s+([A-Za-z\s,]+)/i);
    if (ownerMatch) {
      const owner = ownerMatch[2].trim();
      if (owner) {
        info.owners.push(owner);
      }
    }

    return info;
  });
}

/**
 * Log into Reonomy with the configured credentials.
 *
 * @param {object} page - Puppeteer page.
 * @throws {Error} If the URL still contains "login" or "auth" after the wait.
 */
async function login(page) {
  log('📍 Step 1: Logging into Reonomy...');
  await page.goto(`${APP_BASE_URL}/account`, {
    waitUntil: 'domcontentloaded',
    timeout: 60000,
  });
  await sleep(2000);

  // The form is rendered client-side; wait for it instead of assuming that
  // the fixed delay above was long enough.
  await page.waitForSelector('input[type="email"]', { timeout: 30000 });
  await page.type('input[type="email"]', REONOMY_EMAIL, { delay: 100 });
  await page.type('input[type="password"]', REONOMY_PASSWORD, { delay: 100 });
  await page.click('button[type="submit"]');

  log('⏳ Waiting for login...');
  await sleep(10000);

  // Check if logged in
  const url = page.url();
  if (url.includes('login') || url.includes('auth')) {
    throw new Error('Login failed. Please check credentials.');
  }
  log('✅ Successfully logged in!');
}

/**
 * Open the search page, submit SEARCH_LOCATION and return the search ID that
 * appears in the resulting URL.
 *
 * @param {object} page - Puppeteer page (already logged in).
 * @returns {Promise<string>} The search ID.
 * @throws {Error} If no search ID can be found in the URL.
 */
async function runSearch(page) {
  log(`\n📍 Step 2: Navigating to search...`);
  await page.goto(`${APP_BASE_URL}/search`, {
    waitUntil: 'networkidle2',
    timeout: 60000,
  });
  await sleep(3000);

  log(`📍 Step 3: Searching for: ${SEARCH_LOCATION}...`);
  // Prefer the labelled search box; fall back to any text input.
  const searchInput = await page
    .waitForSelector('input[placeholder*="address"], input[placeholder*="Search"]', {
      timeout: 10000,
    })
    .catch(() => page.waitForSelector('input[type="text"]', { timeout: 5000 }));

  if (searchInput) {
    await searchInput.click({ clickCount: 3 }); // triple-click selects existing text
    await searchInput.type(SEARCH_LOCATION, { delay: 100 });
    await sleep(1000);
    await page.keyboard.press('Enter');
    log('⏳ Searching...');
    await sleep(5000);
  }

  // Extract search ID from URL
  const urlMatch = page.url().match(/search\/([a-f0-9-]+)/);
  if (!urlMatch) {
    throw new Error('Could not extract search ID from URL');
  }
  return urlMatch[1];
}

/**
 * Find buttons on the search results page whose text starts with a
 * "street, city, ST 12345" style address.
 *
 * @param {object} page - Puppeteer page showing search results.
 * @param {number} limit - Maximum number of buttons to return.
 * @returns {Promise<Array<{text: string, address: string, city: string,
 *   state: string, zip: string, hasAddress: boolean}>>}
 */
async function findPropertyButtons(page, limit) {
  // `limit` is passed as an argument because the callback executes in the
  // browser, where Node-side constants such as MAX_PROPERTIES do not exist.
  return page.evaluate((maxResults) => {
    const addressPattern = /^(\d+[^,]+),\s*([A-Za-z\s,]+),\s*([A-Z]{2})\s*(\d{5})/;
    const found = [];
    document.querySelectorAll('button').forEach((button) => {
      const text = button.textContent.trim();
      const match = text.match(addressPattern);
      if (match) {
        // Groups: 1 = street, 2 = city, 3 = state, 4 = zip.
        found.push({
          text,
          address: match[0],
          city: match[2].trim(),
          state: match[3],
          zip: match[4],
          hasAddress: true,
        });
      }
    });
    return found.slice(0, maxResults);
  }, limit);
}

/**
 * Click one property button, read the property ID from the resulting URL,
 * open its ownership page and build a lead record from it.
 *
 * @param {object} page - Puppeteer page showing search results.
 * @param {string} searchId - ID of the current search.
 * @param {object} prop - Entry returned by findPropertyButtons.
 * @returns {Promise<object|null>} The lead, or null if the property could not
 *   be opened.
 */
async function scrapeProperty(page, searchId, prop) {
  // Click property button
  const clicked = await page.evaluate((target) => {
    const button = Array.from(document.querySelectorAll('button')).find(
      (b) =>
        b.textContent.includes(target.address?.substring(0, 20)) ||
        b.textContent.includes(target.text?.substring(0, 20))
    );
    if (button) {
      button.click();
      return true;
    }
    return false;
  }, prop);

  if (!clicked) {
    log('   ⚠️  Could not find property button to click');
    return null;
  }
  await sleep(3000);

  // Extract property ID from URL
  const propIdMatch = page.url().match(/property\/([a-f0-9-]+)/);
  if (!propIdMatch) {
    log('   ⚠️  Could not extract property ID from URL');
    return null;
  }
  const propertyId = propIdMatch[1];

  // Navigate to ownership page for contact info
  const ownershipUrl = `${APP_BASE_URL}/search/${searchId}/property/${propertyId}/ownership`;
  log(`   🔍 Navigating to ownership page...`);
  await page.goto(ownershipUrl, { waitUntil: 'networkidle2', timeout: 30000 });
  await sleep(2000);

  // Extract contact info
  const contactInfo = await extractContactInfo(page);
  log(`   📧 Emails: ${contactInfo.emails.length} found: ${contactInfo.emails.join(', ') || 'none'}`);
  log(`   📞 Phones: ${contactInfo.phones.length} found: ${contactInfo.phones.join(', ') || 'none'}`);

  return {
    scrapeDate: new Date().toISOString().split('T')[0],
    propertyAddress: contactInfo.address || prop.address || '',
    city: prop.city || '',
    state: prop.state || '',
    zip: prop.zip || '',
    emails: contactInfo.emails,
    phones: contactInfo.phones,
    owners: contactInfo.owners,
    propertyUrl: `${APP_BASE_URL}/property/${propertyId}`,
    ownershipUrl,
  };
}

/**
 * Main scraper: log in, run the search, visit up to MAX_PROPERTIES results
 * and write the collected leads to OUTPUT_FILE.
 *
 * @returns {Promise<{leadCount: number, outputFile: string}>}
 * @throws {Error} If credentials are missing, login fails, the search ID
 *   cannot be determined, or any navigation step throws. On failures after
 *   the browser has started, a screenshot is attempted before rethrowing.
 */
async function scrapeLeads() {
  if (!REONOMY_EMAIL || !REONOMY_PASSWORD) {
    throw new Error(
      'Missing credentials: set the REONOMY_EMAIL and REONOMY_PASSWORD environment variables.'
    );
  }

  log('🚀 Starting Reonomy Scraper v3...\n');

  const browser = await puppeteer.launch({
    headless: HEADLESS ? 'new' : false,
    args: ['--no-sandbox', '--disable-setuid-sandbox', '--window-size=1920,1080'],
  });

  const page = await browser.newPage();
  await page.setViewport({ width: 1920, height: 1080 });

  const leads = [];

  try {
    await login(page);

    const searchId = await runSearch(page);
    log(`✅ Search ID: ${searchId}`);

    // We need to find property IDs from the search results page.
    // The properties are dynamically loaded, so we inspect the rendered DOM.
    log('\n📍 Step 4: Finding property IDs...');
    log('⚠️  Properties are dynamically loaded - checking DOM structure...');

    const propertyButtons = await findPropertyButtons(page, MAX_PROPERTIES);

    if (propertyButtons.length === 0) {
      log('⚠️  No property buttons found. Properties may be loaded differently.');
      log('💡 Trying alternative: Click on "Recently Viewed Properties" section...');
      // TODO: no alternative lookup is implemented yet; this only waits.
      await sleep(2000);
    } else {
      log(`✅ Found ${propertyButtons.length} property buttons`);

      const total = Math.min(propertyButtons.length, MAX_PROPERTIES);
      const searchUrl = `${APP_BASE_URL}/search/${searchId}`;

      // For each property button, click it and get the property ID from the URL
      for (let i = 0; i < total; i++) {
        const prop = propertyButtons[i];
        log(`\n[${i + 1}/${total}] ${prop.address || prop.text.substring(0, 40)}...`);

        const lead = await scrapeProperty(page, searchId, prop);
        if (lead) {
          leads.push(lead);

          // Rate limiting (not needed after the last property)
          if (i < total - 1) {
            await sleep(PAGE_DELAY_MS);
          }
        }

        // Go back to search results
        await page.goto(searchUrl, { waitUntil: 'networkidle2', timeout: 30000 });
        await sleep(2000);
      }
    }

    // Save results
    if (leads.length > 0) {
      log(`\n✅ Total leads scraped: ${leads.length}`);

      const outputData = {
        scrapeDate: new Date().toISOString(),
        location: SEARCH_LOCATION,
        searchId,
        leadCount: leads.length,
        leads,
      };

      fs.writeFileSync(OUTPUT_FILE, JSON.stringify(outputData, null, 2));
      log(`💾 Saved to: ${OUTPUT_FILE}`);
    } else {
      log('\n⚠️  No leads scraped.');
    }

    log('\n✅ Scraping complete!');

    return { leadCount: leads.length, outputFile: OUTPUT_FILE };
  } catch (error) {
    log(`\n❌ Error: ${error.message}`);
    log(error.stack);

    // The screenshot is best-effort: report a failure to capture it, but
    // always rethrow the original error.
    try {
      await page.screenshot({ path: ERROR_SCREENSHOT, fullPage: true });
      log(`📸 Error screenshot saved: ${ERROR_SCREENSHOT}`);
    } catch (screenshotError) {
      log(`⚠️  Could not save error screenshot: ${screenshotError.message}`);
    }

    throw error;
  } finally {
    await browser.close();
    log('\n🔚 Browser closed');
  }
}

// Run
scrapeLeads()
  .then((result) => {
    log(`\n🎉 Success! ${result.leadCount} leads scraped.`);
    console.log(`\n💾 View your leads at: ${result.outputFile}`);
    process.exit(0);
  })
  .catch((error) => {
    log(`\n💥 Scraper failed: ${error.message}`);
    process.exit(1);
  });