#!/usr/bin/env node
/**
 * Reonomy Lead Scraper - Working JSON Fallback Version
 *
 * Extracts property and owner leads from Reonomy dashboard/search
 * and saves to JSON (no Google Sheets dependency).
 *
 * Required environment variables:
 *   REONOMY_EMAIL    - Reonomy account email
 *   REONOMY_PASSWORD - Reonomy account password
 * Optional:
 *   REONOMY_LOCATION - search location (default: "New York, NY")
 *   HEADLESS         - "true" to run the browser headless
 */

const puppeteer = require('puppeteer');
const fs = require('fs');
const path = require('path');

// Configuration.
// SECURITY FIX: a real email and plaintext password were previously
// hard-coded here as fallbacks, leaking live credentials into source
// control. Credentials must now come from the environment; scrapeLeads()
// fails fast if they are missing. Rotate the leaked password.
const REONOMY_EMAIL = process.env.REONOMY_EMAIL;
const REONOMY_PASSWORD = process.env.REONOMY_PASSWORD;
const SEARCH_LOCATION = process.env.REONOMY_LOCATION || 'New York, NY';
const HEADLESS = process.env.HEADLESS === 'true';

// Output files (written next to this script)
const OUTPUT_FILE = path.join(__dirname, 'reonomy-leads.json');
const LOG_FILE = path.join(__dirname, 'reonomy-scraper.log');

/**
 * Log a message to stdout and append a timestamped copy to LOG_FILE.
 * Logging is best-effort: a log-file write failure must never abort
 * the scrape, so it is reported to stderr and otherwise ignored.
 * @param {string} message
 */
function log(message) {
  const timestamp = new Date().toISOString();
  const logMessage = `[${timestamp}] ${message}\n`;
  console.log(message);
  try {
    fs.appendFileSync(LOG_FILE, logMessage);
  } catch (err) {
    console.error(`(log file write failed: ${err.message})`);
  }
}

/**
 * Promise-based delay.
 * @param {number} ms - milliseconds to wait
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise(resolve => setTimeout(resolve, ms));
}

/**
 * Save leads to JSON file.
 * @param {Array<object>} leads - flat lead records
 * @returns {string|null} path of the file written, or null on failure
 */
function saveLeads(leads) {
  const data = {
    scrapeDate: new Date().toISOString(),
    leadCount: leads.length,
    location: SEARCH_LOCATION,
    leads: leads
  };
  try {
    fs.writeFileSync(OUTPUT_FILE, JSON.stringify(data, null, 2));
    log(`šŸ’¾ Saved ${leads.length} leads to ${OUTPUT_FILE}`);
    return OUTPUT_FILE;
  } catch (error) {
    log(`āŒ Error saving to JSON: ${error.message}`);
    return null;
  }
}

/**
 * Extract property leads from the current page.
 * Scans anchors linking to /property/ pages and parses
 * "123 Main St, City, ST 12345 ..." text into structured fields;
 * anything after the zip is treated as square footage / property type.
 * @param {import('puppeteer').Page} page
 * @returns {Promise<Array<object>>} lead records (owner fields left blank)
 */
async function extractProperties(page) {
  log('šŸ” Extracting property data...');
  const properties = await page.evaluate(() => {
    const results = [];
    const propertyLinks = Array.from(document.querySelectorAll('a[href*="/property/"]'));
    propertyLinks.forEach(link => {
      const text = (link.innerText || link.textContent || '').trim();
      // "<number street>, <city>, <ST> <zip>"
      const addressMatch = text.match(/^(\d+.+),\s*([A-Za-z\s]+),\s*([A-Z]{2})\s*(\d{5})/);
      if (addressMatch) {
        results.push({
          fullText: text,
          address: addressMatch[1].trim(),
          city: addressMatch[2].trim(),
          state: addressMatch[3].trim(),
          zip: addressMatch[4].trim(),
          url: link.href,
          remainingText: text.substring(addressMatch[0].length).trim()
        });
      }
    });
    return results;
  });

  const scrapeDate = new Date().toISOString().split('T')[0];
  const leads = [];
  for (const prop of properties) {
    // e.g. "12.5k SF" or "4000 SF"
    const sqFtMatch = prop.remainingText.match(/(\d+\.?\d*)\s*k?\s*SF/i);
    const sqFt = sqFtMatch ? sqFtMatch[0] : '';
    // Whatever remains after removing the square footage is the property type.
    const propertyType = prop.remainingText.replace(sqFt, '').trim();
    leads.push({
      scrapeDate,
      ownerName: '',
      propertyAddress: prop.address,
      city: prop.city,
      state: prop.state,
      zip: prop.zip,
      propertyType,
      squareFootage: sqFt,
      ownerLocation: '',
      propertyCount: '',
      propertyUrl: prop.url,
      ownerUrl: '',
      email: '',
      phone: ''
    });
  }
  log(`āœ… Extracted ${leads.length} properties`);
  return leads;
}

/**
 * Extract owner leads from the current page.
 * Scans anchors linking to /person/ pages; the first text line is the
 * owner name, a line containing a comma (if any) is taken as location,
 * and "N propert..." text yields a property count.
 * @param {import('puppeteer').Page} page
 * @returns {Promise<Array<object>>} lead records (property fields left blank)
 */
async function extractOwners(page) {
  log('šŸ” Extracting owner data...');
  const owners = await page.evaluate(() => {
    const results = [];
    const ownerLinks = Array.from(document.querySelectorAll('a[href*="/person/"]'));
    ownerLinks.forEach(link => {
      const text = (link.innerText || link.textContent || '').trim();
      const lines = text.split('\n').map(l => l.trim()).filter(l => l);
      if (lines.length >= 2) {
        const ownerName = lines[0];
        const location = lines.find(l => l.includes(',')) || '';
        const propertyCountMatch = text.match(/(\d+)\s*propert/i);
        const propertyCount = propertyCountMatch ? propertyCountMatch[1] : '';
        results.push({ ownerName, location, propertyCount, url: link.href, fullText: text });
      }
    });
    return results;
  });

  const scrapeDate = new Date().toISOString().split('T')[0];
  const leads = [];
  for (const owner of owners) {
    let city = '';
    let state = '';
    const ownerLocation = owner.location;
    if (ownerLocation.includes(',')) {
      const parts = ownerLocation.split(',').map(p => p.trim());
      if (parts.length >= 2 && /^[A-Z]{2}$/.test(parts[parts.length - 1])) {
        state = parts[parts.length - 1];
        // The city segment may carry a prefix (e.g. a street address);
        // take the trailing run of capitalized words as the city name.
        const cityWithPrefix = parts[parts.length - 2];
        const cityMatch = cityWithPrefix.match(/(\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)$/);
        city = cityMatch ? cityMatch[1] : '';
      } else if (parts.length === 2) {
        city = parts[0];
        state = parts[1];
      }
    }
    leads.push({
      scrapeDate,
      ownerName: owner.ownerName,
      propertyAddress: '',
      city,
      state,
      zip: '',
      propertyType: '',
      squareFootage: '',
      ownerLocation: owner.location,
      propertyCount: owner.propertyCount,
      propertyUrl: '',
      ownerUrl: owner.url,
      email: '',
      phone: ''
    });
  }
  log(`āœ… Extracted ${leads.length} owners`);
  return leads;
}

/**
 * Main scraper: log in to Reonomy, run a location search, extract
 * property and owner leads from the results, and persist them to
 * OUTPUT_FILE (or screenshot the page if nothing was found).
 * @returns {Promise<{leadCount: number, outputFile: string}>}
 * @throws {Error} if credentials are missing or login fails
 */
async function scrapeLeads() {
  log('šŸš€ Starting Reonomy Lead Scraper (JSON Fallback Mode)...\n');

  // Fail fast instead of typing "undefined" into the login form.
  if (!REONOMY_EMAIL || !REONOMY_PASSWORD) {
    throw new Error('REONOMY_EMAIL and REONOMY_PASSWORD environment variables must be set.');
  }

  const browser = await puppeteer.launch({
    headless: HEADLESS ? 'new' : false,
    args: ['--no-sandbox', '--disable-setuid-sandbox', '--window-size=1920,1080']
  });
  const page = await browser.newPage();
  await page.setViewport({ width: 1920, height: 1080 });

  try {
    // Login
    log('\nšŸ“ Step 1: Logging into Reonomy...');
    await page.goto('https://app.reonomy.com/#!/account', { waitUntil: 'domcontentloaded', timeout: 60000 });
    await sleep(2000);
    await page.type('input[type="email"]', REONOMY_EMAIL, { delay: 100 });
    await page.type('input[type="password"]', REONOMY_PASSWORD, { delay: 100 });
    await page.click('button[type="submit"]');
    log('ā³ Logging in...');
    await sleep(8000);

    // Heuristic: still being on a login/auth URL means the login failed.
    const url = page.url();
    if (url.includes('login') || url.includes('auth')) {
      throw new Error('Login failed. Please check credentials.');
    }
    log('āœ… Successfully logged in!');

    // Navigate to search
    log('\nšŸ“ Step 2: Navigating to search...');
    await page.goto('https://app.reonomy.com/#!/search', { waitUntil: 'networkidle2', timeout: 60000 });
    await sleep(3000);
    log('āœ… On search page');

    // Search
    log(`\nšŸ“ Step 3: Searching for: ${SEARCH_LOCATION}...`);
    // Prefer the dedicated search box; fall back to any text input.
    const searchInput = await page.waitForSelector(
      'input[placeholder*="address"], input[placeholder*="Search"]',
      { timeout: 10000 }
    ).catch(() => {
      return page.waitForSelector('input[type="text"]', { timeout: 5000 });
    });
    if (searchInput) {
      await searchInput.click({ clickCount: 3 }); // triple-click selects existing text
      await searchInput.type(SEARCH_LOCATION, { delay: 100 });
      await sleep(1000);
      await page.keyboard.press('Enter');
      log('ā³ Searching...');
      await sleep(5000);
    }

    // Extract leads
    log('\nšŸ“ Step 4: Extracting lead data...');
    const allLeads = [];
    const properties = await extractProperties(page);
    allLeads.push(...properties);
    const owners = await extractOwners(page);
    allLeads.push(...owners);
    log(`\nāœ… Total leads extracted: ${allLeads.length}`);

    if (allLeads.length === 0) {
      log('\nāš ļø No leads found. Taking screenshot for debugging...');
      await page.screenshot({ path: '/tmp/reonomy-no-leads.png', fullPage: true });
      log('šŸ“ø Screenshot saved: /tmp/reonomy-no-leads.png');
    } else {
      // Save to JSON
      log('\nšŸ“ Step 5: Saving leads to JSON file...');
      saveLeads(allLeads);
    }

    log('\nāœ… Scraping complete!');
    log(`šŸ’¾ Leads saved to: ${OUTPUT_FILE}`);
    log(`šŸ“ Log file: ${LOG_FILE}`);
    return { leadCount: allLeads.length, outputFile: OUTPUT_FILE };
  } catch (error) {
    log(`\nāŒ Error: ${error.message}`);
    log(error.stack);
    try {
      await page.screenshot({ path: '/tmp/reonomy-error.png', fullPage: true });
      log('šŸ“ø Error screenshot saved: /tmp/reonomy-error.png');
    } catch (e) {
      // Best-effort screenshot; the page may already be unusable.
    }
    throw error;
  } finally {
    await browser.close();
    log('\nšŸ”š Browser closed');
  }
}

// Run
scrapeLeads()
  .then(result => {
    log(`\nšŸŽ‰ Success! ${result.leadCount} leads scraped.`);
    console.log(`\nšŸ’¾ View your leads at: ${result.outputFile}`);
    process.exit(0);
  })
  .catch(error => {
    log(`\nšŸ’„ Scraper failed: ${error.message}`);
    process.exit(1);
  });