#!/usr/bin/env node
/**
 * Reonomy Scraper v8 - FULL EXTRACTION WITH CLICK-THROUGH
 *
 * Workflow:
 * 1. Login
 * 2. Open the search page
 * 3. Apply advanced filters (Has Phone + Has Email)
 * 4. Search for location
 * 5. Extract property IDs
 * 6. For each property:
 *    - Click on property button
 *    - Wait for property page to fully load
 *    - Click "View Contact" or "Ownership" if present
 *    - Extract ALL data (emails, phones, owners, addresses, property details)
 *    - Go back to search results
 *    - Continue to next property
 *
 * Environment variables:
 *   REONOMY_EMAIL     (required) account e-mail
 *   REONOMY_PASSWORD  (required) account password
 *   REONOMY_LOCATION  (optional) search text, defaults to 'Eatontown, NJ'
 *   HEADLESS          (optional) set to 'true' to run the browser headless
 */

const puppeteer = require('puppeteer');
const fs = require('fs');
const path = require('path');

// Configuration.
// Credentials are read from the environment only: secrets must never be
// committed to source control as fallback defaults.
const REONOMY_EMAIL = process.env.REONOMY_EMAIL;
const REONOMY_PASSWORD = process.env.REONOMY_PASSWORD;
const SEARCH_LOCATION = process.env.REONOMY_LOCATION || 'Eatontown, NJ';
const HEADLESS = process.env.HEADLESS === 'true';
const MAX_PROPERTIES = 20;

// Longer waits for full content loading
const AFTER_CLICK_WAIT_MS = 5000;
const AFTER_TAB_SWITCH_WAIT_MS = 3000;
const BACK_NAVIGATION_WAIT_MS = 3000;
const MORE_FILTERS_TIMEOUT_MS = 15000;
const FILTER_LOOKUP_TIMEOUT_MS = 3000;
const RATE_LIMIT_DELAY_MS = 3000;

// Output files
const OUTPUT_FILE = path.join(__dirname, 'reonomy-leads-v8-full.json');
const LOG_FILE = path.join(__dirname, 'reonomy-scraper-v8.log');

/**
 * Print a message to stdout and append it, timestamped, to LOG_FILE.
 *
 * @param {string} message - Text to log.
 */
function log(message) {
  const timestamp = new Date().toISOString();
  const logMessage = `[${timestamp}] ${message}\n`;
  console.log(message);
  fs.appendFileSync(LOG_FILE, logMessage);
}

/**
 * Resolve after the given delay.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Wait for an element whose text contains one of `texts` (case-insensitive).
 *
 * `:has-text()` is Playwright syntax and is not valid CSS, so text matching is
 * done with plain DOM calls inside the page instead of a selector string.
 *
 * @param {object} page - Puppeteer page.
 * @param {string} cssSelector - Plain CSS selector for candidate elements.
 * @param {string[]} texts - Text fragments, tried in priority order.
 * @param {number} timeout - Maximum wait in milliseconds.
 * @param {?string} [fallbackSelector] - Plain CSS selector used when no text matches.
 * @returns {Promise<?object>} Element handle, or null when nothing appeared in time.
 */
async function waitForElementByText(page, cssSelector, texts, timeout, fallbackSelector = null) {
  const handle = await page
    .waitForFunction(
      (selector, needles, fallback) => {
        const candidates = Array.from(document.querySelectorAll(selector));
        for (const needle of needles) {
          const wanted = needle.toLowerCase();
          const hit = candidates.find((el) => (el.textContent || '').toLowerCase().includes(wanted));
          if (hit) {
            return hit;
          }
        }
        return fallback ? document.querySelector(fallback) : null;
      },
      { timeout },
      cssSelector,
      texts,
      fallbackSelector,
    )
    // A timeout here just means the element is absent; callers treat null as "not found".
    .catch(() => null);

  return handle ? handle.asElement() : null;
}

/**
 * Locate a filter checkbox by its label text and make sure it is ticked.
 *
 * @param {object} page - Puppeteer page.
 * @param {string} labelText - Visible label, e.g. "Has Phone".
 * @param {string} keyword - Fragment expected in the checkbox's id/data-test attribute.
 * @returns {Promise<boolean>} True when the filter control was found (whether or
 *   not it had to be clicked), false when it could not be located.
 */
async function ensureFilterChecked(page, labelText, keyword) {
  const handle = await page
    .waitForFunction(
      (text, key) => {
        const wanted = text.toLowerCase();
        const containers = Array.from(document.querySelectorAll('label, .filter-item'));
        for (const container of containers) {
          if (!(container.textContent || '').toLowerCase().includes(wanted)) {
            continue;
          }
          const box = container.querySelector('input[type="checkbox"]') || container.control;
          if (box) {
            return box;
          }
          if (container.tagName === 'LABEL') {
            // No nested input: clicking the label itself toggles its control.
            return container;
          }
        }
        return document.querySelector(
          `input[type="checkbox"][data-test*="${key}"], input[type="checkbox"][id*="${key}"]`,
        );
      },
      { timeout: FILTER_LOOKUP_TIMEOUT_MS },
      labelText,
      keyword,
    )
    .catch(() => null);

  const checkbox = handle ? handle.asElement() : null;
  if (!checkbox) {
    log(` ⚠️ "${labelText}" filter not found, skipping`);
    return false;
  }

  // Pass the element handle itself so the page-side function receives the DOM node.
  const isChecked = await page
    .evaluate((el) => el.checked === true || el.getAttribute('aria-checked') === 'true', checkbox)
    .catch(() => false);

  if (!isChecked) {
    log(` ☑️ Checking "${labelText}" filter...`);
    await checkbox.click();
    await sleep(500);
  }
  return true;
}

/**
 * Apply advanced filters (Has Phone + Has Email) on the search page.
 *
 * Best-effort: any failure is logged and the scrape continues unfiltered.
 *
 * @param {object} page - Puppeteer page showing the search view.
 * @returns {Promise<void>}
 */
async function applyAdvancedFilters(page) {
  log('🔍 Step 2.1: Applying advanced filters (Has Phone + Has Email)...');

  try {
    // Look for "More Filters" button
    const moreFiltersBtn = await waitForElementByText(
      page,
      'button',
      ['More Filters', 'Filters'],
      MORE_FILTERS_TIMEOUT_MS,
      'button[aria-label*="Filters"]',
    );

    if (moreFiltersBtn) {
      log(' 📋 Clicking "More Filters"...');
      await moreFiltersBtn.click();
      await sleep(2000);
    }

    const phoneApplied = await ensureFilterChecked(page, 'Has Phone', 'phone');
    await sleep(1000);
    const emailApplied = await ensureFilterChecked(page, 'Has Email', 'email');

    if (phoneApplied || emailApplied) {
      log('✅ Filters applied');
    }
  } catch (error) {
    log(` ⚠️ Filter application had issues: ${error.message}`);
  }
}

/**
 * Extract ALL available data from the property page currently open in `page`.
 *
 * @param {object} page - Puppeteer page showing a property.
 * @param {string} [propertyUrl] - Unused; kept so existing callers keep working.
 * @returns {Promise<object>} Plain object with propertyId, address, city, state,
 *   zip, propertyType, squareFootage, ownerName, ownerLocation, propertyCount,
 *   emails[], phones[], owners[], contacts[], pageTitle, url, hasContactButton,
 *   contactTabText and contactSectionSample.
 */
async function extractFullPropertyData(page, propertyUrl) {
  log(' 🔎 Extracting full property data...');

  // Everything inside this callback runs in the browser, so it may only use
  // DOM APIs and values defined within the callback itself.
  const data = await page.evaluate(() => {
    const result = {
      propertyId: '',
      address: '',
      city: '',
      state: '',
      zip: '',
      propertyType: '',
      squareFootage: '',
      ownerName: '',
      ownerLocation: '',
      propertyCount: '',
      emails: [],
      phones: [],
      owners: [],
      contacts: [],
      hasContactButton: false,
      contactTabText: '',
      contactSectionSample: '',
      pageTitle: document.title,
      url: window.location.href,
    };

    const bodyText = document.body.innerText || '';

    // Extract property ID from URL
    const propIdMatch = window.location.href.match(/property\/([a-f0-9-]+)/);
    if (propIdMatch) {
      result.propertyId = propIdMatch[1];
    }

    // Extract property address: the first heading, then the first heading that
    // looks like a street address.
    const streetWords = ['Highway', 'Avenue', 'Street', 'Rd', 'Dr', 'Way', 'Ln', 'Blvd', 'Rte'];
    const firstHeading = document.querySelector('h1, h2, h3, h4, h5, h6')?.textContent?.trim();
    const streetHeading = Array.from(document.querySelectorAll('[role="heading"], h1, h2, h3'))
      .map((h) => h.textContent?.trim())
      .find((t) => t && streetWords.some((word) => t.includes(word)));

    for (const addr of [firstHeading, streetHeading]) {
      if (addr && addr.length > 10 && addr.length < 200) {
        result.address = addr;
        break;
      }
    }

    // Extract city, state, zip from address
    const addressMatch = result.address.match(/,\s*([A-Za-z\s]+),\s*([A-Z]{2})\s*(\d{5})/);
    if (addressMatch) {
      result.city = addressMatch[1].trim();
      result.state = addressMatch[2].trim();
      result.zip = addressMatch[3].trim();
    }

    // Extract property type. Specific types are tested first; the bare unit
    // tokens are a last resort because almost every page mentions "SF".
    const typePatterns = [
      'General Industrial',
      'Retail Stores',
      'Warehouse',
      'Office Building',
      'Medical Building',
      'SF',
      'Acre',
    ];
    result.propertyType = typePatterns.find((type) => bodyText.includes(type)) || '';

    // Extract square footage
    const sfMatch = bodyText.match(/(\d+\.?\d*\s*k?\s*SF)/i);
    if (sfMatch) {
      result.squareFootage = sfMatch[0];
    }

    // Extract emails (from mailto: links and email patterns)
    const emails = new Set();
    document.querySelectorAll('a[href^="mailto:"]').forEach((a) => {
      // Drop any "?subject=..." suffix so only the address remains.
      const email = a.href.replace(/^mailto:/i, '').split('?')[0].trim();
      if (email.length > 5) {
        emails.add(email);
      }
    });
    const emailRegex = /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/g;
    for (const email of bodyText.match(emailRegex) || []) {
      emails.add(email);
    }
    result.emails = [...emails];

    // Extract phones (from tel: links and phone patterns). Numbers are
    // de-duplicated on their last 10 digits so "(732) 555-1234" and
    // "+17325551234" count as the same phone.
    const phoneKey = (value) => value.replace(/\D/g, '').slice(-10);
    const seenPhones = new Set();
    document.querySelectorAll('a[href^="tel:"]').forEach((a) => {
      const phone = a.href.replace(/^tel:/i, '').trim();
      const key = phoneKey(phone);
      if (phone.length > 7 && !seenPhones.has(key)) {
        seenPhones.add(key);
        result.phones.push(phone);
      }
    });
    // The digit guards stop the pattern matching inside longer digit runs.
    const phoneRegex = /(?<!\d)\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}(?!\d)/g;
    for (const match of bodyText.match(phoneRegex) || []) {
      const digits = match.replace(/\D/g, '');
      if (digits.length === 10 && !seenPhones.has(digits)) {
        seenPhones.add(digits);
        result.phones.push(digits);
      }
    }

    // Extract owner names. matchAll exposes the capture group, so the
    // "Owner:" label itself is not stored as part of the name.
    const ownerPatterns = [
      /Owner:\s*([A-Za-z][A-Za-z \t]*)/g,
      /([A-Z][a-z]+\s+[A-Z][a-z]+\s+(?:LLC|LLP|Inc|Corp|Co|Ltd|Partners|Housing|Properties|Realty|Estate|Investments|Management))/g,
    ];
    const owners = new Set();
    for (const pattern of ownerPatterns) {
      for (const match of bodyText.matchAll(pattern)) {
        const owner = match[1].replace(/\s+/g, ' ').trim();
        if (owner.length > 3) {
          owners.add(owner);
        }
      }
    }
    result.owners = [...owners];

    // Extract property count
    const propCountMatch = bodyText.match(/Owns\s+(\d+)\s+properties/i);
    if (propCountMatch) {
      result.propertyCount = propCountMatch[1];
    }

    // Look for owner location: prefer the "Owns N properties in <place>"
    // phrase, then fall back to the first " in <place>" on the page.
    const locationMatch =
      bodyText.match(/Owns\s+\d+\s+properties\s*in\s*([A-Za-z\s,]+)/i) ||
      bodyText.match(/\s+in\s+([A-Za-z\s,]+(?:\s*,\s+[A-Z]{2})?)/i);
    if (locationMatch) {
      // The character class spans newlines; keep only the first line.
      result.ownerLocation = locationMatch[1].trim().split('\n')[0].trim();
    }

    // Look for contact tabs/buttons
    const findByText = (selector, needles) => {
      const nodes = Array.from(document.querySelectorAll(selector));
      for (const needle of needles) {
        const wanted = needle.toLowerCase();
        const hit = nodes.find((el) => (el.textContent || '').toLowerCase().includes(wanted));
        if (hit) {
          return hit;
        }
      }
      return null;
    };
    const tab =
      findByText('button', ['View Contact', 'Contact']) ||
      findByText('button', ['Ownership', 'Owner']) ||
      findByText('[role="tab"]', ['Contact']);
    if (tab) {
      result.hasContactButton = true;
      result.contactTabText = tab.textContent?.trim() || '';
    }

    // Extract a sample of the page text (for debug)
    result.contactSectionSample = bodyText.substring(0, 1000);

    return result;
  });

  log(` 📧 Emails: ${data.emails.length} found`);
  log(` 📞 Phones: ${data.phones.length} found`);
  log(` 👤 Owners: ${data.owners.length} found`);

  return data;
}

/**
 * Click the search-result entry for a property so the app navigates to it.
 *
 * @param {object} page - Puppeteer page showing search results.
 * @param {string} propertyId - Property id as it appears in "/property/<id>" links.
 * @returns {Promise<{clicked: boolean}>} Whether a matching element was found and clicked.
 */
async function clickAndNavigateToProperty(page, propertyId) {
  log(`\n🔗 Clicking property ${propertyId}...`);

  const clicked = await page
    .evaluate((propId) => {
      const idFromHref = (href) => {
        const match = (href || '').match(/property\/([a-f0-9-]+)/);
        return match ? match[1] : null;
      };
      const buttons = Array.from(document.querySelectorAll('button'));

      const target =
        // Button wrapping a link to the property
        buttons.find((b) => {
          const link = b.querySelector('a[href*="/property/"]');
          return Boolean(link) && idFromHref(link.href) === propId;
        }) ||
        // Button mentioning the property ID in its text
        buttons.find((b) => (b.textContent || '').includes(propId)) ||
        // The property link itself (IDs are harvested from these links)
        Array.from(document.querySelectorAll('a[href*="/property/"]')).find(
          (a) => idFromHref(a.href) === propId,
        );

      if (!target) {
        return { clicked: false };
      }

      target.scrollIntoView({ behavior: 'smooth', block: 'center' });
      // Short delay so the scroll starts before the click is dispatched.
      setTimeout(() => {
        target.click();
      }, 100);
      return { clicked: true };
    }, propertyId)
    .catch((error) => {
      log(` ⚠️ Click attempt failed: ${error.message}`);
      return { clicked: false };
    });

  await sleep(2000);
  return clicked;
}

/**
 * Try to find and click a "View Contact" / "Contact" / "Ownership" tab.
 *
 * @param {object} page - Puppeteer page showing a property.
 * @returns {Promise<void>}
 */
async function clickViewContactTab(page) {
  log(' 📋 Looking for "View Contact" tab...');

  const clicked = await page
    .evaluate(() => {
      const searches = [
        ['button', 'View Contact'],
        ['button', 'Contact'],
        ['button', 'Ownership'],
        ['[role="tab"]', 'Contact'],
      ];

      for (const [selector, text] of searches) {
        const wanted = text.toLowerCase();
        const tab = Array.from(document.querySelectorAll(selector)).find((el) =>
          (el.textContent || '').toLowerCase().includes(wanted),
        );
        if (tab) {
          tab.scrollIntoView({ behavior: 'smooth', block: 'center' });
          setTimeout(() => {
            tab.click();
          }, 200);
          return { clicked: true };
        }
      }
      return { clicked: false };
    })
    .catch((error) => {
      log(` ⚠️ Contact tab lookup failed: ${error.message}`);
      return { clicked: false };
    });

  if (clicked && clicked.clicked) {
    log(' ✅ Clicked contact tab');
    await sleep(AFTER_TAB_SWITCH_WAIT_MS);
  } else {
    log(' ⚠️ No "View Contact" tab found');
  }
}

/**
 * Write the collected leads to OUTPUT_FILE as pretty-printed JSON.
 *
 * @param {object[]} leads - Lead records collected so far.
 * @param {?string} searchId - Reonomy search id the leads came from.
 */
function saveLeads(leads, searchId) {
  const outputData = {
    scrapeDate: new Date().toISOString(),
    location: SEARCH_LOCATION,
    searchId,
    leadCount: leads.length,
    leads,
  };

  fs.writeFileSync(OUTPUT_FILE, JSON.stringify(outputData, null, 2));
  log(`💾 Saved to: ${OUTPUT_FILE}`);
}

/**
 * Main scraper: log in, search, click through up to MAX_PROPERTIES results and
 * save the extracted leads.
 *
 * @returns {Promise<{leadCount: number, outputFile: string}>}
 * @throws {Error} When credentials are missing, login fails, the search id
 *   cannot be read from the URL, or no properties are found.
 */
async function scrapeLeads() {
  log('🚀 Starting Reonomy Scraper v8 (FULL EXTRACTION)...\n');

  // Fail before launching a browser if there is nothing to log in with.
  if (!REONOMY_EMAIL || !REONOMY_PASSWORD) {
    throw new Error(
      'Missing credentials: set the REONOMY_EMAIL and REONOMY_PASSWORD environment variables.',
    );
  }

  const browser = await puppeteer.launch({
    headless: HEADLESS ? 'new' : false,
    args: ['--no-sandbox', '--disable-setuid-sandbox', '--window-size=1920,1080'],
  });

  const page = await browser.newPage();
  await page.setViewport({ width: 1920, height: 1080 });

  const leads = [];
  let searchId = null;

  try {
    // Step 1: Login
    log('\n📍 Step 1: Logging into Reonomy...');

    await page.goto('https://app.reonomy.com/#!/account', {
      waitUntil: 'domcontentloaded',
      timeout: 60000,
    });
    await sleep(2000);

    await page.type('input[type="email"]', REONOMY_EMAIL, { delay: 100 });
    await page.type('input[type="password"]', REONOMY_PASSWORD, { delay: 100 });
    await page.click('button[type="submit"]');

    log(' ⏳ Waiting for login...');
    await sleep(10000);

    // Check if logged in
    const url = page.url();
    if (url.includes('login') || url.includes('auth')) {
      throw new Error('Login failed. Please check credentials.');
    }

    log('✅ Successfully logged in!');

    // Step 2: Navigate to search
    log('\n📍 Step 2: Navigating to search...');
    await page.goto('https://app.reonomy.com/#!/search', {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });
    await sleep(3000);

    // Step 3: Apply advanced filters
    log('\n📍 Step 3: Applying filters for contact info...');
    await applyAdvancedFilters(page);

    // Step 4: Perform search
    log(`\n📍 Step 4: Searching for: ${SEARCH_LOCATION}...`);

    const searchInput = await page
      .waitForSelector('input[placeholder*="address"], input[placeholder*="Search"]', {
        timeout: 10000,
      })
      .catch(() => page.waitForSelector('input[type="text"]', { timeout: 5000 }));

    if (searchInput) {
      // Triple-click selects any existing text so typing replaces it.
      await searchInput.click({ clickCount: 3 });
      await searchInput.type(SEARCH_LOCATION, { delay: 100 });
      await sleep(1000);
      await page.keyboard.press('Enter');
      log(' ⏳ Searching...');
      await sleep(5000);
    }

    // Extract search ID
    const urlMatch = page.url().match(/search\/([a-f0-9-]+)/);
    if (!urlMatch) {
      throw new Error('Could not extract search ID from URL');
    }
    searchId = urlMatch[1];
    log(`✅ Search ID: ${searchId}`);

    // Step 5: Extract property IDs
    log('\n📍 Step 5: Extracting property IDs...');

    const propertyIds = await page.evaluate(() => {
      // A result card can link to the same property more than once.
      const byId = new Map();
      document.querySelectorAll('a[href*="/property/"]').forEach((link) => {
        const match = link.href.match(/property\/([a-f0-9-]+)/);
        if (match && !byId.has(match[1])) {
          byId.set(match[1], { id: match[1], url: link.href });
        }
      });
      return [...byId.values()];
    });

    log(`✅ Found ${propertyIds.length} property IDs`);

    if (propertyIds.length === 0) {
      log('⚠️ No property IDs found.');
      throw new Error('No properties found on search page.');
    }

    // Step 6: Click through properties
    const propertiesToScrape = propertyIds.slice(0, MAX_PROPERTIES);

    log(`\n📍 Step 6: Clicking through ${propertiesToScrape.length} properties...`);

    for (const [index, prop] of propertiesToScrape.entries()) {
      log(`\n[${index + 1}/${propertiesToScrape.length}] Property: ${prop.id}`);

      // One bad property must not discard the leads already collected.
      try {
        // Click on property button
        const clickResult = await clickAndNavigateToProperty(page, prop.id);

        if (!clickResult.clicked) {
          log(` ⚠️ Could not click property ${prop.id}`);
          continue;
        }

        // Wait for property page to load
        log(' ⏳ Waiting for property page to load...');
        await sleep(AFTER_CLICK_WAIT_MS);

        // Try to click "View Contact" tab
        await clickViewContactTab(page);

        // Additional wait for dynamic content
        log(' ⏳ Waiting for dynamic content...');
        await sleep(AFTER_TAB_SWITCH_WAIT_MS);

        // Extract ALL data
        const propertyData = await extractFullPropertyData(page);

        log(` 📧 Emails found: ${propertyData.emails.length}`);
        log(` 📞 Phones found: ${propertyData.phones.length}`);
        log(` 👤 Owners found: ${propertyData.owners.length}`);

        // Create lead object
        leads.push({
          scrapeDate: new Date().toISOString().split('T')[0],
          propertyId: prop.id,
          propertyUrl: page.url(),
          address: propertyData.address || '',
          city: propertyData.city || '',
          state: propertyData.state || '',
          zip: propertyData.zip || '',
          propertyType: propertyData.propertyType || '',
          squareFootage: propertyData.squareFootage || '',
          ownerNames: propertyData.owners.join(', '),
          ownerLocation: propertyData.ownerLocation || '',
          propertyCount: propertyData.propertyCount || '',
          emails: propertyData.emails,
          phones: propertyData.phones,
          pageTitle: propertyData.pageTitle,
          searchLocation: SEARCH_LOCATION,
          searchId,
          hasContactButton: propertyData.hasContactButton || false,
          contactTabText: propertyData.contactTabText || '',
        });
      } catch (error) {
        log(` ⚠️ Failed to scrape property ${prop.id}: ${error.message}`);
      }

      // Go back to search results
      log(' 🔙 Going back to search results...');
      await page.goto(`https://app.reonomy.com/#!/search/${searchId}`, {
        waitUntil: 'networkidle2',
        timeout: 60000,
      });
      await sleep(BACK_NAVIGATION_WAIT_MS);

      // Rate limiting
      log(` ⏸ Rate limit: ${RATE_LIMIT_DELAY_MS}ms...`);
      await sleep(RATE_LIMIT_DELAY_MS);
    }

    // Save results
    if (leads.length > 0) {
      log(`\n✅ Total leads scraped: ${leads.length}`);
      saveLeads(leads, searchId);
    } else {
      log('\n⚠️ No leads scraped.');
    }

    log('\n✅ Scraping complete!');

    return { leadCount: leads.length, outputFile: OUTPUT_FILE };
  } catch (error) {
    log(`\n❌ Error: ${error.message}`);
    log(error.stack);

    // Keep whatever was collected before the failure.
    if (leads.length > 0) {
      try {
        saveLeads(leads, searchId);
      } catch (saveError) {
        log(` ⚠️ Could not save partial results: ${saveError.message}`);
      }
    }

    try {
      await page.screenshot({ path: '/tmp/reonomy-v8-error.png', fullPage: true });
      log('📸 Error screenshot saved: /tmp/reonomy-v8-error.png');
    } catch (screenshotError) {
      log(` ⚠️ Could not save error screenshot: ${screenshotError.message}`);
    }

    throw error;
  } finally {
    await browser.close();
    log('\n🔚 Browser closed');
  }
}

// Run
scrapeLeads()
  .then((result) => {
    log(`\n🎉 Success! ${result.leadCount} leads scraped.`);
    console.log(`\n💾 View your leads at: ${result.outputFile}`);
    process.exit(0);
  })
  .catch((error) => {
    log(`\n💥 Scraper failed: ${error.message}`);
    process.exit(1);
  });