#!/usr/bin/env node
/**
 * Reonomy Scraper v10 - OWNER TAB EXTRACTION WITH FILTERS
 *
 * Key improvements:
 * - Filters for phone and email in advanced search > owner section
 * - Extended wait (up to 30s) for contact details to load
 * - Waits until emails or phones are found before proceeding
 */

const puppeteer = require('puppeteer');
const fs = require('fs');
const path = require('path');

// Configuration
// SECURITY(review): the fallbacks below embed what look like real account
// credentials in source. They are kept so existing invocations keep working,
// but they should be supplied via REONOMY_EMAIL / REONOMY_PASSWORD instead and
// the embedded password rotated.
const REONOMY_EMAIL = process.env.REONOMY_EMAIL || 'henry@realestateenhanced.com';
const REONOMY_PASSWORD = process.env.REONOMY_PASSWORD || '9082166532';
const SEARCH_LOCATION = process.env.REONOMY_LOCATION || 'Eatontown, NJ';
const HEADLESS = process.env.HEADLESS === 'true';
const MAX_PROPERTIES = 20;

// Output files
const OUTPUT_FILE = path.join(__dirname, 'reonomy-leads-v10-filters.json');
const LOG_FILE = path.join(__dirname, 'reonomy-scraper-v10.log');

/**
 * Print a message to stdout and append it, prefixed with an ISO timestamp,
 * to LOG_FILE.
 *
 * @param {string} message - Text to log.
 */
function log(message) {
  const timestamp = new Date().toISOString();
  const logMessage = `[${timestamp}] ${message}\n`;
  console.log(message);
  fs.appendFileSync(LOG_FILE, logMessage);
}

/**
 * Resolve after the given number of milliseconds.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Extract ALL data from Owner tab.
 *
 * Runs entirely inside the page context, so the callback must not reference
 * anything from the Node.js scope.
 *
 * @param {object} page - Puppeteer page currently showing a property.
 * @returns {Promise<object>} Property details, emails, phones and owner names
 *   scraped from the current document.
 */
async function extractOwnerTabData(page) {
  return page.evaluate(() => {
    const info = {
      propertyId: '',
      propertyAddress: '',
      city: '',
      state: '',
      zip: '',
      squareFootage: '',
      propertyType: '',
      emails: [],
      phones: [],
      ownerNames: [],
      pageTitle: document.title,
      bodyTextSample: '',
    };

    const addUnique = (list, value) => {
      if (!list.includes(value)) {
        list.push(value);
      }
    };

    // Extract property ID from URL
    const propIdMatch = window.location.href.match(/property\/([a-f0-9-]+)/);
    if (propIdMatch) {
      info.propertyId = propIdMatch[1];
    }

    // Extract property address from h1, h2, h3
    const headingSelectors = ['h1', 'h2', 'h3'];
    for (const sel of headingSelectors) {
      const heading = document.querySelector(sel);
      if (!heading) {
        continue;
      }
      const text = heading.textContent.trim();
      // Groups: 1 = street, 2 = city, 3 = state, 4 = zip.
      const addressMatch = text.match(
        /^(\d+[^,]+),\s*([A-Za-z\s,]+),\s*([A-Z]{2})\s*(\d{5})/
      );
      if (addressMatch) {
        info.propertyAddress = addressMatch[0];
        info.city = addressMatch[2].trim();
        info.state = addressMatch[3].trim();
        info.zip = addressMatch[4].trim();
        break;
      }
    }

    // Extract property details (SF, type)
    const bodyText = document.body.innerText;

    // Square footage
    const sfMatch = bodyText.match(/(\d+\.?\d*\s*k?\s*SF)/i);
    if (sfMatch) {
      info.squareFootage = sfMatch[0];
    }

    // Property type: first entry of the list that appears anywhere in the page text
    const typePatterns = [
      'Warehouse',
      'Office Building',
      'Retail Stores',
      'Industrial',
      'General Industrial',
      'Medical Building',
      'School',
      'Religious',
      'Supermarket',
      'Financial Building',
    ];
    for (const type of typePatterns) {
      if (bodyText.includes(type)) {
        info.propertyType = type;
        break;
      }
    }

    // Extract emails from mailto: links
    document.querySelectorAll('a[href^="mailto:"]').forEach((a) => {
      const email = a.href.replace('mailto:', '');
      if (email && email.length > 5) {
        addUnique(info.emails, email);
      }
    });

    // Also try email patterns in text
    const emailRegex = /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/g;
    for (const email of bodyText.match(emailRegex) || []) {
      addUnique(info.emails, email);
    }

    // Extract phones from tel: links
    document.querySelectorAll('a[href^="tel:"]').forEach((a) => {
      const phone = a.href.replace('tel:', '');
      if (phone && phone.length >= 10) {
        addUnique(info.phones, phone);
      }
    });

    // Also try phone patterns in text
    const phoneRegex = /(\+?1[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}/g;
    for (const phone of bodyText.match(phoneRegex) || []) {
      addUnique(info.phones, phone);
    }

    // Extract owner names from Owner tab section. Group 1 is the name: one or
    // more capitalised words, optionally followed by a company suffix.
    const ownerPatterns = [
      /Owner:\s*([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*(?:\s+(?:LLC|LLP|Inc|Corp|Co|Ltd|Partners|Housing|Properties|Realty|Estate|Investments|Management))?)/g,
      /Owns\s+\d+\s+properties?\s*([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*(?:\s+(?:LLC|LLP|Inc|Corp|Co|Ltd|Partners|Housing|Properties|Realty|Estate|Investments|Management))?)/gi,
    ];
    for (const pattern of ownerPatterns) {
      // matchAll exposes the capture group for every hit; String#match with
      // the g flag would only return the full matches (label included).
      for (const match of bodyText.matchAll(pattern)) {
        const owner = match[1].trim();
        if (owner.length > 3) {
          addUnique(info.ownerNames, owner);
        }
      }
    }

    // Save sample for debugging
    info.bodyTextSample = bodyText.substring(0, 500);

    return info;
  });
}

/**
 * Extract property IDs from search results.
 *
 * Each property is reported once even when several links point at it.
 *
 * @param {object} page - Puppeteer page showing search results.
 * @returns {Promise<Array<{id: string, url: string}>>} Unique properties in
 *   document order.
 */
async function extractPropertyIds(page) {
  return page.evaluate(() => {
    const seen = new Set();
    const ids = [];
    for (const link of document.querySelectorAll('a[href*="/property/"]')) {
      const href = link.href;
      const match = href.match(/property\/([a-f0-9-]+)/);
      if (match && !seen.has(match[1])) {
        seen.add(match[1]);
        ids.push({ id: match[1], url: href });
      }
    }
    return ids;
  });
}

/**
 * Check if contact details are present (emails or phones).
 *
 * @param {object} page - Puppeteer page currently showing a property.
 * @returns {Promise<boolean>} True when at least one email or phone was found.
 */
async function hasContactDetails(page) {
  const data = await extractOwnerTabData(page);
  return data.emails.length > 0 || data.phones.length > 0;
}

/**
 * Find the first <button> whose text contains any of the given labels.
 *
 * page.evaluateHandle always resolves to a handle (even for "not found"), so
 * the handle is narrowed with asElement() to get a clickable element or null.
 *
 * @param {object} page - Puppeteer page.
 * @param {string[]} labels - Case-sensitive substrings to look for.
 * @returns {Promise<object|null>} ElementHandle of the button, or null.
 */
async function findButtonByText(page, labels) {
  const handle = await page.evaluateHandle((wanted) => {
    const buttons = Array.from(document.querySelectorAll('button'));
    return buttons.find((b) => wanted.some((w) => b.textContent.includes(w))) || null;
  }, labels);
  const element = handle.asElement();
  if (!element) {
    await handle.dispose();
    return null;
  }
  return element;
}

/**
 * Try to switch on the owner filter whose label mentions the given keyword
 * together with "available", "has" or "filter".
 *
 * @param {object} page - Puppeteer page with the filter panel open.
 * @param {string} keyword - Lower-case keyword, e.g. 'phone' or 'email'.
 * @returns {Promise<boolean>} True when a checkbox or switch was clicked.
 */
async function enableOwnerContactFilter(page, keyword) {
  return page.evaluate((kw) => {
    // Look for checkbox, switch, or toggle near a matching label
    const labels = Array.from(document.querySelectorAll('label, span, div')).filter((el) => {
      const text = el.textContent.toLowerCase();
      return (
        text.includes(kw) &&
        (text.includes('available') || text.includes('has') || text.includes('filter'))
      );
    });

    for (const label of labels) {
      const checkbox =
        label.querySelector('input[type="checkbox"]') ||
        label.previousElementSibling?.querySelector('input[type="checkbox"]') ||
        label.parentElement?.querySelector('input[type="checkbox"]');

      if (checkbox && !checkbox.checked) {
        checkbox.click();
        return true;
      }

      // No checkbox nearby: fall back to a switch/toggle inside the label
      if (!checkbox) {
        const switchEl =
          label.querySelector('[role="switch"]') ||
          label.querySelector('.switch') ||
          label.querySelector('.toggle');
        if (switchEl) {
          switchEl.click();
          return true;
        }
      }
    }
    return false;
  }, keyword);
}

/**
 * Open advanced search, switch to the Owner section and try to enable the
 * phone and email filters, then press the apply/search button if present.
 *
 * @param {object} page - Puppeteer page showing search results.
 * @returns {Promise<{phone: boolean, email: boolean}>} Which filters were
 *   actually toggled; both false when the advanced search could not be opened.
 */
async function applyOwnerContactFilters(page) {
  log('šŸ“ Step 3b: Applying phone and email filters...');

  // Click on advanced search button
  log(' šŸ”˜ Clicking advanced search...');

  // Try multiple selectors for advanced search button. Only valid CSS is
  // listed; text matching is handled by the fallback below.
  const advancedSearchSelectors = [
    'button[title*="Advanced"]',
    'div[class*="advanced"] button',
    'button[class*="filter"]',
    'button[aria-label*="filter"]',
    'button[aria-label*="Filter"]',
  ];

  let advancedButton = null;
  for (const selector of advancedSearchSelectors) {
    try {
      advancedButton = await page.waitForSelector(selector, { timeout: 3000, visible: true });
    } catch (err) {
      // Expected when this selector never shows up; try the next candidate.
      advancedButton = null;
    }
    if (advancedButton) {
      break;
    }
  }

  // If no button found, try clicking by text content
  if (!advancedButton) {
    log(' šŸ” Looking for "Advanced" button by text...');
    advancedButton = await findButtonByText(page, ['Advanced', 'advanced']);
  }

  if (!advancedButton) {
    log(' āš ļø Could not find advanced search button, continuing without filters');
    return { phone: false, email: false };
  }

  await advancedButton.click();
  await sleep(2000);
  log(' āœ… Advanced search opened');

  // Navigate to Owner tab in filters
  log(' šŸ“‹ Navigating to Owner section...');

  // Try to find Owner tab in filter panel
  const ownerTabClicked = await page.evaluate(() => {
    const tabs = Array.from(document.querySelectorAll('button, div[role="tab"], a[role="tab"]'));
    const ownerTab = tabs.find((t) => t.textContent.includes('Owner') && t.textContent.length < 20);
    if (ownerTab) {
      ownerTab.click();
      return true;
    }
    return false;
  });

  if (ownerTabClicked) {
    await sleep(1000);
    log(' āœ… Owner tab selected');
  }

  // Find and enable phone filter
  log(' šŸ“ž Enabling phone filter...');
  const phoneFilterEnabled = await enableOwnerContactFilter(page, 'phone');
  if (phoneFilterEnabled) {
    log(' āœ… Phone filter enabled');
  } else {
    log(' āš ļø Could not enable phone filter');
  }

  // Find and enable email filter
  log(' šŸ“§ Enabling email filter...');
  const emailFilterEnabled = await enableOwnerContactFilter(page, 'email');
  if (emailFilterEnabled) {
    log(' āœ… Email filter enabled');
  } else {
    log(' āš ļø Could not enable email filter');
  }

  // Apply filters
  log(' āœ… Applying filters...');

  // Look for apply/search button
  const applyButton = await findButtonByText(page, ['Apply', 'Search', 'Done']);
  if (applyButton) {
    await applyButton.click();
    await sleep(3000);
    log(' āœ… Filters applied');
  }

  return { phone: phoneFilterEnabled, email: emailFilterEnabled };
}

/**
 * Apply phone and email filters in advanced search > owner.
 *
 * @param {object} page - Puppeteer page showing search results.
 * @returns {Promise<boolean>} True when at least one of the two filters was
 *   enabled.
 */
async function applyContactFilters(page) {
  const { phone, email } = await applyOwnerContactFilters(page);
  return phone || email;
}

/**
 * Wait for contact details (up to 30 seconds), polling once per second.
 *
 * @param {object} page - Puppeteer page currently showing a property.
 * @param {number} [timeoutMs=30000] - Maximum time to keep polling.
 * @returns {Promise<boolean>} True as soon as an email or phone is found,
 *   false when the timeout elapses first.
 */
async function waitForContactDetails(page, timeoutMs = 30000) {
  const startTime = Date.now();
  log(` ā³ Waiting for contact details (up to ${timeoutMs / 1000}s)...`);

  while (Date.now() - startTime < timeoutMs) {
    // One extraction per poll serves both the check and the log line.
    const data = await extractOwnerTabData(page);
    if (data.emails.length > 0 || data.phones.length > 0) {
      log(` āœ… Contact details found! (${data.emails.length} emails, ${data.phones.length} phones)`);
      return true;
    }
    await sleep(1000);
  }

  log(' āš ļø No contact details found after timeout');
  return false;
}

/**
 * Open a property from the search results: click its button in the list and,
 * if that does not bring the browser to the property URL, navigate directly.
 *
 * @param {object} page - Puppeteer page showing search results.
 * @param {{id: string, url: string}} prop - Property to open.
 * @returns {Promise<void>}
 */
async function openProperty(page, prop) {
  let opened = false;
  try {
    // page.evaluate (not evaluateHandle) so the boolean comes back as a value.
    opened = await page.evaluate((propertyId) => {
      const buttons = Array.from(document.querySelectorAll('button'));
      const target = buttons.find((b) => {
        const link = b.querySelector('a[href*="/property/"]');
        return link && link.href.includes(propertyId);
      });
      if (!target) {
        return false;
      }
      target.scrollIntoView({ behavior: 'smooth', block: 'center' });
      target.click();
      return true;
    }, prop.id);
  } catch (err) {
    opened = false;
  }

  if (opened) {
    // A click alone does not prove navigation happened; confirm via the URL.
    try {
      await page.waitForFunction(
        (propertyId) => window.location.href.includes(propertyId),
        { timeout: 5000 },
        prop.id
      );
    } catch (err) {
      opened = false;
    }
  }

  if (!opened) {
    log(` āš ļø Could not click property, trying to navigate directly...`);
    await page.goto(prop.url, { waitUntil: 'networkidle2', timeout: 30000 });
  }
}

/**
 * Main scraper: log in, search SEARCH_LOCATION, apply the contact filters,
 * visit up to MAX_PROPERTIES properties and write the collected leads to
 * OUTPUT_FILE.
 *
 * @returns {Promise<{leadCount: number, outputFile: string}>}
 * @throws {Error} On login failure, missing search ID, empty results, or any
 *   Puppeteer error; a screenshot is attempted before rethrowing.
 */
async function scrapeLeads() {
  log('šŸš€ Starting Reonomy Scraper v10 (FILTERS + EXTENDED WAIT)...\n');

  const browser = await puppeteer.launch({
    headless: HEADLESS ? 'new' : false,
    args: ['--no-sandbox', '--disable-setuid-sandbox', '--window-size=1920,1080'],
  });

  const page = await browser.newPage();
  await page.setViewport({ width: 1920, height: 1080 });

  const leads = [];

  try {
    // Login
    log('šŸ“ Step 1: Logging into Reonomy...');
    await page.goto('https://app.reonomy.com/#!/account', {
      waitUntil: 'domcontentloaded',
      timeout: 60000,
    });
    await sleep(2000);

    await page.type('input[type="email"]', REONOMY_EMAIL, { delay: 100 });
    await page.type('input[type="password"]', REONOMY_PASSWORD, { delay: 100 });
    await page.click('button[type="submit"]');

    log('ā³ Waiting for login...');
    await sleep(10000);

    // Check if logged in
    const url = page.url();
    if (url.includes('login') || url.includes('auth')) {
      throw new Error('Login failed. Please check credentials.');
    }
    log('āœ… Successfully logged in!');

    // Navigate to search
    log('\nšŸ“ Step 2: Navigating to search...');
    await page.goto('https://app.reonomy.com/#!/search', {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });
    await sleep(3000);

    // Perform initial search
    log(`šŸ“ Step 3: Searching for: ${SEARCH_LOCATION}...`);
    let searchInput;
    try {
      searchInput = await page.waitForSelector(
        'input[placeholder*="address"], input[placeholder*="Search"]',
        { timeout: 10000 }
      );
    } catch (err) {
      // Fall back to any text input; a failure here propagates.
      searchInput = await page.waitForSelector('input[type="text"]', { timeout: 5000 });
    }

    if (searchInput) {
      await searchInput.click({ clickCount: 3 });
      await searchInput.type(SEARCH_LOCATION, { delay: 100 });
      await sleep(1000);
      await page.keyboard.press('Enter');
      log('ā³ Searching...');
      await sleep(5000);
    }

    // Apply phone and email filters
    const filters = await applyOwnerContactFilters(page);

    // Extract search ID from URL
    const urlMatch = page.url().match(/search\/([a-f0-9-]+)/);
    if (!urlMatch) {
      throw new Error('Could not extract search ID from URL');
    }
    const searchId = urlMatch[1];
    log(`āœ… Search ID: ${searchId}`);

    // Extract property IDs
    log('\nšŸ“ Step 4: Extracting property IDs...');
    const propertyIds = await extractPropertyIds(page);
    log(`āœ… Found ${propertyIds.length} property IDs`);

    if (propertyIds.length === 0) {
      log('āš ļø No property IDs found.');
      throw new Error('No properties found on search page.');
    }

    // Process each property
    const propertiesToScrape = propertyIds.slice(0, MAX_PROPERTIES);
    log(`\nšŸ“ Step 5: Processing ${propertiesToScrape.length} properties...`);

    for (let i = 0; i < propertiesToScrape.length; i++) {
      const prop = propertiesToScrape[i];
      log(`\n[${i + 1}/${propertiesToScrape.length}] Property ID: ${prop.id}`);

      // Click on property button (navigate to it)
      log(` šŸ”— Clicking property...`);
      await openProperty(page, prop);

      // Initial wait for property page to load
      log(` ā³ Waiting for Owner tab to load...`);
      await sleep(3000);

      // Extended wait for contact details (up to 30 seconds)
      await waitForContactDetails(page, 30000);

      // Extract data from Owner tab
      log(` šŸ“Š Extracting data from Owner tab...`);
      const propertyData = await extractOwnerTabData(page);

      log(` šŸ“§ Emails: ${propertyData.emails.length} found`);
      log(` šŸ“ž Phones: ${propertyData.phones.length} found`);
      log(` šŸ‘¤ Owners: ${propertyData.ownerNames.length} found`);
      log(` šŸ¢ Address: ${propertyData.propertyAddress || 'N/A'}`);

      const lead = {
        scrapeDate: new Date().toISOString().split('T')[0],
        propertyId: propertyData.propertyId,
        propertyUrl: propertyData.pageTitle?.includes('property')
          ? `https://app.reonomy.com/#!/property/${propertyData.propertyId}`
          : page.url(),
        address: propertyData.propertyAddress || '',
        city: propertyData.city || '',
        state: propertyData.state || '',
        zip: propertyData.zip || '',
        squareFootage: propertyData.squareFootage || '',
        propertyType: propertyData.propertyType || '',
        ownerNames: propertyData.ownerNames.join('; '),
        emails: propertyData.emails,
        phones: propertyData.phones,
        searchLocation: SEARCH_LOCATION,
        searchId: searchId,
        // Record what was actually toggled rather than assuming success.
        filtersApplied: { phone: filters.phone, email: filters.email },
      };

      leads.push(lead);

      // Go back to search results for next property
      log(` šŸ”™ Going back to search results...`);
      await page.goto(`https://app.reonomy.com/#!/search/${searchId}`, {
        waitUntil: 'networkidle2',
        timeout: 30000,
      });
      await sleep(3000);
    }

    // Save results
    if (leads.length > 0) {
      log(`\nāœ… Total leads scraped: ${leads.length}`);

      const outputData = {
        scrapeDate: new Date().toISOString(),
        location: SEARCH_LOCATION,
        searchId: searchId,
        leadCount: leads.length,
        filters: { phone: filters.phone, email: filters.email },
        leads: leads,
      };

      fs.writeFileSync(OUTPUT_FILE, JSON.stringify(outputData, null, 2));
      log(`šŸ’¾ Saved to: ${OUTPUT_FILE}`);
    } else {
      log('\nāš ļø No leads scraped.');
    }

    log('\nāœ… Scraping complete!');
    return { leadCount: leads.length, outputFile: OUTPUT_FILE };
  } catch (error) {
    log(`\nāŒ Error: ${error.message}`);
    log(error.stack);

    try {
      await page.screenshot({ path: '/tmp/reonomy-v10-error.png', fullPage: true });
      log('šŸ“ø Error screenshot saved: /tmp/reonomy-v10-error.png');
    } catch (screenshotError) {
      // Best effort only: never let a failed screenshot mask the real error.
      log(`Could not save error screenshot: ${screenshotError.message}`);
    }

    throw error;
  } finally {
    await browser.close();
    log('\nšŸ”š Browser closed');
  }
}

// Run
scrapeLeads()
  .then((result) => {
    log(`\nšŸŽ‰ Success! ${result.leadCount} leads scraped.`);
    console.log(`\nšŸ’¾ View your leads at: ${result.outputFile}`);
    process.exit(0);
  })
  .catch((error) => {
    log(`\nšŸ’„ Scraper failed: ${error.message}`);
    process.exit(1);
  });