#!/usr/bin/env node
'use strict';

/**
 * Reonomy lead scraper.
 *
 * Logs into app.reonomy.com with Puppeteer, opens a saved search, visits up to
 * MAX_PROPERTIES property pages and writes the collected data to a JSON file
 * next to this script.
 *
 * Environment variables:
 *   REONOMY_EMAIL      (required) login email
 *   REONOMY_PASSWORD   (required) login password
 *   REONOMY_SEARCH_ID  (optional) saved-search UUID
 *   MAX_PROPERTIES     (optional) positive integer, defaults to 20
 *   HEADLESS           (optional) set to "false" to show the browser window
 */

const puppeteer = require('puppeteer');
const fs = require('fs');
const path = require('path');

// Credentials are read from the environment only; secrets must never be
// committed as fallback defaults in source control.
const REONOMY_EMAIL = process.env.REONOMY_EMAIL;
const REONOMY_PASSWORD = process.env.REONOMY_PASSWORD;
const SEARCH_ID = process.env.REONOMY_SEARCH_ID || '504a2d13-d88f-4213-9ac6-a7c8bc7c20c6';

// Missing, non-numeric, zero or negative values all fall back to 20 so that
// `slice(0, MAX_PROPERTIES)` never receives a negative bound.
const DEFAULT_MAX_PROPERTIES = 20;
const parsedMaxProperties = Number.parseInt(process.env.MAX_PROPERTIES, 10);
const MAX_PROPERTIES = parsedMaxProperties > 0 ? parsedMaxProperties : DEFAULT_MAX_PROPERTIES;

const HEADLESS = process.env.HEADLESS !== 'false';
const OUTPUT_FILE = path.join(__dirname, 'reonomy-leads-v9-working.json');
const LOG_FILE = path.join(__dirname, 'reonomy-scraper-v9-working.log');
const ERROR_SCREENSHOT = '/tmp/reonomy-v9-error.png';

// "<street>, <city>, <ST> <zip>" -> groups: 1 street, 2 city, 3 state, 4 zip.
const ADDRESS_PATTERN = /^(\d+[^,]+),\s*([A-Za-z\s,]+),\s*([A-Z]{2})\s*(\d{5})/;

const SQUARE_FOOTAGE_PATTERN = /(\d+\.?\d*\s*k?\s*SF)/i;

// More specific labels come first so that "General Industrial" is not
// shadowed by the plain "Industrial" substring.
const PROPERTY_TYPES = [
  'Warehouse',
  'Office Building',
  'Retail Stores',
  'General Industrial',
  'Industrial',
  'Medical Building',
  'School',
  'Religious',
  'Supermarket',
  'Financial Building',
];

// "Owns <n> properties <Owner Name> [suffix]" -> group 2 is the owner name.
const OWNER_PATTERN =
  /Owns\s+(\d+)\s+properties?\s*([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*(?:\s+(?:LLC|LLP|Inc|Corp|Co|Ltd|Partners|Housing|Properties|Realty|Estate|Investments|Management))?)/g;

/**
 * Prints a message to stdout and appends it, prefixed with an ISO timestamp,
 * to LOG_FILE.
 *
 * @param {string} message - Text to log.
 */
function log(message) {
  const timestamp = new Date().toISOString();
  console.log(message);
  fs.appendFileSync(LOG_FILE, `[${timestamp}] ${message}\n`);
}

/**
 * Resolves after the given delay.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Splits a heading such as "123 Main St, Springfield, IL 62704" into parts.
 *
 * @param {string} text - Trimmed heading text.
 * @returns {{propertyAddress: string, city: string, state: string, zip: string} | null}
 *   Parsed parts (propertyAddress is the full matched address), or null when
 *   the text does not look like an address.
 */
function parseAddress(text) {
  const match = text.match(ADDRESS_PATTERN);
  if (!match) {
    return null;
  }
  // Index 0 is the whole match and index 1 the street; city/state/zip are 2-4.
  const [propertyAddress, , city, state, zip] = match;
  return { propertyAddress, city: city.trim(), state: state.trim(), zip: zip.trim() };
}

/**
 * Returns the first known property-type label contained in the text.
 *
 * @param {string} text - Page text to search.
 * @returns {string} The matching label, or '' when none is present.
 */
function detectPropertyType(text) {
  return PROPERTY_TYPES.find((type) => text.includes(type)) || '';
}

/**
 * Collects the unique owner names that follow an "Owns N properties" phrase.
 *
 * @param {string} text - Page text to search.
 * @returns {string[]} Unique owner names longer than three characters, in
 *   order of first appearance.
 */
function extractOwnerNames(text) {
  const names = new Set();
  // matchAll works on a clone of the regex, so the shared /g literal keeps no
  // lastIndex state between calls.
  for (const match of text.matchAll(OWNER_PATTERN)) {
    const name = match[2].trim();
    if (name.length > 3) {
      names.add(name);
    }
  }
  return [...names];
}

/**
 * Reads the currently displayed property page and extracts address, size,
 * property type and owner names.
 *
 * @param {import('puppeteer').Page} page - Page already showing a property.
 * @returns {Promise<object>} Record with propertyId, propertyAddress, city,
 *   state, zip, squareFootage, propertyType, emails, phones and ownerNames.
 *   `emails` and `phones` are always empty arrays: this script contains no
 *   extraction logic for them.
 */
async function extractOwnerTabData(page) {
  log('📊 Extracting Owner tab data...');

  const propIdMatch = page.url().match(/property\/([a-f0-9-]+)/);
  const propertyId = propIdMatch ? propIdMatch[1] : '';

  // The address is taken from the first h1/h2/h3 whose text parses as one.
  let address = { propertyAddress: '', city: '', state: '', zip: '' };
  for (const selector of ['h1', 'h2', 'h3']) {
    const heading = await page.$(selector);
    if (!heading) {
      continue;
    }
    const text = (await page.evaluate((el) => el.textContent, heading)).trim();
    const parsed = parseAddress(text);
    if (parsed) {
      address = parsed;
      log(` 📍 Address: ${text}`);
      break;
    }
  }

  // Visible page text; every pattern below is matched against this string.
  const bodyTextContent = await page.evaluate(() =>
    document.body ? document.body.innerText : ''
  );

  const sfMatch = bodyTextContent.match(SQUARE_FOOTAGE_PATTERN);
  const squareFootage = sfMatch ? sfMatch[0] : '';

  const propertyType = detectPropertyType(bodyTextContent);
  if (propertyType) {
    log(` 🏢 Property Type: ${propertyType}`);
  }

  const ownerNames = extractOwnerNames(bodyTextContent);
  log(` 👤 Owners found: ${ownerNames.length}`);

  return {
    propertyId,
    ...address,
    squareFootage,
    propertyType,
    emails: [],
    phones: [],
    ownerNames,
  };
}

/**
 * Tries to open a property from the search results by clicking the button
 * that wraps its link.
 *
 * @param {import('puppeteer').Page} page - Page showing the search results.
 * @param {string} propertyId - ID of the property to open.
 * @returns {Promise<boolean>} true when a matching button was found and clicked.
 */
async function clickProperty(page, propertyId) {
  try {
    // page.evaluate (not evaluateHandle) is required so that the boolean is
    // serialised back to Node instead of being wrapped in a JSHandle.
    return await page.evaluate((id) => {
      const target = Array.from(document.querySelectorAll('button')).find((button) => {
        const link = button.querySelector('a[href*="/property/"]');
        return link !== null && link.href.includes(id);
      });
      if (!target) {
        return false;
      }
      target.scrollIntoView({ behavior: 'smooth', block: 'center' });
      target.click();
      return true;
    }, propertyId);
  } catch (error) {
    log(` ⚠️ Click attempt failed: ${error.message}`);
    return false;
  }
}

/**
 * Runs the full scrape: login, open the saved search, visit each property and
 * write the results to OUTPUT_FILE.
 *
 * @returns {Promise<{leadCount: number, outputFile: string}>}
 * @throws {Error} When credentials are missing, login fails, the search ID
 *   cannot be read from the URL, or no properties are found. Browser errors
 *   are logged and rethrown.
 */
async function scrapeLeads() {
  log('🚀 Starting Reonomy Scraper v9.1 (FIXED EDITION)...\n');

  if (!REONOMY_EMAIL || !REONOMY_PASSWORD) {
    throw new Error('REONOMY_EMAIL and REONOMY_PASSWORD environment variables must be set.');
  }

  const browser = await puppeteer.launch({
    headless: HEADLESS ? 'new' : false,
    args: ['--no-sandbox', '--disable-setuid-sandbox', '--window-size=1920,1080'],
  });

  let page = null;

  try {
    page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    log('\n🔐 Step 1: Logging into Reonomy...\n');
    await page.goto('https://app.reonomy.com/#!/account', {
      waitUntil: 'domcontentloaded',
      timeout: 60000,
    });
    await sleep(2000);

    await page.type('input[type="email"]', REONOMY_EMAIL, { delay: 100 });
    await page.type('input[type="password"]', REONOMY_PASSWORD, { delay: 100 });
    await page.click('button[type="submit"]');

    log('⏳ Waiting for login...');
    await sleep(15000);

    const urlAfterLogin = page.url();
    if (urlAfterLogin.includes('login') || urlAfterLogin.includes('auth')) {
      throw new Error('Login failed. Please check credentials.');
    }
    log('✅ Successfully logged in!');

    log('\n📍 Step 2: Navigating to search...\n');
    await page.goto(`https://app.reonomy.com/#!/search/${SEARCH_ID}`, {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });
    await sleep(3000);

    const urlMatch = page.url().match(/search\/([a-f0-9-]+)/);
    if (!urlMatch) {
      throw new Error('Could not extract search ID from URL');
    }
    const searchId = urlMatch[1];
    log(`✅ Search ID: ${searchId}`);

    log('\n📍 Step 3: Extracting property IDs...\n');
    const scrapedIds = await page.evaluate(() =>
      Array.from(document.querySelectorAll('a[href*="/property/"]'), (link) => {
        const match = link.href.match(/property\/([a-f0-9-]+)/);
        return match ? match[1] : null;
      }).filter(Boolean)
    );

    // De-duplicate the IDs and build the URLs here, where the real search ID
    // is known, rather than from path segments of the browser location.
    const propertyIds = [...new Set(scrapedIds)].map((id) => ({
      id,
      url: `https://app.reonomy.com/#!/search/${searchId}/property/${id}`,
    }));
    log(`✅ Found ${propertyIds.length} property IDs`);

    if (propertyIds.length === 0) {
      log('⚠️ No property IDs found.');
      throw new Error('No properties found on search page.');
    }

    const propertiesToScrape = propertyIds.slice(0, MAX_PROPERTIES);
    log(`\n📍 Step 4: Processing ${propertiesToScrape.length} properties...\n`);

    const leads = [];
    for (const [index, prop] of propertiesToScrape.entries()) {
      log(`\n[${index + 1}/${propertiesToScrape.length}] Property ID: ${prop.id}`);
      log(` 🔗 Clicking property...`);

      const clicked = await clickProperty(page, prop.id);
      // A click that did not change the URL to this property is treated the
      // same as no click at all.
      if (!clicked || !page.url().includes(prop.id)) {
        log(` ⚠️ Could not click property, trying to navigate directly...`);
        await page.goto(prop.url, { waitUntil: 'networkidle2', timeout: 30000 });
      }

      log(` ⏳ Waiting for Owner tab to load...`);
      await sleep(8000);

      log(` 📊 Extracting data from Owner tab...`);
      const propertyData = await extractOwnerTabData(page);

      leads.push({
        scrapeDate: new Date().toISOString().split('T')[0],
        propertyId: prop.id,
        propertyUrl: page.url(),
        address: propertyData.propertyAddress || '',
        city: propertyData.city || '',
        state: propertyData.state || '',
        zip: propertyData.zip || '',
        squareFootage: propertyData.squareFootage || '',
        propertyType: propertyData.propertyType || '',
        ownerNames: propertyData.ownerNames.join('; '),
        emails: propertyData.emails,
        phones: propertyData.phones,
      });

      log(` 📧 Emails: ${propertyData.emails.length}`);
      log(` 📞 Phones: ${propertyData.phones.length}`);
      log(` 👤 Owners: ${propertyData.ownerNames.length}`);
      log(` 📍 Address: ${propertyData.propertyAddress || 'N/A'}`);

      log(` 🔙 Going back to search results...`);
      await page.goto(`https://app.reonomy.com/#!/search/${searchId}`, {
        waitUntil: 'networkidle2',
        timeout: 30000,
      });
      await sleep(3000);
    }

    if (leads.length > 0) {
      log(`\n✅ Total leads scraped: ${leads.length}`);
      const outputData = {
        scrapeDate: new Date().toISOString(),
        searchId,
        leadCount: leads.length,
        leads,
      };
      fs.writeFileSync(OUTPUT_FILE, JSON.stringify(outputData, null, 2));
      log(`💾 Saved to: ${OUTPUT_FILE}`);
    } else {
      log('\n⚠️ No leads scraped.');
    }

    log('\n✅ Scraping complete!');
    return { leadCount: leads.length, outputFile: OUTPUT_FILE };
  } catch (error) {
    log(`\n❌ Error: ${error.message}`);
    log(error.stack);

    // The screenshot is best-effort diagnostics; its failure is logged and
    // must not mask the original error.
    if (page) {
      try {
        await page.screenshot({ path: ERROR_SCREENSHOT, fullPage: true });
        log(`📸 Error screenshot saved: ${ERROR_SCREENSHOT}`);
      } catch (screenshotError) {
        log(`⚠️ Could not save error screenshot: ${screenshotError.message}`);
      }
    }

    throw error;
  } finally {
    // Only release the browser here; the exit code is chosen by the caller
    // below so that a successful run can exit with 0.
    await browser.close();
    log('\n🔚 Browser closed');
  }
}

scrapeLeads()
  .then((result) => {
    log(`\n🎉 Success! ${result.leadCount} leads scraped.`);
    console.log(`\n💾 View your leads at: ${result.outputFile}`);
    process.exit(0);
  })
  .catch((error) => {
    log(`\n💥 Scraper failed: ${error.message}`);
    process.exit(1);
  });