#!/usr/bin/env node
/**
 * Reonomy Lead Scraper
 *
 * Scrapes property and owner leads from Reonomy and exports to Google Sheets.
 *
 * Usage:
 *   node reonomy-scraper.js [options]
 *
 * Environment Variables:
 *   REONOMY_EMAIL       - Reonomy login email
 *   REONOMY_PASSWORD    - Reonomy login password
 *   REONOMY_SHEET_ID    - Google Sheet ID (optional, will create new sheet if not provided)
 *   REONOMY_SHEET_TITLE - Title for a newly created sheet (optional, default "Reonomy Leads")
 *   REONOMY_LOCATION    - Search location (e.g., "New York, NY")
 *   GOG_ACCOUNT         - Account passed to the gog CLI via --account (optional)
 *   HEADLESS            - Set to "true" for headless mode
 */

const puppeteer = require('puppeteer');
const { execFileSync, execSync } = require('child_process');
const fs = require('fs');
const path = require('path');

// Configuration from environment variables
const REONOMY_EMAIL = process.env.REONOMY_EMAIL;
const REONOMY_PASSWORD = process.env.REONOMY_PASSWORD;
const SHEET_ID = process.env.REONOMY_SHEET_ID;
const SHEET_TITLE = process.env.REONOMY_SHEET_TITLE || 'Reonomy Leads';
const SEARCH_LOCATION = process.env.REONOMY_LOCATION || 'New York, NY';
const HEADLESS = process.env.HEADLESS === 'true';
const MAX_PROPERTIES = 20; // Currently unused: property pages are skipped (no contact info there)
const MAX_OWNERS = 2; // Limit number of owners to scrape to avoid rate limiting
const PAGE_DELAY_MS = 3000; // Delay between page visits for rate limiting

// Validate credentials
if (!REONOMY_EMAIL || !REONOMY_PASSWORD) {
  console.error('āŒ Error: REONOMY_EMAIL and REONOMY_PASSWORD environment variables are required.');
  console.error(' Set them like: REONOMY_EMAIL="..." REONOMY_PASSWORD="..." node reonomy-scraper.js');
  process.exit(1);
}

// Log file
const LOG_FILE = path.join(__dirname, 'reonomy-scraper.log');

// Leads collected in memory when no Google Sheet is available
const jsonLeads = [];

/**
 * Lead columns in sheet order: [lead property, sheet header].
 * Single source of truth for both the header row and appended rows, so the
 * two cannot drift apart. Must stay within the "A:N" range (14 columns).
 */
const SHEET_COLUMNS = [
  ['scrapeDate', 'Scrape Date'],
  ['ownerName', 'Owner Name'],
  ['propertyAddress', 'Property Address'],
  ['city', 'City'],
  ['state', 'State'],
  ['zip', 'ZIP'],
  ['propertyType', 'Property Type'],
  ['squareFootage', 'Square Footage'],
  ['ownerLocation', 'Owner Location'],
  ['propertyCount', 'Property Count'],
  ['propertyUrl', 'Property URL'],
  ['ownerUrl', 'Owner URL'],
  ['email', 'Email'],
  ['phone', 'Phone']
];

// Selectors tried in order for contact details (specific IDs first)
const EMAIL_SELECTORS = [
  '#people-contact-email-id',
  '[data-person-id="people-contact-email-id"]',
  'a[href^="mailto:"]',
  '[data-test*="email"]',
  '[data-testid*="email"]',
  '.email-address',
  '.owner-email',
  '.contact-info [data-test*="email"]'
];

const PHONE_SELECTORS = [
  '#people-contact-phone-1',
  '#people-contact-phone-2',
  '#people-contact-phone-3',
  '[data-person-id="people-contact-phone-1"]',
  '[data-person-id="people-contact-phone-2"]',
  '[data-person-id="people-contact-phone-3"]',
  'a[href^="tel:"]',
  '[data-test*="phone"]',
  '[data-testid*="phone"]',
  '.phone-number',
  '.contact-info [data-test*="phone"]',
  '.owner-phone'
];

// Result shapes returned by the two detail-page extractors (also used on failure)
const EMPTY_PROPERTY_INFO = Object.freeze({
  email: '',
  phone: '',
  ownerName: '',
  propertyAddress: '',
  city: '',
  state: '',
  zip: '',
  propertyType: '',
  squareFootage: ''
});

const EMPTY_OWNER_INFO = Object.freeze({
  email: '',
  phone: '',
  ownerName: '',
  ownerLocation: '',
  propertyCount: ''
});

/**
 * Field rules consumed by extractFieldsInPage. `accept` names the check a
 * candidate text must pass: "any" takes the first element found, the other
 * modes keep trying later selectors until the text qualifies.
 */
const PROPERTY_FIELD_RULES = [
  {
    key: 'ownerName',
    accept: 'shortText',
    selectors: ['[data-test*="owner"]', '[data-testid*="owner"]', '.owner-name', '.owner', 'h1', 'h2']
  },
  {
    key: 'propertyAddress',
    accept: 'hasDigit',
    selectors: ['[data-test*="address"]', '[data-testid*="address"]', '.property-address', '.address', 'h1', 'h2']
  },
  {
    key: 'propertyType',
    accept: 'any',
    selectors: ['[data-test*="type"]', '[data-testid*="type"]', '.property-type', '.type']
  },
  {
    key: 'squareFootage',
    accept: 'any',
    selectors: ['[data-test*="sf"]', '[data-testid*="sf"]', '.square-footage', '.sf', '.sqft']
  }
];

const OWNER_FIELD_RULES = [
  {
    key: 'ownerName',
    accept: 'shortText',
    selectors: ['[data-test*="name"]', '[data-testid*="name"]', '.owner-name', '.person-name', 'h1', 'h2']
  },
  {
    key: 'ownerLocation',
    accept: 'hasComma',
    selectors: ['[data-test*="location"]', '[data-testid*="location"]', '.location', '.owner-location', '.city-state']
  },
  {
    key: 'propertyCount',
    accept: 'hasDigit',
    selectors: [
      '[data-test*="property-count"]',
      '[data-testid*="property-count"]',
      '.property-count',
      '.properties-owned',
      '.total-properties'
    ]
  }
];

/**
 * Print a message to the console and append it, timestamped, to LOG_FILE.
 * @param {string} message
 */
function log(message) {
  const timestamp = new Date().toISOString();
  console.log(message);
  fs.appendFileSync(LOG_FILE, `[${timestamp}] ${message}\n`);
}

/**
 * Resolve after the given number of milliseconds.
 * @param {number} ms
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Execute a gog CLI command and return its trimmed stdout.
 *
 * @param {string|string[]} command - Either a raw command-line string (run
 *   through the shell, as before) or an array of arguments. The array form is
 *   passed to the process directly, so values containing quotes, `$`, spaces
 *   or backticks arrive verbatim; use it whenever arguments include scraped
 *   or user-supplied text.
 * @returns {string} Trimmed stdout of the command.
 * @throws {Error} When the command fails without producing usable stdout.
 */
function gogCommand(command) {
  const account = process.env.GOG_ACCOUNT;
  const options = {
    encoding: 'utf-8',
    timeout: 30000,
    stdio: ['pipe', 'pipe', 'pipe']
  };

  try {
    let output;
    if (Array.isArray(command)) {
      const args = account ? ['--account', account, ...command] : command;
      output = execFileSync('gog', args.map(String), options);
    } else {
      const prefix = account ? `gog --account "${account}"` : 'gog';
      output = execSync(`${prefix} ${command}`, options);
    }
    return (output || '').trim();
  } catch (error) {
    if (error.status === 0) {
      throw error;
    }

    const stderr = error.stderr ? error.stderr.toString() : '';
    const stdout = error.stdout ? error.stdout.toString() : '';
    const stderrReportsError = stderr.includes('error') || stderr.includes('Error');

    // If we got useful output in stdout despite the error status, return it
    if (stdout.trim() && !stderrReportsError) {
      return stdout.trim();
    }

    // error.message covers spawn failures and timeouts, which have no stderr
    const detail = stderr || stdout || error.message || 'Unknown error';
    throw new Error(`gog command failed: ${detail}`, { cause: error });
  }
}

/**
 * Pull the spreadsheet ID out of `gog sheets create` output.
 * JSON output is preferred; if the output is not JSON, the first long
 * ID-looking token is used.
 * @param {string} output
 * @returns {string|null} The sheet ID, or null when none can be found.
 */
function parseSheetId(output) {
  let parsed;
  try {
    parsed = JSON.parse(output);
  } catch (error) {
    // Not JSON: try to extract the ID from text output
    const match = output.match(/([0-9A-Za-z_-]{20,})/);
    return match ? match[1] : null;
  }

  const id = parsed && (parsed.spreadsheetId || parsed.id);
  return id ? String(id) : null;
}

/**
 * Get or create Google Sheet
 * @returns {Promise<string|null>} Sheet ID, or null when leads should be
 *   written to the JSON file instead.
 */
async function getOrCreateSheet() {
  log('šŸ“Š Checking Google Sheets...');

  if (SHEET_ID) {
    log(`āœ… Using existing sheet: ${SHEET_ID}`);
    return SHEET_ID;
  }

  try {
    log('šŸ“ Creating new Google Sheet...');
    const output = gogCommand(['sheets', 'create', SHEET_TITLE, '--json']);
    const newSheetId = parseSheetId(output);
    if (!newSheetId) {
      throw new Error('Could not parse sheet ID from gog output');
    }
    log(`āœ… Created new sheet: ${newSheetId}`);
    return newSheetId;
  } catch (error) {
    log(`āš ļø Could not create Google Sheet: ${error.message}`);
    log('šŸ’¾ Leads will be saved to JSON file instead');
    return null;
  }
}

/**
 * Initialize sheet with headers. Failures are logged, not thrown.
 * @param {string} sheetId
 */
async function initializeSheet(sheetId) {
  log('šŸ“‹ Initializing sheet headers...');

  const headers = SHEET_COLUMNS.map(([, header]) => header);

  try {
    gogCommand(['sheets', 'update', sheetId, 'Sheet1!A1', ...headers]);
    log('āœ… Sheet headers initialized');
  } catch (error) {
    log(`āš ļø Could not set headers: ${error.message}`);
  }
}

/**
 * Append row to Google Sheet or save to JSON file
 * @param {string|null|undefined} sheetId - Target sheet; when falsy the lead
 *   is queued in memory for the JSON export.
 * @param {object} rowData - Lead object keyed by the SHEET_COLUMNS properties.
 */
async function appendToSheet(sheetId, rowData) {
  if (!sheetId) {
    jsonLeads.push(rowData);
    log(`āœ… Collected: ${rowData.ownerName} - ${rowData.propertyAddress}`);
    return;
  }

  // One argument per column; missing values become empty cells so that later
  // columns never shift left.
  const values = SHEET_COLUMNS.map(([key]) => {
    const value = rowData[key];
    return value == null ? '' : String(value);
  });

  try {
    gogCommand(['sheets', 'append', sheetId, 'Sheet1!A:N', ...values]);
    log(`āœ… Added: ${rowData.ownerName} - ${rowData.propertyAddress}`);
  } catch (error) {
    log(`āŒ Error appending to sheet: ${error.message}`);
  }
}

/**
 * Save leads to JSON file
 * @param {object[]} leads
 * @returns {string|null} Path of the written file, or null on failure.
 */
function saveToJsonFile(leads) {
  const filename = path.join(__dirname, 'reonomy-leads.json');
  const data = {
    scrapeDate: new Date().toISOString(),
    leadCount: leads.length,
    location: SEARCH_LOCATION,
    leads
  };

  try {
    fs.writeFileSync(filename, JSON.stringify(data, null, 2));
    log(`šŸ’¾ Saved ${leads.length} leads to ${filename}`);
    return filename;
  } catch (error) {
    log(`āŒ Error saving to JSON: ${error.message}`);
    return null;
  }
}

/**
 * Runs INSIDE the browser page (passed to page.evaluate), so it must not
 * reference anything from the Node scope; everything it needs arrives via
 * the serializable `spec` argument.
 *
 * @param {object} spec
 * @param {object} spec.defaults - Result shape with empty values.
 * @param {string[]} spec.emailSelectors
 * @param {string[]} spec.phoneSelectors
 * @param {{key: string, accept: string, selectors: string[]}[]} spec.fields
 * @param {boolean} spec.propertyCountFromText - Fall back to "<n> properties"
 *   in the page text when no propertyCount element qualified.
 * @returns {object} `spec.defaults` overlaid with whatever was found.
 */
function extractFieldsInPage(spec) {
  const textOf = (el) => (el.innerText || el.textContent || '').trim();

  const validators = {
    any: () => true,
    shortText: (text) => text.length > 2 && text.length < 100,
    hasDigit: (text) => /\d/.test(text),
    hasComma: (text) => text.includes(',')
  };

  // First element whose text passes the named check, or ''
  const pickText = (selectors, accept) => {
    const isAcceptable = validators[accept] || validators.any;
    for (const selector of selectors) {
      const el = document.querySelector(selector);
      if (!el) continue;
      const text = textOf(el);
      if (isAcceptable(text)) return text;
    }
    return '';
  };

  // Contact value of the first matching element. For mailto:/tel: links the
  // href is authoritative, because the visible text may just be a label.
  const pickContact = (selectors, scheme) => {
    for (const selector of selectors) {
      const el = document.querySelector(selector);
      if (!el) continue;
      const href = el.getAttribute('href') || '';
      if (href.startsWith(scheme)) {
        return href.slice(scheme.length).split('?')[0].trim();
      }
      const text = textOf(el);
      return text.startsWith(scheme) ? text.slice(scheme.length) : text;
    }
    return '';
  };

  const info = { ...spec.defaults };
  const bodyText = (document.body && document.body.innerText) || '';

  info.email = pickContact(spec.emailSelectors, 'mailto:');
  if (!info.email) {
    const emailMatch = bodyText.match(/[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/);
    if (emailMatch) info.email = emailMatch[0];
  }

  info.phone = pickContact(spec.phoneSelectors, 'tel:');
  if (!info.phone) {
    // Digit boundaries keep this from matching inside longer numbers (IDs, etc.)
    const phoneMatch = bodyText.match(/(?<!\d)\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}(?!\d)/);
    if (phoneMatch) info.phone = phoneMatch[0];
  }

  for (const field of spec.fields) {
    info[field.key] = pickText(field.selectors, field.accept);
  }

  if (spec.propertyCountFromText && !info.propertyCount) {
    const countMatch = bodyText.match(/(\d+)\s*propert(?:y|ies)/i);
    if (countMatch) info.propertyCount = countMatch[1];
  }

  return info;
}

/**
 * Extract contact info from a property detail page
 * @param {object} page - Puppeteer page.
 * @param {string} propertyUrl
 * @returns {Promise<object>} Object shaped like EMPTY_PROPERTY_INFO; all
 *   fields are empty strings when the page could not be processed.
 */
async function extractPropertyContactInfo(page, propertyUrl) {
  log(` šŸ  Visiting property: ${propertyUrl}`);

  try {
    await page.goto(propertyUrl, { waitUntil: 'networkidle2', timeout: 60000 });
    await sleep(2000); // Wait for dynamic content to load

    const contactInfo = await page.evaluate(extractFieldsInPage, {
      defaults: EMPTY_PROPERTY_INFO,
      emailSelectors: EMAIL_SELECTORS,
      phoneSelectors: PHONE_SELECTORS,
      fields: PROPERTY_FIELD_RULES,
      propertyCountFromText: false
    });

    log(` šŸ“§ Email: ${contactInfo.email || 'Not found'}`);
    log(` šŸ“ž Phone: ${contactInfo.phone || 'Not found'}`);
    return contactInfo;
  } catch (error) {
    log(` āš ļø Error extracting from property page: ${error.message}`);
    return { ...EMPTY_PROPERTY_INFO };
  }
}

/**
 * Save a screenshot and the HTML of the current owner page under /tmp.
 * Best-effort: a failure here is logged and must not discard the lead.
 */
async function saveOwnerDebugArtifacts(page, ownerUrl) {
  const ownerMatch = ownerUrl.match(/person\/([a-zA-Z0-9_-]+)/);
  const ownerId = ownerMatch ? ownerMatch[1] : 'unknown';

  try {
    const debugPath = `/tmp/reonomy-owner-${ownerId}.png`;
    await page.screenshot({ path: debugPath, fullPage: true });
    log(` šŸ“ø Debug screenshot saved: ${debugPath}`);

    const htmlPath = `/tmp/reonomy-owner-${ownerId}.html`;
    fs.writeFileSync(htmlPath, await page.content());
    log(` šŸ“„ Debug HTML saved: ${htmlPath}`);
  } catch (error) {
    log(` Debug artifacts not saved: ${error.message}`);
  }
}

/**
 * Extract contact info from an owner detail page
 * @param {object} page - Puppeteer page.
 * @param {string} ownerUrl
 * @returns {Promise<object>} Object shaped like EMPTY_OWNER_INFO; all fields
 *   are empty strings when the page could not be processed.
 */
async function extractOwnerContactInfo(page, ownerUrl) {
  log(` šŸ‘¤ Visiting owner: ${ownerUrl}`);

  try {
    await page.goto(ownerUrl, { waitUntil: 'networkidle2', timeout: 60000 });
    await sleep(2000); // Wait for dynamic content to load

    await saveOwnerDebugArtifacts(page, ownerUrl);

    const contactInfo = await page.evaluate(extractFieldsInPage, {
      defaults: EMPTY_OWNER_INFO,
      emailSelectors: EMAIL_SELECTORS,
      phoneSelectors: PHONE_SELECTORS,
      fields: OWNER_FIELD_RULES,
      propertyCountFromText: true
    });

    log(` šŸ“§ Email: ${contactInfo.email || 'Not found'}`);
    log(` šŸ“ž Phone: ${contactInfo.phone || 'Not found'}`);
    return contactInfo;
  } catch (error) {
    log(` āš ļø Error extracting from owner page: ${error.message}`);
    return { ...EMPTY_OWNER_INFO };
  }
}

/**
 * Build a lead with every sheet column present (empty by default).
 * @param {object} fields - Values overriding the empty defaults.
 */
function createLead(fields) {
  const lead = {};
  for (const [key] of SHEET_COLUMNS) {
    lead[key] = '';
  }
  return Object.assign(lead, fields);
}

/** Resolve the sheet to write to (if any) and make sure it has a header row. */
async function prepareSheet() {
  const sheetId = await getOrCreateSheet();

  if (!sheetId) {
    // No sheet available, prepare to save to file
    log('šŸ’¾ Will save leads to: reonomy-leads.json');
    return sheetId;
  }

  let hasHeaders = false;
  try {
    const existingData = gogCommand(['sheets', 'get', sheetId, 'Sheet1!A1:N1', '--plain']);
    hasHeaders = existingData.includes('Owner Name');
  } catch (error) {
    // Sheet might be empty; fall through and initialize it
  }

  if (!hasHeaders) {
    await initializeSheet(sheetId);
  }
  return sheetId;
}

/** Log into Reonomy with the configured credentials; throws if still on a login/auth URL. */
async function loginToReonomy(page) {
  log('\nšŸ“ Step 1: Logging into Reonomy...');

  await page.goto('https://app.reonomy.com/#!/account', {
    waitUntil: 'domcontentloaded',
    timeout: 60000
  });
  await sleep(2000);

  // Fill credentials
  await page.type('input[type="email"]', REONOMY_EMAIL, { delay: 100 });
  await page.type('input[type="password"]', REONOMY_PASSWORD, { delay: 100 });

  // Submit login
  await page.click('button[type="submit"]');
  log('ā³ Logging in...');
  await sleep(8000);

  // Check if we're logged in
  const url = page.url();
  if (url.includes('login') || url.includes('auth')) {
    throw new Error('Login failed. Please check credentials.');
  }

  log('āœ… Successfully logged in!');
}

/** Locate the search box, trying a generic text input as fallback; null when neither appears. */
async function findSearchInput(page) {
  try {
    return await page.waitForSelector(
      'input[placeholder*="address"], input[placeholder*="Search"]',
      { timeout: 10000 }
    );
  } catch (primaryError) {
    try {
      // Try alternative selector
      return await page.waitForSelector('input[type="text"]', { timeout: 5000 });
    } catch (fallbackError) {
      return null;
    }
  }
}

/** Open the search page and submit SEARCH_LOCATION. */
async function runLocationSearch(page) {
  log('\nšŸ“ Step 2: Navigating to search...');
  await page.goto('https://app.reonomy.com/#!/search', {
    waitUntil: 'networkidle2',
    timeout: 60000
  });
  await sleep(3000);
  log('āœ… On search page');

  log(`\nšŸ“ Step 3: Searching for: ${SEARCH_LOCATION}`);
  const searchInput = await findSearchInput(page);

  if (!searchInput) {
    log('āš ļø Could not find search input, trying alternative method...');
    return;
  }

  await searchInput.click({ clickCount: 3 }); // Select all
  await searchInput.type(SEARCH_LOCATION, { delay: 100 });
  await sleep(1000);

  // Press Enter to search
  await page.keyboard.press('Enter');
  log('ā³ Searching...');

  // Wait for results to load
  await sleep(5000);
}

/** Visit up to MAX_OWNERS owner pages and turn each into a lead. */
async function collectOwnerLeads(page, ownerLinks, scrapeDate) {
  const leads = [];
  const ownersToScrape = ownerLinks.slice(0, MAX_OWNERS);

  for (let i = 0; i < ownersToScrape.length; i++) {
    log(`\n[${i + 1}/${ownersToScrape.length}]`);

    const ownerUrl = ownersToScrape[i];
    const contactInfo = await extractOwnerContactInfo(page, ownerUrl);

    // Parse owner ID from URL; used as the name when the page yielded none
    const ownerMatch = ownerUrl.match(/person\/([^/]+)/);
    const ownerId = ownerMatch ? ownerMatch[1] : '';

    leads.push(
      createLead({
        scrapeDate,
        ownerName: contactInfo.ownerName || ownerId,
        ownerLocation: contactInfo.ownerLocation || '',
        propertyCount: contactInfo.propertyCount || '',
        ownerUrl,
        email: contactInfo.email || '',
        phone: contactInfo.phone || ''
      })
    );

    // Rate limiting between page visits
    if (i < ownersToScrape.length - 1) {
      await sleep(PAGE_DELAY_MS);
    }
  }

  return leads;
}

/** Write leads to the sheet, or to the JSON file when there is no sheet. */
async function persistLeads(sheetId, leads) {
  for (const lead of leads) {
    await appendToSheet(sheetId, lead);
    await sleep(500); // Rate limiting
  }

  // If no sheet, save to JSON
  if (!sheetId && jsonLeads.length > 0) {
    saveToJsonFile(jsonLeads);
  }
}

/**
 * Main scraper function
 * @returns {Promise<{sheetId: (string|null), leadCount: number, jsonFile: (string|null)}>}
 * @throws Rethrows any error from login, navigation or extraction after
 *   logging it and attempting an error screenshot.
 */
async function scrapeLeads() {
  log('šŸš€ Starting Reonomy Lead Scraper...\n');

  const browser = await puppeteer.launch({
    headless: HEADLESS ? 'new' : false,
    args: ['--no-sandbox', '--disable-setuid-sandbox', '--window-size=1920,1080']
  });

  const page = await browser.newPage();
  await page.setViewport({ width: 1920, height: 1080 });

  try {
    const sheetId = await prepareSheet();

    await loginToReonomy(page);
    await runLocationSearch(page);

    log('\nšŸ“ Step 4: Finding owner links (contact info is on owner pages)...');
    const { ownerLinks } = await extractLinksFromPage(page);
    log(`šŸ‘¤ Found ${ownerLinks.length} owner links`);

    const scrapeDate = new Date().toISOString().split('T')[0];

    // Skip property pages - no contact info there
    log('\nšŸ“ Step 5: Skipping property pages (no contact info)...');

    log('\nšŸ“ Step 6: Extracting contact info from owner pages...');
    const leads = await collectOwnerLeads(page, ownerLinks, scrapeDate);

    log(`\nāœ… Found ${leads.length} total leads`);

    if (leads.length === 0) {
      log('\nāš ļø No leads extracted. The page structure may have changed.');
      log(' Please check the screenshot and logs for details.');

      // Save screenshot for debugging
      await page.screenshot({ path: '/tmp/reonomy-no-leads.png', fullPage: true });
      log('šŸ“ø Screenshot saved: /tmp/reonomy-no-leads.png');
    } else {
      log('\nšŸ“ Step 7: Saving leads...');
      await persistLeads(sheetId, leads);
    }

    log('\nāœ… Scraping complete!');
    if (sheetId) {
      log(`šŸ“Š Google Sheet: https://docs.google.com/spreadsheets/d/${sheetId}`);
    } else {
      log('šŸ’¾ Leads saved to: reonomy-leads.json');
    }
    log(`šŸ“ Log file: ${LOG_FILE}`);

    return {
      sheetId,
      leadCount: leads.length,
      jsonFile: sheetId ? null : 'reonomy-leads.json'
    };
  } catch (error) {
    log(`\nāŒ Error: ${error.message}`);
    log(error.stack);

    // Save error screenshot (best-effort; the original error is what matters)
    try {
      await page.screenshot({ path: '/tmp/reonomy-error.png', fullPage: true });
      log('šŸ“ø Error screenshot saved: /tmp/reonomy-error.png');
    } catch (screenshotError) {
      log(`Could not save error screenshot: ${screenshotError.message}`);
    }

    throw error;
  } finally {
    await browser.close();
    log('\nšŸ”š Browser closed');
  }
}

/**
 * Extract property and owner links from the current page
 * @param {object} page - Puppeteer page.
 * @returns {Promise<{propertyLinks: string[], ownerLinks: string[]}>}
 *   De-duplicated canonical URLs; both lists are empty if extraction fails.
 */
async function extractLinksFromPage(page) {
  const propertyLinks = [];
  const ownerLinks = [];

  try {
    const links = await page.evaluate(() => {
      const propertyUrls = new Set();
      const ownerUrls = new Set();

      for (const anchor of document.querySelectorAll('a')) {
        const href = anchor.href || '';

        // Extract the property ID and reconstruct the full URL
        if (href.includes('/property/')) {
          const match = href.match(/property\/([a-zA-Z0-9_-]+)/);
          if (match) {
            propertyUrls.add(`https://app.reonomy.com/#!/property/${match[1]}`);
          }
        }

        // Extract the person ID and reconstruct the full URL
        if (href.includes('/person/') || href.includes('/owner/')) {
          const match = href.match(/(?:person|owner)\/([a-zA-Z0-9_-]+)/);
          if (match) {
            ownerUrls.add(`https://app.reonomy.com/#!/person/${match[1]}`);
          }
        }
      }

      return {
        propertyUrls: [...propertyUrls],
        ownerUrls: [...ownerUrls]
      };
    });

    propertyLinks.push(...links.propertyUrls);
    ownerLinks.push(...links.ownerUrls);
  } catch (error) {
    log(`āš ļø Error extracting links: ${error.message}`);
  }

  return { propertyLinks, ownerLinks };
}

/**
 * Extract leads from search results page (legacy, kept for compatibility)
 * @param {object} page - Puppeteer page.
 * @returns {Promise<object[]>} Leads that have a property address.
 */
async function extractLeadsFromPage(page) {
  const leads = [];

  try {
    // Try to find property cards/listings
    const properties = await page.evaluate(() => {
      const results = [];

      // Look for property cards - various possible selectors
      const selectors = [
        '[data-test*="property"]',
        '[data-testid*="property"]',
        '.property-card',
        '.listing-card',
        '.search-result',
        '.result-item'
      ];

      for (const selector of selectors) {
        const elements = document.querySelectorAll(selector);
        if (elements.length > 0) {
          elements.forEach((el) => {
            results.push(el.innerText);
          });
          break;
        }
      }

      // If no structured cards, try to extract from the whole page
      if (results.length === 0) {
        const bodyText = document.body.innerText;
        // Look for patterns that might be addresses
        const addressPattern = /\d+\s+[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*,\s*[A-Z]{2}\s*\d{5}/g;
        results.push(...(bodyText.match(addressPattern) || []));
      }

      return results.slice(0, 50); // Limit results
    });

    // Parse extracted data into lead objects
    const scrapeDate = new Date().toISOString().split('T')[0];
    for (const prop of properties) {
      const lead = parsePropertyData(prop, scrapeDate);
      if (lead && lead.propertyAddress) {
        leads.push(lead);
      }
    }
  } catch (error) {
    log(`āš ļø Error extracting from page: ${error.message}`);
  }

  return leads;
}

/**
 * Runs inside the browser page: text and URL of up to 20 anchors matching
 * `selector`.
 */
function collectAnchorsInPage(selector) {
  return Array.from(document.querySelectorAll(selector))
    .map((link) => ({ text: link.innerText || link.textContent, url: link.href }))
    .slice(0, 20);
}

/**
 * Extract leads from dashboard (legacy, kept for compatibility)
 * @param {object} page - Puppeteer page.
 * @returns {Promise<object[]>} Property leads followed by owner leads.
 */
async function extractLeadsFromDashboard(page) {
  const leads = [];
  const scrapeDate = new Date().toISOString().split('T')[0];

  try {
    // Extract recently viewed properties
    const properties = await page.evaluate(collectAnchorsInPage, 'a[href*="/property/"]');
    for (const prop of properties) {
      const lead = parsePropertyData(prop.text, scrapeDate);
      if (lead && lead.propertyAddress) {
        lead.propertyUrl = prop.url;
        leads.push(lead);
      }
    }

    // Extract recently viewed owners
    const owners = await page.evaluate(collectAnchorsInPage, 'a[href*="/person/"]');
    for (const owner of owners) {
      const ownerLead = parseOwnerData(owner.text, scrapeDate);
      if (ownerLead && ownerLead.ownerName) {
        ownerLead.ownerUrl = owner.url;
        leads.push(ownerLead);
      }
    }
  } catch (error) {
    log(`āš ļø Error extracting from dashboard: ${error.message}`);
  }

  return leads;
}

/** Split text into trimmed, non-empty lines; tolerates null/undefined input. */
function toLines(text) {
  return String(text || '')
    .split('\n')
    .map((line) => line.trim())
    .filter((line) => line);
}

/**
 * Parse property data from text
 * @param {string} text - Card/link text; the first non-empty line is taken
 *   as the address.
 * @param {string} scrapeDate
 * @returns {object} Lead with all sheet columns present.
 */
function parsePropertyData(text, scrapeDate) {
  const lines = toLines(text);
  const isTypeLine = (line) =>
    line.includes('SF') || line.includes('Industrial') || line.includes('Office');

  return createLead({
    scrapeDate,
    propertyAddress: lines[0] || '',
    propertyType: lines.find(isTypeLine) || '',
    squareFootage: extractSquareFootage(String(text || ''))
  });
}

/**
 * Parse owner data from text
 * @param {string} text - Link text; the first non-empty line is taken as the
 *   owner name and the first line containing a comma as the location.
 * @param {string} scrapeDate
 * @returns {object} Lead with all sheet columns present.
 */
function parseOwnerData(text, scrapeDate) {
  const lines = toLines(text);

  return createLead({
    scrapeDate,
    ownerName: lines[0] || '',
    ownerLocation: lines.find((line) => line.includes(',')) || '',
    propertyCount: extractPropertyCount(String(text || ''))
  });
}

/**
 * Extract square footage from text
 * @param {string} text
 * @returns {string} e.g. "12,500 SF" or "10k SF"; '' when no figure is found.
 */
function extractSquareFootage(text) {
  // The "k" suffix is captured so its case follows the regex's /i flag, and
  // thousands separators are kept as part of the number.
  const match = text.match(/(\d[\d,]*\.?\d*)\s*(k?)\s*SF/i);
  if (!match) {
    return '';
  }
  return match[1] + (match[2] ? 'k SF' : ' SF');
}

/**
 * Extract property count from text
 * @param {string} text
 * @returns {string} The digits preceding "property"/"properties", or ''.
 */
function extractPropertyCount(text) {
  const match = text.match(/(\d+)\s*propert(?:y|ies)/i);
  return match ? match[1] : '';
}

// Run scraper
scrapeLeads()
  .then((result) => {
    log(`\nšŸŽ‰ Success! ${result.leadCount} leads scraped.`);
    if (result.sheetId) {
      console.log(`\nšŸ“Š View your leads at: https://docs.google.com/spreadsheets/d/${result.sheetId}`);
    }
    process.exit(0);
  })
  .catch((error) => {
    log(`\nšŸ’„ Scraper failed: ${error.message}`);
    process.exit(1);
  });