#!/usr/bin/env node

/**
 * Simple Reonomy Lead Scraper - v2
 *
 * Focus: Capture ANY available data without getting stuck on empty email/phone fields
 *
 * Required environment: REONOMY_EMAIL, REONOMY_PASSWORD.
 * Optional environment: REONOMY_LOCATION, REONOMY_SHEET_ID, REONOMY_LOG_FILE, HEADLESS.
 */

const puppeteer = require('puppeteer');
const { execSync, execFileSync } = require('child_process');
const fs = require('fs');

// Configuration
// Credentials come from the environment only; no fallback secrets live in source,
// so the validation below is what actually enforces their presence.
const REONOMY_EMAIL = process.env.REONOMY_EMAIL;
const REONOMY_PASSWORD = process.env.REONOMY_PASSWORD;
const SEARCH_LOCATION = process.env.REONOMY_LOCATION || 'New York, NY';
const MAX_LEADS = 2; // Just scrape 2 owners as user requested

// How long to wait for each optional selector before treating it as absent.
const SELECTOR_TIMEOUT_MS = 5000;

// Validate credentials
if (!REONOMY_EMAIL || !REONOMY_PASSWORD) {
  console.error('āŒ Error: REONOMY_EMAIL and REONOMY_PASSWORD environment variables are required.');
  console.error(' Set them like:');
  console.error(` REONOMY_EMAIL="your@email.com"`);
  console.error(` REONOMY_PASSWORD="yourpassword"`);
  console.error(' Or run: REONOMY_EMAIL="your@email.com" REONOMY_PASSWORD="yourpassword" node reonomy-scraper.js');
  process.exit(1);
}

// Log file (overridable so the script is usable outside the original workstation)
const LOG_FILE = process.env.REONOMY_LOG_FILE || '/Users/jakeshore/.clawdbot/workspace/reonomy-simple.log';

/**
 * Sheet layout: one entry per column, in sheet order. Both the header row and
 * every appended row are derived from this list so they cannot drift apart.
 */
const SHEET_COLUMNS = [
  { header: 'Scrape Date', key: 'scrapeDate' },
  { header: 'Owner Name', key: 'ownerName' },
  { header: 'Property Address', key: 'propertyAddress' },
  { header: 'City', key: 'city' },
  { header: 'State', key: 'state' },
  { header: 'ZIP', key: 'zip' },
  { header: 'Property Type', key: 'propertyType' },
  { header: 'Square Footage', key: 'squareFootage' },
  { header: 'Owner Location', key: 'ownerLocation' },
  { header: 'Property Count', key: 'propertyCount' },
  { header: 'Property URL', key: 'propertyUrl' },
  { header: 'Owner URL', key: 'ownerUrl' },
  { header: 'Email', key: 'email' },
  { header: 'Phone', key: 'phone' }
];

const EMAIL_SELECTORS = [
  'a[href^="mailto:"]',
  '[data-test*="email"]',
  '.email-address',
  '.owner-email'
];

const NAME_SELECTORS = [
  '[data-person-id="people-contact-phone-1"]',
  '[data-person-id="people-contact-phone-2"]',
  '[data-person-id="people-contact-phone-3"]',
  '.owner-name',
  'h1',
  '.h2',
  'h3'
];

const PHONE_SELECTORS = [
  'a[href^="tel:"]',
  '[data-test*="phone"]',
  '.phone-number',
  '.owner-phone'
];

const EMAIL_PATTERN = /[^\s@<>"']+@[^\s@<>"']+\.[^\s@<>"']+/;

// Optional "+1"/"1" country prefix (group 1) followed by a 10-digit number (group 2).
const PHONE_PATTERN = /(\+?1[-.\s]?)?(\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4})/;

// NOTE(review): the glyph prefixes in the log messages below look like mis-encoded
// emoji; they are kept exactly as found because several cannot be decoded unambiguously.

let logFileWarningShown = false;

/**
 * Print a message to stdout and append it, timestamped, to LOG_FILE.
 * File logging is best-effort: a missing or unwritable log path produces a
 * single warning on stderr instead of crashing the scraper.
 *
 * @param {string} message - Text to log.
 */
function log(message) {
  const timestamp = new Date().toISOString();
  const logMessage = `[${timestamp}] ${message}\n`;
  console.log(message);
  try {
    fs.appendFileSync(LOG_FILE, logMessage);
  } catch (error) {
    if (!logFileWarningShown) {
      logFileWarningShown = true;
      console.error(`Could not write to log file ${LOG_FILE}: ${error.message}`);
    }
  }
}

/**
 * Resolve after the given delay.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise(resolve => setTimeout(resolve, ms));
}

/**
 * Execute gog CLI command
 *
 * @param {string|string[]} command - Either a raw command-line string (run
 *   through the shell, as before) or an array of arguments. The array form is
 *   executed without a shell, so scraped text cannot be interpreted as shell syntax.
 * @returns {string|null} Trimmed stdout, or null when the command fails.
 */
function gogCommand(command) {
  const options = { encoding: 'utf-8', timeout: 30000 };
  try {
    const output = Array.isArray(command)
      ? execFileSync('gog', command.map(String), options)
      : execSync(`gog ${command}`, options);
    return output.trim();
  } catch (error) {
    log(`āš ļø gog command failed: ${error.message}`);
    return null;
  }
}

/**
 * Get or create Google Sheet
 *
 * Uses REONOMY_SHEET_ID when set; otherwise creates a sheet through the gog CLI
 * and reads its ID from the JSON output, falling back to the first ID-looking
 * token in the raw output.
 *
 * @returns {Promise<string>} The spreadsheet ID.
 * @throws {Error} When the sheet cannot be created or no ID can be found.
 */
async function getOrCreateSheet() {
  log('šŸ“Š Checking Google Sheets...');

  const SHEET_ID = process.env.REONOMY_SHEET_ID;
  if (SHEET_ID) {
    log(`āœ… Using existing sheet: ${SHEET_ID}`);
    return SHEET_ID;
  }

  // Create a new sheet
  log('šŸ“ Creating new Google Sheet...');
  const output = gogCommand(['sheets', 'create', 'Reonomy Leads', '--json']);
  if (output === null) {
    // gogCommand already logged the underlying failure.
    throw new Error('Could not create Google Sheet: gog command failed');
  }

  try {
    const result = JSON.parse(output);
    const newSheetId = result && (result.spreadsheetId || result.id);
    if (!newSheetId) {
      // Valid JSON without an ID: fall through to the text-based extraction.
      throw new Error('no spreadsheet ID in JSON output');
    }
    log(`āœ… Created new sheet: ${newSheetId}`);
    return newSheetId;
  } catch (error) {
    log(`āš ļø Could not create Google Sheet: ${error.message}`);

    // Try to extract ID from text output
    const match = output.match(/([0-9A-Za-z_-]{20,})/);
    if (match) {
      log(`āœ… Extracted sheet ID from output: ${match[0]}`);
      return match[0];
    }
    throw new Error('Could not parse sheet ID from gog output');
  }
}

/**
 * Initialize sheet with headers
 *
 * @param {string} sheetId - Target spreadsheet ID.
 * @returns {Promise<void>} Failure is logged, not thrown.
 */
async function initializeSheet(sheetId) {
  log('šŸ“‹ Initializing sheet headers...');

  const headers = SHEET_COLUMNS.map(column => column.header);
  const result = gogCommand(['sheets', 'update', sheetId, 'Sheet1!A1', ...headers]);

  // gogCommand reports failure by returning null rather than throwing.
  if (result === null) {
    log('āš ļø Could not set headers: gog command failed');
    return;
  }
  log('āœ… Sheet headers initialized');
}

/**
 * Append row to Google Sheet
 *
 * @param {string} sheetId - Target spreadsheet ID.
 * @param {Object} rowData - Lead record; values are picked by SHEET_COLUMNS key
 *   so they line up with the header row. Missing values become empty cells.
 * @returns {Promise<boolean>} true when the row was appended, false otherwise.
 */
async function appendToSheet(sheetId, rowData) {
  const values = SHEET_COLUMNS.map(({ key }) => {
    const value = rowData[key];
    return value === null || value === undefined ? '' : String(value);
  });

  const result = gogCommand(['sheets', 'append', sheetId, 'Sheet1!A:N', ...values]);
  if (result === null) {
    log('āŒ Error appending to sheet: gog command failed');
    return false;
  }

  log(`āœ… Added: ${rowData.ownerName}`);
  return true;
}

/**
 * Wait for a selector that may legitimately be absent.
 *
 * @param {Object} page - Puppeteer page.
 * @param {string} selector - CSS selector to wait for.
 * @returns {Promise<Object|null>} The element handle, or null on timeout.
 */
async function waitForOptionalElement(page, selector) {
  try {
    return await page.waitForSelector(selector, { timeout: SELECTOR_TIMEOUT_MS });
  } catch (error) {
    // A timeout only means "this selector is not on the page"; anything else is real.
    if (error.name === 'TimeoutError') {
      return null;
    }
    throw error;
  }
}

/**
 * Pull a phone number out of free text and normalise its separators.
 *
 * @param {string} text - Element text or a tel: href.
 * @returns {string} Number with '-' and '.' replaced by spaces (prefixed with
 *   "+1 " when the source had a "+" country code), or '' when none is found.
 */
function normalizePhone(text) {
  const match = PHONE_PATTERN.exec(text || '');
  if (!match) {
    return '';
  }
  const national = match[2].replace(/[-.]/g, ' ').trim();
  const hasPlusPrefix = Boolean(match[1]) && match[1].startsWith('+');
  return hasPlusPrefix ? `+1 ${national}` : national;
}

/** Find the first email address exposed by any of EMAIL_SELECTORS ('' if none). */
async function findEmail(page) {
  for (const selector of EMAIL_SELECTORS) {
    const el = await waitForOptionalElement(page, selector);
    if (!el) {
      continue;
    }
    const { href, text } = await el.evaluate(e => ({
      href: e.getAttribute('href') || '',
      text: e.textContent || ''
    }));
    if (href.startsWith('mailto:')) {
      // Drop any "?subject=..." style query that follows the address.
      const address = href.slice('mailto:'.length).split('?')[0].trim();
      if (address) {
        return address;
      }
    }
    const match = text.match(EMAIL_PATTERN);
    if (match) {
      return match[0];
    }
  }
  return '';
}

/** Find the first NAME_SELECTORS element whose text is longer than 2 characters ('' if none). */
async function findOwnerName(page) {
  for (const selector of NAME_SELECTORS) {
    const el = await waitForOptionalElement(page, selector);
    if (!el) {
      continue;
    }
    const name = await el.evaluate(e => e.textContent);
    if (name && name.trim().length > 2) {
      return name.trim();
    }
  }
  return '';
}

/** Find the first phone number exposed by any of PHONE_SELECTORS ('' if none). */
async function findPhone(page) {
  for (const selector of PHONE_SELECTORS) {
    const el = await waitForOptionalElement(page, selector);
    if (!el) {
      continue;
    }
    // The callback runs in the page, so it may only use its own argument.
    const text = await el.evaluate(e => e.textContent || e.getAttribute('href') || '');
    const phone = normalizePhone(text);
    if (phone) {
      return phone;
    }
  }
  return '';
}

/**
 * Scan the page's visible text for an address, a property type and a square
 * footage figure. Runs inside the page, so results are returned as a plain
 * object instead of being written to Node-side variables.
 */
async function findPropertyDetails(page) {
  return page.evaluate(() => {
    const bodyText = document.body ? document.body.innerText : '';
    const firstMatch = (pattern) => {
      const found = bodyText.match(pattern);
      return found ? found[0] : '';
    };
    return {
      propertyAddress: firstMatch(/\d+\s+[A-Z][a-z]+,\s*[A-Z]{2}\s*\d{5}/),
      propertyType: firstMatch(/(General Industrial|Office|Retail|Multifamily|Warehouse|Mixed Use|Apartment|Hotel|Motel|Hospital|School|Health Care|Other)/i),
      squareFootage: firstMatch(/\d+\.?\d*\s*k\s*(?:SF|sq\s*ft)/i)
    };
  });
}

/**
 * Extract ANY data from page (simple, robust approach)
 *
 * Reads whatever is available from the page as currently loaded; the caller is
 * responsible for navigating to `url` first. Each lookup is independent, so a
 * failure in one does not discard what the others found.
 *
 * @param {Object} page - Puppeteer page already showing the owner view.
 * @param {string} url - URL recorded as both propertyUrl and ownerUrl.
 * @returns {Promise<Object>} Lead record keyed by the SHEET_COLUMNS keys.
 */
async function extractAnyAvailableData(page, url) {
  const data = {
    scrapeDate: new Date().toISOString().split('T')[0],
    ownerName: '',
    propertyAddress: '',
    city: '',
    state: '',
    zip: '',
    propertyType: '',
    squareFootage: '',
    ownerLocation: '',
    propertyCount: '',
    propertyUrl: url,
    ownerUrl: url,
    email: '',
    phone: ''
  };

  const steps = [
    // Method 1: Try to find ANY email address
    async () => {
      data.email = await findEmail(page);
      if (data.email) {
        log(`šŸ“§ Email found: ${data.email}`);
      }
    },
    // Method 2: Try to find owner name
    async () => {
      data.ownerName = await findOwnerName(page);
      if (data.ownerName) {
        log(`šŸ‘¤ Owner name: ${data.ownerName}`);
      }
    },
    // Method 3: Try to find phone
    async () => {
      data.phone = await findPhone(page);
      if (data.phone) {
        log(`šŸ“ž Phone found: ${data.phone}`);
      }
    },
    // Method 4: Try to extract property details
    async () => {
      Object.assign(data, await findPropertyDetails(page));
    }
  ];

  for (const step of steps) {
    try {
      await step();
    } catch (error) {
      log(`āš ļø Error extracting data: ${error.message}`);
    }
  }

  return data;
}

/**
 * Best-effort error screenshot; a screenshot failure must not hide the
 * error that triggered it.
 */
async function saveErrorScreenshot(page) {
  try {
    await page.screenshot({ path: '/tmp/reonomy-simple-error.png', fullPage: true });
    log('šŸ“ø Error screenshot saved: /tmp/reonomy-simple-error.png');
  } catch (screenshotError) {
    log(`āš ļø Could not save error screenshot: ${screenshotError.message}`);
  }
}

/**
 * Main scraper function
 *
 * Logs in, runs the location search, visits up to MAX_LEADS owner pages and
 * appends whatever was found to the Google Sheet. The browser is always closed,
 * and any failure is logged and rethrown so the process exits non-zero.
 *
 * @returns {Promise<{sheetId: string, leadCount: number}>}
 */
async function scrapeLeads() {
  log('šŸš€ Starting Reonomy Lead Scraper (Simple Mode)...\n');

  const browser = await puppeteer.launch({
    headless: process.env.HEADLESS === 'true' ? 'new' : false,
    args: ['--no-sandbox', '--disable-setuid-sandbox', '--window-size=1920,1080']
  });

  let page;
  const leads = [];

  try {
    page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Step 1: Get or create sheet
    const sheetId = await getOrCreateSheet();
    await initializeSheet(sheetId);

    // Step 2: Login
    log('\nšŸ“ Step 1: Logging into Reonomy...');
    await page.goto('https://app.reonomy.com/#!/account', {
      waitUntil: 'domcontentloaded',
      timeout: 60000
    });
    await sleep(2000);

    // Fill credentials
    await page.type('input[type="email"]', REONOMY_EMAIL, { delay: 100 });
    await page.type('input[type="password"]', REONOMY_PASSWORD, { delay: 100 });

    // Submit login
    await page.click('button[type="submit"]');
    log('ā³ Logging in...');

    // Wait for redirect
    await sleep(8000);

    // Check if logged in
    const currentUrl = page.url();
    if (currentUrl.includes('login') || currentUrl.includes('auth')) {
      throw new Error('Login failed. Please check credentials.');
    }
    log('āœ… Successfully logged in!');

    // Step 3: Navigate to search
    log('\nšŸ“ Step 2: Navigating to search...');
    await page.goto(`https://app.reonomy.com/#!/search`, {
      waitUntil: 'networkidle2',
      timeout: 30000
    });
    log('āœ… On search page');

    // Step 4: Search
    log(`\nšŸ“ Step 3: Searching for: ${SEARCH_LOCATION}...`);
    // waitForSelector throws on timeout, so a missing search box aborts the run.
    const searchInput = await page.waitForSelector(
      'input[placeholder*="address"], input[placeholder*="location"], input[placeholder*="Search"]',
      { timeout: 10000 }
    );
    await searchInput.click({ clickCount: 3 });
    await searchInput.type(SEARCH_LOCATION, { delay: 100 });
    await searchInput.press('Enter');
    log('ā³ Searching...');

    // Wait for results
    await sleep(5000);

    // Step 5: Find owner links
    log('\nšŸ“ Step 4: Finding owner links...');
    const ownerLinks = await page.evaluate((maxLeads) => {
      const seen = new Set();
      const links = [];
      document.querySelectorAll('a[href*="/person/"]').forEach(link => {
        const rawHref = link.getAttribute('href');
        if (!rawHref) {
          return;
        }
        let absoluteUrl;
        try {
          // Resolve relative/hash routes so the URL can be navigated to later.
          absoluteUrl = new URL(rawHref, document.baseURI).href;
        } catch (urlError) {
          return;
        }
        if (seen.has(absoluteUrl)) {
          return; // the same owner is often linked more than once
        }
        seen.add(absoluteUrl);
        links.push({
          ownerUrl: absoluteUrl,
          ownerId: absoluteUrl.split('/').pop()
        });
      });
      return links.slice(0, maxLeads);
    }, MAX_LEADS);

    log(`šŸ‘¤ Found ${ownerLinks.length} owner links`);

    // Step 6: Extract data from owner pages
    log('\nšŸ“ Step 5: Extracting data from owner pages (email, phone)...');
    for (const [index, { ownerUrl }] of ownerLinks.entries()) {
      log(`\n[${index + 1}/${ownerLinks.length}] Visiting owner: ${ownerUrl}`);

      try {
        // Actually open the owner page; extraction reads the current document.
        await page.goto(ownerUrl, { waitUntil: 'domcontentloaded', timeout: 30000 });
        await sleep(3000);
      } catch (navigationError) {
        log(`āš ļø Error extracting data: ${navigationError.message}`);
        continue;
      }

      const data = await extractAnyAvailableData(page, ownerUrl);

      // Ensure we have at least some data
      if (data.ownerName || data.email || data.phone || data.propertyAddress) {
        leads.push(data);
        log(` āœ… Collected: ${data.ownerName || data.email || 'Owner info'} - ${data.phone || 'Contact info'}`);
      } else {
        log(` āš ļø No contact info found for owner`);
      }
    }

    log(`\nāœ… Found ${leads.length} total leads`);

    // Step 7: Save leads
    log('\nšŸ“ Step 6: Saving leads to Google Sheet...');
    for (const lead of leads) {
      const success = await appendToSheet(sheetId, lead);
      if (!success) {
        log(` āŒ Failed to save lead: ${lead.ownerName}`);
      }
      await sleep(500);
    }

    log(`\nāœ… Scraping complete!`);
    log(`šŸ“Š Google Sheet: https://docs.google.com/spreadsheets/d/${sheetId}`);
    log(`šŸ“ Log file: ${LOG_FILE}`);

    return { sheetId, leadCount: leads.length };
  } catch (error) {
    log(`\nāŒ Error: ${error.message}`);
    log(error.stack);

    // Save error screenshot
    if (page) {
      await saveErrorScreenshot(page);
    }

    // Propagate so the caller reports failure and exits non-zero.
    throw error;
  } finally {
    try {
      await browser.close();
      log('\nšŸ”š Browser closed');
    } catch (closeError) {
      log(`āš ļø Could not close browser: ${closeError.message}`);
    }
  }
}

// Run scraper
scrapeLeads()
  .then(result => {
    log(`\nšŸŽ‰ Success! ${result.leadCount} leads scraped.`);
    console.log(`\nšŸ“Š View your leads at: https://docs.google.com/spreadsheets/d/${result.sheetId}`);
    process.exit(0);
  })
  .catch(error => {
    console.error(`\nšŸ’„ Scraper failed: ${error.message}`);
    process.exit(1);
  });