clawdbot-workspace/reonomy-scraper.js.bak

#!/usr/bin/env node

/**
 * Reonomy Lead Scraper
 *
 * Scrapes property and owner leads from Reonomy and exports to Google Sheets.
 *
 * Usage:
 *   node reonomy-scraper.js [options]
 *
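 * Environment Variables:
 *   REONOMY_EMAIL        - Reonomy login email (required)
 *   REONOMY_PASSWORD     - Reonomy login password (required)
 *   REONOMY_SHEET_ID     - Google Sheet ID (optional; a new sheet is created if not provided)
 *   REONOMY_SHEET_TITLE  - Title for a newly created sheet (optional, defaults to "Reonomy Leads")
 *   REONOMY_LOCATION     - Search location (optional, defaults to "New York, NY")
 *   GOG_ACCOUNT          - Account passed to the gog CLI via --account (optional)
 *   HEADLESS             - Set to "true" for headless mode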
 */

const puppeteer = require('puppeteer');
const { execSync } = require('child_process');
const fs = require('fs');
const path = require('path');

// Configuration from environment variables
const REONOMY_EMAIL = process.env.REONOMY_EMAIL;
const REONOMY_PASSWORD = process.env.REONOMY_PASSWORD;
// When unset, getOrCreateSheet() tries to create a new sheet instead.
const SHEET_ID = process.env.REONOMY_SHEET_ID;
const SHEET_TITLE = process.env.REONOMY_SHEET_TITLE || 'Reonomy Leads';
const SEARCH_LOCATION = process.env.REONOMY_LOCATION || 'New York, NY';
// Only the exact string "true" enables headless mode; anything else shows the browser.
const HEADLESS = process.env.HEADLESS === 'true';
const MAX_PROPERTIES = 20; // Cap on property pages; currently not read anywhere in this file (property pages are skipped)
const MAX_OWNERS = 2; // Limit number of owners to scrape to avoid rate limiting
const PAGE_DELAY_MS = 3000; // Delay between page visits for rate limiting

// Validate credentials: both are needed for the login step, so fail fast
// with exit code 1 before a browser is launched.
if (!REONOMY_EMAIL || !REONOMY_PASSWORD) {
  console.error('❌ Error: REONOMY_EMAIL and REONOMY_PASSWORD environment variables are required.');
  console.error('   Set them like: REONOMY_EMAIL="..." REONOMY_PASSWORD="..." node reonomy-scraper.js');
  process.exit(1);
}

// Log file (lives next to this script)
const LOG_FILE = path.join(__dirname, 'reonomy-scraper.log');

/**
 * Print a message to stdout and append a timestamped copy to LOG_FILE.
 *
 * @param {string} message - Text to report.
 */
function log(message) {
  const stamp = new Date().toISOString();
  console.log(message);
  fs.appendFileSync(LOG_FILE, `[${stamp}] ${message}\n`);
}

/**
 * Pause for the given number of milliseconds.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>} Resolves once the delay has elapsed.
 */
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}

/**
 * Execute gog CLI command
 *
 * Runs `gog <command>` through the shell (with `--account` when GOG_ACCOUNT
 * is set) and returns the trimmed stdout. stderr is captured but not
 * returned on success.
 *
 * A non-zero exit is tolerated only when the process ran to completion,
 * produced stdout, and stderr does not mention an error; in that case the
 * stdout is returned. A command that was killed (timeout or signal) is never
 * treated as successful, because its stdout may be partial.
 *
 * NOTE(review): `command` is interpolated into a shell command line as-is;
 * callers are responsible for quoting any arguments they embed.
 *
 * @param {string} command - Arguments to pass to gog, already shell-quoted.
 * @returns {string} Trimmed stdout of the command.
 * @throws {Error} "gog command failed: <detail>" when the command fails.
 */
function gogCommand(command) {
  const account = process.env.GOG_ACCOUNT;
  const fullCommand = account
    ? `gog --account "${account}" ${command}`
    : `gog ${command}`;

  try {
    const output = execSync(fullCommand, {
      encoding: 'utf-8',
      timeout: 30000,
      stdio: ['pipe', 'pipe', 'pipe']
    });

    return (output || '').trim();
  } catch (error) {
    const stderr = error.stderr ? error.stderr.toString().trim() : '';
    const stdout = error.stdout ? error.stdout.toString().trim() : '';
    const stderrReportsError = stderr.includes('error') || stderr.includes('Error');
    // execSync reports a timeout via code ETIMEDOUT and a kill via `signal`.
    const wasKilled = Boolean(error.signal) || error.code === 'ETIMEDOUT';

    // Some invocations exit non-zero yet still print a usable result.
    if (stdout && !stderrReportsError && !wasKilled) {
      return stdout;
    }

    // Fall back to the underlying error message so timeouts and spawn
    // failures are not reported as an opaque "Unknown error".
    const detail = stderr || stdout || error.message || 'Unknown error';
    throw new Error(`gog command failed: ${detail}`, { cause: error });
  }
}

/**
 * Get or create Google Sheet
 *
 * Returns SHEET_ID when one is configured. Otherwise asks gog to create a
 * sheet titled SHEET_TITLE and extracts the new sheet's ID from the output.
 * Any failure is logged and reported as `null`, which callers treat as
 * "save to JSON instead".
 *
 * @returns {Promise<string|null>} The sheet ID, or null when no sheet is available.
 */
async function getOrCreateSheet() {
  log('📊 Checking Google Sheets...');

  if (SHEET_ID) {
    log(`✅ Using existing sheet: ${SHEET_ID}`);
    return SHEET_ID;
  }

  try {
    log('📝 Creating new Google Sheet...');
    const output = gogCommand(`sheets create "${SHEET_TITLE}" --json`);

    const newSheetId = parseSheetId(output);
    if (!newSheetId) {
      throw new Error('Could not parse sheet ID from gog output');
    }

    log(`✅ Created new sheet: ${newSheetId}`);
    return newSheetId;
  } catch (error) {
    log(`⚠️  Could not create Google Sheet: ${error.message}`);
    log('💾 Leads will be saved to JSON file instead');
    return null;
  }
}

/**
 * Pull a spreadsheet ID out of gog's output.
 *
 * Tries JSON first (`spreadsheetId` or `id`); if the output is not JSON, or
 * the JSON carries no ID, falls back to the first token of 20+ ID-like
 * characters in the raw text.
 *
 * @param {string} output - Raw stdout from `gog sheets create`.
 * @returns {string|null} The ID, or null when none can be found.
 */
function parseSheetId(output) {
  try {
    const result = JSON.parse(output);
    const id = result && (result.spreadsheetId || result.id);
    if (id) {
      return String(id);
    }
  } catch (error) {
    // Not JSON; fall through to the text heuristic below.
  }

  const match = String(output).match(/([0-9A-Za-z_-]{20,})/);
  return match ? match[1] : null;
}

/**
 * Write the header row into cell A1 of Sheet1.
 *
 * Failures are logged and swallowed so that scraping can continue.
 *
 * @param {string} sheetId - Target Google Sheet ID.
 */
async function initializeSheet(sheetId) {
  log('📋 Initializing sheet headers...');

  const columnTitles = [
    'Scrape Date',
    'Owner Name',
    'Property Address',
    'City',
    'State',
    'ZIP',
    'Property Type',
    'Square Footage',
    'Owner Location',
    'Property Count',
    'Property URL',
    'Owner URL',
    'Email',
    'Phone'
  ];

  // Each title becomes one double-quoted CLI argument.
  const quotedTitles = [];
  for (const title of columnTitles) {
    quotedTitles.push(`"${title}"`);
  }
  const updateCommand = `sheets update ${sheetId} "Sheet1!A1" ${quotedTitles.join(' ')}`;

  try {
    gogCommand(updateCommand);
    log('✅ Sheet headers initialized');
  } catch (error) {
    log(`⚠️  Could not set headers: ${error.message}`);
  }
}

/**
 * Append row to Google Sheet or save to JSON file
 *
 * With a sheet ID, every value of `rowData` (in property order) is passed to
 * `gog sheets append` as one shell argument. Without one, the row is pushed
 * onto the in-memory `jsonLeads` list for a later JSON export.
 *
 * Sheet errors are logged, not thrown, so one bad row does not stop the run.
 *
 * @param {string|null} sheetId - Target sheet ID, or a falsy value to collect in memory.
 * @param {Object} rowData - Lead record; its values become the row's cells.
 */
async function appendToSheet(sheetId, rowData) {
  if (!sheetId) {
    // Save to JSON file
    jsonLeads.push(rowData);
    log(`✅ Collected: ${rowData.ownerName} - ${rowData.propertyAddress}`);
    return;
  }

  const values = Object.values(rowData).map(quoteShellArg).join(' ');

  try {
    gogCommand(`sheets append ${sheetId} "Sheet1!A:N" ${values}`);
    log(`✅ Added: ${rowData.ownerName} - ${rowData.propertyAddress}`);
  } catch (error) {
    log(`❌ Error appending to sheet: ${error.message}`);
  }
}

/**
 * Quote one cell value as a single POSIX-shell argument.
 *
 * Single quotes disable all shell expansion, so scraped text containing
 * `$`, backticks, backslashes or double quotes is passed through literally.
 * null/undefined become an explicit empty argument ('') so the cell keeps
 * its column instead of vanishing from the command line.
 *
 * @param {*} value - Cell value.
 * @returns {string} The value wrapped in single quotes.
 */
function quoteShellArg(value) {
  const text = value === null || value === undefined ? '' : String(value);
  // A single quote cannot appear inside single quotes: close, escape, reopen.
  return `'${text.replace(/'/g, `'\\''`)}'`;
}

/**
 * Write the collected leads to reonomy-leads.json next to this script.
 *
 * The file wraps the leads with the export timestamp, the lead count and
 * the searched location.
 *
 * @param {Array<Object>} leads - Lead records to persist.
 * @returns {string|null} Path of the written file, or null if writing failed.
 */
function saveToJsonFile(leads) {
  const targetPath = path.join(__dirname, 'reonomy-leads.json');
  const snapshot = {
    scrapeDate: new Date().toISOString(),
    leadCount: leads.length,
    location: SEARCH_LOCATION,
    leads
  };

  try {
    const serialized = JSON.stringify(snapshot, null, 2);
    fs.writeFileSync(targetPath, serialized);
    log(`💾 Saved ${leads.length} leads to ${targetPath}`);
    return targetPath;
  } catch (error) {
    log(`❌ Error saving to JSON: ${error.message}`);
    return null;
  }
}

// Global array to store leads when not using Google Sheets.
// Only ever mutated with push(), never rebound, hence const.
const jsonLeads = [];

/**
 * Extract contact info from a property detail page
 *
 * Navigates `page` to `propertyUrl` and scrapes it with a list of candidate
 * selectors per field, falling back to a regex over the page text for email
 * and phone. All values are trimmed.
 *
 * For email/phone, a `mailto:`/`tel:` href is preferred over the link text
 * (the text is often just a label), and elements with no usable text are
 * skipped rather than ending the search.
 *
 * city, state and zip are always returned empty; nothing here populates them.
 *
 * @param {Object} page - Puppeteer page to drive.
 * @param {string} propertyUrl - URL of the property detail page.
 * @returns {Promise<Object>} Record with email, phone, ownerName,
 *   propertyAddress, city, state, zip, propertyType and squareFootage.
 *   Every field is '' when extraction fails; errors are logged, not thrown.
 */
async function extractPropertyContactInfo(page, propertyUrl) {
  log(`  🏠 Visiting property: ${propertyUrl}`);

  try {
    await page.goto(propertyUrl, {
      waitUntil: 'networkidle2',
      timeout: 60000
    });

    await sleep(2000); // Wait for dynamic content to load

    // The callback is serialized and executed inside the browser page, so
    // every helper it uses has to be defined within it.
    const contactInfo = await page.evaluate(() => {
      const textOf = (el) => (el.innerText || el.textContent || '').trim();

      // Text of the first selector hit that passes `isAcceptable`.
      const firstText = (selectors, isAcceptable) => {
        for (const selector of selectors) {
          const el = document.querySelector(selector);
          if (!el) continue;
          const text = textOf(el);
          if (text && isAcceptable(text)) return text;
        }
        return '';
      };

      // Contact value of the first selector hit: href payload for
      // `scheme` links, otherwise the element text.
      const firstContact = (selectors, scheme) => {
        for (const selector of selectors) {
          const el = document.querySelector(selector);
          if (!el) continue;
          const href = el.getAttribute('href') || '';
          let value;
          if (href.toLowerCase().startsWith(scheme)) {
            // Drop the scheme and any "?subject=..." style query.
            value = href.slice(scheme.length).split('?')[0];
          } else {
            value = textOf(el);
            if (value.startsWith(scheme)) value = value.slice(scheme.length);
          }
          value = value.trim();
          if (value) return value;
        }
        return '';
      };

      const bodyText = document.body.innerText || '';
      const firstBodyMatch = (pattern) => {
        const match = bodyText.match(pattern);
        return match ? match[0] : '';
      };
      const anyText = () => true;

      // Specific IDs first, generic selectors after, page text last.
      const email = firstContact([
        '#people-contact-email-id',
        '[data-person-id="people-contact-email-id"]',
        'a[href^="mailto:"]',
        '[data-test*="email"]',
        '[data-testid*="email"]',
        '.email-address',
        '.owner-email',
        '.contact-info [data-test*="email"]'
      ], 'mailto:') || firstBodyMatch(/[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/);

      // NOTE(review): the text fallback accepts any 10-digit run in the
      // page text, so it can pick up non-phone numbers.
      const phone = firstContact([
        '#people-contact-phone-1',
        '#people-contact-phone-2',
        '#people-contact-phone-3',
        '[data-person-id="people-contact-phone-1"]',
        '[data-person-id="people-contact-phone-2"]',
        '[data-person-id="people-contact-phone-3"]',
        'a[href^="tel:"]',
        '[data-test*="phone"]',
        '[data-testid*="phone"]',
        '.phone-number',
        '.contact-info [data-test*="phone"]',
        '.owner-phone'
      ], 'tel:') || firstBodyMatch(/\(?(\d{3})\)?[-.\s]?(\d{3})[-.\s]?(\d{4})/);

      // Owner name: plausible-length text only.
      const ownerName = firstText([
        '[data-test*="owner"]',
        '[data-testid*="owner"]',
        '.owner-name',
        '.owner',
        'h1',
        'h2'
      ], (text) => text.length > 2 && text.length < 100);

      // Address: must contain at least one digit.
      const propertyAddress = firstText([
        '[data-test*="address"]',
        '[data-testid*="address"]',
        '.property-address',
        '.address',
        'h1',
        'h2'
      ], (text) => /\d/.test(text));

      const propertyType = firstText([
        '[data-test*="type"]',
        '[data-testid*="type"]',
        '.property-type',
        '.type'
      ], anyText);

      const squareFootage = firstText([
        '[data-test*="sf"]',
        '[data-testid*="sf"]',
        '.square-footage',
        '.sf',
        '.sqft'
      ], anyText);

      return {
        email,
        phone,
        ownerName,
        propertyAddress,
        city: '',
        state: '',
        zip: '',
        propertyType,
        squareFootage
      };
    });

    log(`    📧 Email: ${contactInfo.email || 'Not found'}`);
    log(`    📞 Phone: ${contactInfo.phone || 'Not found'}`);

    return contactInfo;

  } catch (error) {
    log(`    ⚠️  Error extracting from property page: ${error.message}`);
    return {
      email: '',
      phone: '',
      ownerName: '',
      propertyAddress: '',
      city: '',
      state: '',
      zip: '',
      propertyType: '',
      squareFootage: ''
    };
  }
}

/**
 * Extract contact info from an owner detail page
 *
 * Navigates `page` to `ownerUrl`, saves a debug screenshot and HTML dump to
 * /tmp (best effort; a failure there is logged and does not abort the
 * scrape), then reads email, phone, name, location and property count using
 * candidate selectors with page-text fallbacks. All values are trimmed.
 *
 * For email/phone, a `mailto:`/`tel:` href is preferred over the link text,
 * and elements with no usable text are skipped rather than ending the search.
 *
 * @param {Object} page - Puppeteer page to drive.
 * @param {string} ownerUrl - URL of the owner (person) detail page.
 * @returns {Promise<Object>} Record with email, phone, ownerName,
 *   ownerLocation and propertyCount. Every field is '' when extraction
 *   fails; errors are logged, not thrown.
 */
async function extractOwnerContactInfo(page, ownerUrl) {
  log(`  👤 Visiting owner: ${ownerUrl}`);

  try {
    await page.goto(ownerUrl, {
      waitUntil: 'networkidle2',
      timeout: 60000
    });

    await sleep(2000); // Wait for dynamic content to load

    // DEBUG artifacts are diagnostics only: never let them cost us the lead.
    try {
      const ownerMatch = ownerUrl.match(/person\/([a-zA-Z0-9_-]+)/);
      const ownerId = ownerMatch ? ownerMatch[1] : 'unknown';

      const debugPath = `/tmp/reonomy-owner-${ownerId}.png`;
      await page.screenshot({ path: debugPath, fullPage: true });
      log(`    📸 Debug screenshot saved: ${debugPath}`);

      const htmlPath = `/tmp/reonomy-owner-${ownerId}.html`;
      const htmlContent = await page.content();
      fs.writeFileSync(htmlPath, htmlContent);
      log(`    📄 Debug HTML saved: ${htmlPath}`);
    } catch (debugError) {
      log(`    ⚠️  Could not save debug artifacts: ${debugError.message}`);
    }

    // The callback is serialized and executed inside the browser page, so
    // every helper it uses has to be defined within it.
    const contactInfo = await page.evaluate(() => {
      const textOf = (el) => (el.innerText || el.textContent || '').trim();

      // Text of the first selector hit that passes `isAcceptable`.
      const firstText = (selectors, isAcceptable) => {
        for (const selector of selectors) {
          const el = document.querySelector(selector);
          if (!el) continue;
          const text = textOf(el);
          if (text && isAcceptable(text)) return text;
        }
        return '';
      };

      // Contact value of the first selector hit: href payload for
      // `scheme` links, otherwise the element text.
      const firstContact = (selectors, scheme) => {
        for (const selector of selectors) {
          const el = document.querySelector(selector);
          if (!el) continue;
          const href = el.getAttribute('href') || '';
          let value;
          if (href.toLowerCase().startsWith(scheme)) {
            // Drop the scheme and any "?subject=..." style query.
            value = href.slice(scheme.length).split('?')[0];
          } else {
            value = textOf(el);
            if (value.startsWith(scheme)) value = value.slice(scheme.length);
          }
          value = value.trim();
          if (value) return value;
        }
        return '';
      };

      const bodyText = document.body.innerText || '';
      const firstBodyMatch = (pattern) => {
        const match = bodyText.match(pattern);
        return match ? match[0] : '';
      };

      // Specific IDs first, generic selectors after, page text last.
      const email = firstContact([
        '#people-contact-email-id',
        '[data-person-id="people-contact-email-id"]',
        'a[href^="mailto:"]',
        '[data-test*="email"]',
        '[data-testid*="email"]',
        '.email-address',
        '.owner-email',
        '.contact-info [data-test*="email"]'
      ], 'mailto:') || firstBodyMatch(/[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/);

      // NOTE(review): the text fallback accepts any 10-digit run in the
      // page text, so it can pick up non-phone numbers.
      const phone = firstContact([
        '#people-contact-phone-1',
        '#people-contact-phone-2',
        '#people-contact-phone-3',
        '[data-person-id="people-contact-phone-1"]',
        '[data-person-id="people-contact-phone-2"]',
        '[data-person-id="people-contact-phone-3"]',
        'a[href^="tel:"]',
        '[data-test*="phone"]',
        '[data-testid*="phone"]',
        '.phone-number',
        '.contact-info [data-test*="phone"]',
        '.owner-phone'
      ], 'tel:') || firstBodyMatch(/\(?(\d{3})\)?[-.\s]?(\d{3})[-.\s]?(\d{4})/);

      // Owner name: plausible-length text only.
      const ownerName = firstText([
        '[data-test*="name"]',
        '[data-testid*="name"]',
        '.owner-name',
        '.person-name',
        'h1',
        'h2'
      ], (text) => text.length > 2 && text.length < 100);

      // Location: expected to look like "City, ST".
      const ownerLocation = firstText([
        '[data-test*="location"]',
        '[data-testid*="location"]',
        '.location',
        '.owner-location',
        '.city-state'
      ], (text) => text.includes(','));

      // Property count: element text must contain a digit.
      let propertyCount = firstText([
        '[data-test*="property-count"]',
        '[data-testid*="property-count"]',
        '.property-count',
        '.properties-owned',
        '.total-properties'
      ], (text) => /\d/.test(text));

      // Fallback: "<n> properties" in the page text. Thousands separators
      // are accepted and stripped so "1,234 properties" yields "1234".
      if (!propertyCount) {
        const countMatch = bodyText.match(/(\d{1,3}(?:,\d{3})+|\d+)\s*propert(?:y|ies)/i);
        if (countMatch) {
          propertyCount = countMatch[1].replace(/,/g, '');
        }
      }

      return { email, phone, ownerName, ownerLocation, propertyCount };
    });

    log(`    📧 Email: ${contactInfo.email || 'Not found'}`);
    log(`    📞 Phone: ${contactInfo.phone || 'Not found'}`);

    return contactInfo;

  } catch (error) {
    log(`    ⚠️  Error extracting from owner page: ${error.message}`);
    return {
      email: '',
      phone: '',
      ownerName: '',
      ownerLocation: '',
      propertyCount: ''
    };
  }
}

/**
 * Main scraper function
 *
 * Launches a browser, prepares the output sheet (or falls back to JSON),
 * logs into Reonomy, searches for SEARCH_LOCATION, visits up to MAX_OWNERS
 * owner pages and stores one lead per owner.
 *
 * The browser is always closed, including when opening the page fails.
 *
 * @returns {Promise<{sheetId: (string|null), leadCount: number, jsonFile: (string|null)}>}
 *   `jsonFile` is 'reonomy-leads.json' only when that file was actually
 *   written during this run, otherwise null.
 * @throws {Error} Any failure during login, navigation or search; it is
 *   logged (with an error screenshot when possible) and rethrown.
 */
async function scrapeLeads() {
  log('🚀 Starting Reonomy Lead Scraper...\n');

  const browser = await puppeteer.launch({
    headless: HEADLESS ? 'new' : false,
    args: ['--no-sandbox', '--disable-setuid-sandbox', '--window-size=1920,1080']
  });

  let page;
  let sheetId;

  try {
    // Opened inside the try so the finally block closes the browser even if
    // page creation fails.
    page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Step 1: Setup Google Sheet
    sheetId = await getOrCreateSheet();

    // If we have a sheet, initialize headers
    if (sheetId) {
      // Check if sheet has headers by trying to get them
      try {
        const existingData = gogCommand(`sheets get ${sheetId} "Sheet1!A1:N1" --plain`);
        if (!existingData.includes('Owner Name')) {
          await initializeSheet(sheetId);
        }
      } catch (error) {
        // Sheet might be empty, initialize it
        await initializeSheet(sheetId);
      }
    } else {
      // No sheet available, prepare to save to file
      log('💾 Will save leads to: reonomy-leads.json');
    }

    // Step 2: Login to Reonomy
    log('\n📍 Step 1: Logging into Reonomy...');
    await page.goto('https://app.reonomy.com/#!/account', {
      waitUntil: 'domcontentloaded',
      timeout: 60000
    });

    await sleep(2000);

    // Fill credentials
    await page.type('input[type="email"]', REONOMY_EMAIL, { delay: 100 });
    await page.type('input[type="password"]', REONOMY_PASSWORD, { delay: 100 });

    // Submit login
    await page.click('button[type="submit"]');
    log('⏳ Logging in...');

    await sleep(8000);

    // Still being on a login/auth URL after the wait means the login failed.
    const url = page.url();
    if (url.includes('login') || url.includes('auth')) {
      throw new Error('Login failed. Please check credentials.');
    }

    log('✅ Successfully logged in!');

    // Step 3: Navigate to search
    log('\n📍 Step 2: Navigating to search...');
    await page.goto('https://app.reonomy.com/#!/search', {
      waitUntil: 'networkidle2',
      timeout: 60000
    });

    await sleep(3000);
    log('✅ On search page');

    // Step 4: Enter search query
    log(`\n📍 Step 3: Searching for: ${SEARCH_LOCATION}`);
    const searchInput = await page.waitForSelector('input[placeholder*="address"], input[placeholder*="Search"]', {
      timeout: 10000
    }).catch(() => {
      // Try alternative selector
      return page.waitForSelector('input[type="text"]', { timeout: 5000 });
    });

    if (searchInput) {
      await searchInput.click({ clickCount: 3 }); // Select all
      await searchInput.type(SEARCH_LOCATION, { delay: 100 });
      await sleep(1000);

      // Press Enter to search
      await page.keyboard.press('Enter');
      log('⏳ Searching...');

      // Wait for results to load
      await sleep(5000);
    } else {
      log('⚠️  Could not find search input, trying alternative method...');
    }

    // Step 5: Extract leads from the page
    log('\n📍 Step 4: Finding owner links (contact info is on owner pages)...');

    // Only owner links are used; property pages carry no contact info.
    const { ownerLinks } = await extractLinksFromPage(page);

    log(`👤 Found ${ownerLinks.length} owner links`);

    const leads = [];
    const scrapeDate = new Date().toISOString().split('T')[0];

    // Skip property pages - no contact info there
    log('\n📍 Step 5: Skipping property pages (no contact info)...');

    // Step 6: Visit owner pages to extract contact info
    log('\n📍 Step 6: Extracting contact info from owner pages...');
    const ownersToScrape = ownerLinks.slice(0, MAX_OWNERS);

    for (let i = 0; i < ownersToScrape.length; i++) {
      log(`\n[${i + 1}/${ownersToScrape.length}]`);

      const ownerUrl = ownersToScrape[i];
      const contactInfo = await extractOwnerContactInfo(page, ownerUrl);

      // Parse owner ID from URL; used as the name when none was scraped
      const ownerMatch = ownerUrl.match(/person\/([^/]+)/);
      const ownerId = ownerMatch ? ownerMatch[1] : '';

      // Property order matters: appendToSheet writes values in this order.
      const lead = {
        scrapeDate,
        ownerName: contactInfo.ownerName || ownerId,
        propertyAddress: '',
        city: '',
        state: '',
        zip: '',
        propertyType: '',
        squareFootage: '',
        ownerLocation: contactInfo.ownerLocation || '',
        propertyCount: contactInfo.propertyCount || '',
        propertyUrl: '',
        ownerUrl: ownerUrl,
        email: contactInfo.email || '',
        phone: contactInfo.phone || ''
      };

      leads.push(lead);

      // Rate limiting between page visits
      if (i < ownersToScrape.length - 1) {
        await sleep(PAGE_DELAY_MS);
      }
    }

    log(`\n✅ Found ${leads.length} total leads`);

    // Set only once the JSON export has really been written.
    let jsonFile = null;

    if (leads.length === 0) {
      log('\n⚠️  No leads extracted. The page structure may have changed.');
      log('   Please check the screenshot and logs for details.');

      // Save screenshot for debugging
      await page.screenshot({ path: '/tmp/reonomy-no-leads.png', fullPage: true });
      log('📸 Screenshot saved: /tmp/reonomy-no-leads.png');
    } else {
      // Step 7: Save leads
      log('\n📍 Step 7: Saving leads...');

      for (const lead of leads) {
        await appendToSheet(sheetId, lead);
        await sleep(500); // Rate limiting
      }

      // If no sheet, save to JSON
      if (!sheetId && jsonLeads.length > 0 && saveToJsonFile(jsonLeads)) {
        jsonFile = 'reonomy-leads.json';
      }
    }

    log('\n✅ Scraping complete!');
    if (sheetId) {
      log(`📊 Google Sheet: https://docs.google.com/spreadsheets/d/${sheetId}`);
    } else if (jsonFile) {
      log('💾 Leads saved to: reonomy-leads.json');
    } else {
      log('💾 No leads were written to reonomy-leads.json');
    }
    log(`📝 Log file: ${LOG_FILE}`);

    return { sheetId, leadCount: leads.length, jsonFile };

  } catch (error) {
    log(`\n❌ Error: ${error.message}`);
    log(error.stack);

    // Save error screenshot (only possible once a page exists)
    if (page) {
      try {
        await page.screenshot({ path: '/tmp/reonomy-error.png', fullPage: true });
        log('📸 Error screenshot saved: /tmp/reonomy-error.png');
      } catch (e) {
        // Ignore screenshot errors: the original failure is what matters
      }
    }

    throw error;

  } finally {
    await browser.close();
    log('\n🔚 Browser closed');
  }
}

/**
 * Collect de-duplicated property and owner URLs from the anchors on the
 * current page.
 *
 * Each href is reduced to its ID and rebuilt as a canonical
 * https://app.reonomy.com/#!/property/<id> or .../#!/person/<id> URL
 * ("/owner/" links are normalised to "/person/"). Errors are logged and
 * whatever was gathered so far is returned.
 *
 * @param {Object} page - Puppeteer page showing the search results.
 * @returns {Promise<{propertyLinks: string[], ownerLinks: string[]}>}
 */
async function extractLinksFromPage(page) {
  const propertyLinks = [];
  const ownerLinks = [];

  try {
    const { propertyUrls, ownerUrls } = await page.evaluate(() => {
      // Sets keep first-seen order while dropping duplicates.
      const seenProperties = new Set();
      const seenOwners = new Set();

      for (const anchor of Array.from(document.querySelectorAll('a'))) {
        const target = anchor.href || '';

        if (target.includes('/property/')) {
          const hit = target.match(/property\/([a-zA-Z0-9_-]+)/);
          if (hit) {
            seenProperties.add(`https://app.reonomy.com/#!/property/${hit[1]}`);
          }
        }

        const looksLikeOwner = target.includes('/person/') || target.includes('/owner/');
        if (looksLikeOwner) {
          const hit = target.match(/(?:person|owner)\/([a-zA-Z0-9_-]+)/);
          if (hit) {
            seenOwners.add(`https://app.reonomy.com/#!/person/${hit[1]}`);
          }
        }
      }

      return {
        propertyUrls: [...seenProperties],
        ownerUrls: [...seenOwners]
      };
    });

    propertyLinks.push(...propertyUrls);
    ownerLinks.push(...ownerUrls);
  } catch (error) {
    log(`⚠️  Error extracting links: ${error.message}`);
  }

  return { propertyLinks, ownerLinks };
}

/**
 * Build leads from the text of the search results page (legacy, kept for
 * compatibility).
 *
 * Uses the first card selector that matches anything; if none does, falls
 * back to address-looking strings in the page text. At most 50 snippets are
 * parsed, and only those yielding a property address become leads. Errors
 * are logged and the leads gathered so far are returned.
 *
 * @param {Object} page - Puppeteer page showing search results.
 * @returns {Promise<Array<Object>>} Parsed lead records.
 */
async function extractLeadsFromPage(page) {
  const leads = [];

  try {
    const snippets = await page.evaluate(() => {
      // Candidate selectors for property cards, tried in order.
      const cardSelectors = [
        '[data-test*="property"]',
        '[data-testid*="property"]',
        '.property-card',
        '.listing-card',
        '.search-result',
        '.result-item'
      ];

      let texts = [];
      for (const selector of cardSelectors) {
        const cards = document.querySelectorAll(selector);
        if (cards.length === 0) {
          continue;
        }
        texts = Array.from(cards, (card) => card.innerText);
        break;
      }

      // No structured cards: scan the whole page for "<no> Street, ST 12345".
      if (texts.length === 0) {
        const addressPattern = /\d+\s+[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*,\s*[A-Z]{2}\s*\d{5}/g;
        texts = document.body.innerText.match(addressPattern) || [];
      }

      return texts.slice(0, 50); // Limit results
    });

    const scrapeDate = new Date().toISOString().split('T')[0];

    for (const snippet of snippets) {
      const candidate = parsePropertyData(snippet, scrapeDate);
      if (!candidate || !candidate.propertyAddress) {
        continue;
      }
      leads.push(candidate);
    }
  } catch (error) {
    log(`⚠️  Error extracting from page: ${error.message}`);
  }

  return leads;
}

/**
 * Build leads from the property and owner links shown on the dashboard
 * (legacy, kept for compatibility).
 *
 * Up to 20 "/property/" links and up to 20 "/person/" links are read; each
 * link's text is parsed into a lead and tagged with the link URL. Property
 * leads come first, then owner leads. Errors are logged and the leads
 * gathered so far are returned.
 *
 * @param {Object} page - Puppeteer page showing the dashboard.
 * @returns {Promise<Array<Object>>} Parsed lead records.
 */
async function extractLeadsFromDashboard(page) {
  const scrapeDate = new Date().toISOString().split('T')[0];
  const leads = [];

  // Runs in the browser: text and URL of the first 20 anchors for a selector.
  const collectAnchors = (selector) =>
    page.evaluate((sel) => {
      return Array.from(document.querySelectorAll(sel))
        .map((anchor) => ({
          text: anchor.innerText || anchor.textContent,
          url: anchor.href
        }))
        .slice(0, 20);
    }, selector);

  try {
    // Recently viewed properties
    const propertyAnchors = await collectAnchors('a[href*="/property/"]');
    for (const { text, url } of propertyAnchors) {
      const lead = parsePropertyData(text, scrapeDate);
      if (!lead || !lead.propertyAddress) {
        continue;
      }
      lead.propertyUrl = url;
      leads.push(lead);
    }

    // Recently viewed owners
    const ownerAnchors = await collectAnchors('a[href*="/person/"]');
    for (const { text, url } of ownerAnchors) {
      const ownerLead = parseOwnerData(text, scrapeDate);
      if (!ownerLead || !ownerLead.ownerName) {
        continue;
      }
      ownerLead.ownerUrl = url;
      leads.push(ownerLead);
    }
  } catch (error) {
    log(`⚠️  Error extracting from dashboard: ${error.message}`);
  }

  return leads;
}

/**
 * Turn the text of a property card into a lead record.
 *
 * The first non-blank line is taken as the address; the first line that
 * mentions "SF", "Industrial" or "Office" is taken as the property type.
 *
 * @param {string} text - Raw multi-line text of the card.
 * @param {string} scrapeDate - Date stamp to store on the lead.
 * @returns {Object} Lead record with all 14 columns (unknown ones are '').
 */
function parsePropertyData(text, scrapeDate) {
  const lines = [];
  for (const rawLine of text.split('\n')) {
    const line = rawLine.trim();
    if (line) {
      lines.push(line);
    }
  }

  const typeKeywords = ['SF', 'Industrial', 'Office'];
  const typeLine = lines.find((line) => typeKeywords.some((keyword) => line.includes(keyword)));
  const [addressLine = ''] = lines;

  // Key order defines the column order when the lead is written to a sheet.
  return {
    scrapeDate,
    ownerName: '',
    propertyAddress: addressLine,
    city: '',
    state: '',
    zip: '',
    propertyType: typeLine || '',
    squareFootage: extractSquareFootage(text),
    ownerLocation: '',
    propertyCount: '',
    propertyUrl: '',
    ownerUrl: '',
    email: '',
    phone: ''
  };
}

/**
 * Turn the text of an owner link into a lead record.
 *
 * The first non-blank line is taken as the owner name; the first line
 * containing a comma is taken as the owner's location.
 *
 * @param {string} text - Raw multi-line text of the link.
 * @param {string} scrapeDate - Date stamp to store on the lead.
 * @returns {Object} Lead record with all 14 columns (unknown ones are '').
 */
function parseOwnerData(text, scrapeDate) {
  const lines = [];
  for (const rawLine of text.split('\n')) {
    const line = rawLine.trim();
    if (line) {
      lines.push(line);
    }
  }

  const [nameLine = ''] = lines;
  const locationLine = lines.find((line) => line.includes(','));

  // Key order defines the column order when the lead is written to a sheet.
  return {
    scrapeDate,
    ownerName: nameLine,
    propertyAddress: '',
    city: '',
    state: '',
    zip: '',
    propertyType: '',
    squareFootage: '',
    ownerLocation: locationLine || '',
    propertyCount: extractPropertyCount(text),
    propertyUrl: '',
    ownerUrl: '',
    email: '',
    phone: ''
  };
}

/**
 * Extract square footage from text
 *
 * Finds the first "<number> SF" or "<number>k SF" (any letter case) and
 * returns it normalised as "<number> SF" or "<number>k SF". Numbers with
 * thousands separators ("12,500 SF") are kept whole, and an upper-case "K"
 * is recognised as the thousands suffix.
 *
 * @param {string} text - Text to search.
 * @returns {string} Normalised square footage, or '' when none is found.
 */
function extractSquareFootage(text) {
  // Group 1: the number (comma-grouped form tried first); group 2: optional k suffix.
  const match = text.match(/(\d{1,3}(?:,\d{3})+(?:\.\d+)?|\d+\.?\d*)\s*(k)?\s*SF/i);
  if (!match) {
    return '';
  }
  return match[1] + (match[2] ? 'k SF' : ' SF');
}

/**
 * Extract property count from text
 *
 * Finds the first "<n> property" / "<n> properties" (any letter case) and
 * returns the number as a digit string. Thousands separators are accepted
 * and removed, so "1,234 properties" yields "1234".
 *
 * @param {string} text - Text to search.
 * @returns {string} The count as digits, or '' when none is found.
 */
function extractPropertyCount(text) {
  const match = text.match(/(\d{1,3}(?:,\d{3})+|\d+)\s*propert(?:y|ies)/i);
  return match ? match[1].replace(/,/g, '') : '';
}

// Run scraper: report the outcome, then exit with 0 on success or 1 on failure.
async function runScraper() {
  try {
    const outcome = await scrapeLeads();
    log(`\n🎉 Success! ${outcome.leadCount} leads scraped.`);
    if (outcome.sheetId) {
      console.log(`\n📊 View your leads at: https://docs.google.com/spreadsheets/d/${outcome.sheetId}`);
    }
    process.exit(0);
  } catch (failure) {
    log(`\n💥 Scraper failed: ${failure.message}`);
    process.exit(1);
  }
}

runScraper();