clawdbot-workspace/reonomy-scraper.js

289 lines
8.8 KiB
JavaScript

#!/usr/bin/env node
/**
* Reonomy Lead Scraper - Fixed Version
*
* Focus: Capture ANY available data without Google Sheets dependency
*/
const puppeteer = require('puppeteer');
const fs = require('fs');
const { execSync } = require('child_process');
// Configuration
// Credentials come from the environment only. They must never be hard-coded as
// fallbacks: that leaks the secret into source control and makes the
// "credentials are required" validation below impossible to trigger.
const REONOMY_EMAIL = process.env.REONOMY_EMAIL;
const REONOMY_PASSWORD = process.env.REONOMY_PASSWORD;
// `||` (not `??`) so that an empty REONOMY_LOCATION also falls back to the default.
const SEARCH_LOCATION = process.env.REONOMY_LOCATION || 'New York, NY';
const MAX_LEADS = 2; // Just scrape 2 owner pages as user requested
// Validate credentials: both values must be non-empty, otherwise print usage
// instructions to stderr and terminate with exit code 1.
if (!(REONOMY_EMAIL && REONOMY_PASSWORD)) {
  const usageLines = [
    '❌ Error: REONOMY_EMAIL and REONOMY_PASSWORD environment variables are required.',
    ' Set them like:',
    ' REONOMY_EMAIL="your@email.com"',
    ' REONOMY_PASSWORD="yourpassword"',
    ' Or run: REONOMY_EMAIL="your@email.com" REONOMY_PASSWORD="yourpassword" node reonomy-scraper.js'
  ];
  for (const usageLine of usageLines) {
    console.error(usageLine);
  }
  process.exit(1);
}
// Log file
const LOG_FILE = '/Users/jakeshore/.clawdbot/workspace/reonomy-fixed.log';

/**
 * Print a message to stdout and append a timestamped copy to LOG_FILE.
 *
 * Every entry is terminated with a newline so that records do not run
 * together on a single line in the log file.
 *
 * @param {string} message - Text to log; echoed to the console without the timestamp.
 * @throws Propagates any error raised by fs.appendFileSync (e.g. the log
 *   directory does not exist or is not writable).
 */
function log(message) {
  const timestamp = new Date().toISOString();
  console.log(message);
  fs.appendFileSync(LOG_FILE, `[${timestamp}] ${message}\n`);
}
/**
 * Pause for a fixed amount of time.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>} Promise that resolves once the timer fires.
 */
function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
/**
 * Open the Reonomy account page, submit the configured credentials and verify
 * that the browser is no longer on a login/auth URL.
 *
 * @param {object} page - Puppeteer page to drive.
 * @returns {Promise<void>}
 * @throws {Error} If the URL still contains "login" or "auth" after submitting.
 */
async function loginToReonomy(page) {
  await page.goto('https://app.reonomy.com/#!/account', {
    waitUntil: 'domcontentloaded',
    timeout: 60000
  });
  await sleep(2000);
  // Fill credentials
  await page.type('input[type="email"]', REONOMY_EMAIL, { delay: 100 });
  await page.type('input[type="password"]', REONOMY_PASSWORD, { delay: 100 });
  // Submit login
  await page.click('button[type="submit"]');
  log('⏳ Logging in...');
  // Wait for redirect
  await sleep(8000);
  // Check if we're logged in
  const currentUrl = page.url();
  if (currentUrl.includes('login') || currentUrl.includes('auth')) {
    throw new Error('Login failed. Please check credentials.');
  }
}

/**
 * Collect links to owner ("/person/") pages from the page currently loaded.
 *
 * @param {object} page - Puppeteer page to inspect.
 * @param {number} limit - Maximum number of links to return.
 * @returns {Promise<Array<{ownerUrl: string, ownerId: string}>>} Absolute owner
 *   URLs together with the last path segment of the original href.
 */
async function collectOwnerLinks(page, limit) {
  // The callback runs inside the browser, where Node-side variables do not
  // exist; the limit therefore has to be passed in as an argument.
  return page.evaluate((maxLinks) => {
    const links = [];
    document.querySelectorAll('a[href*="/person/"]').forEach((link) => {
      const rawHref = link.getAttribute('href');
      if (rawHref && rawHref.includes('/person/')) {
        links.push({
          // Resolve against the document so relative hrefs become absolute
          // URLs that page.goto() accepts.
          ownerUrl: new URL(rawHref, document.baseURI).href,
          ownerId: rawHref.split('/').pop()
        });
      }
    });
    return links.slice(0, maxLinks);
  }, limit);
}

/**
 * Extract whatever contact data is available from the owner page currently
 * loaded in `page`.
 *
 * @param {object} page - Puppeteer page already navigated to the owner page.
 * @param {string} ownerUrl - URL of the owner page, stored on the lead.
 * @param {string} scrapeDate - Date (YYYY-MM-DD) stored on the lead.
 * @returns {Promise<object>} Lead record; fields that could not be found are ''.
 */
async function extractOwnerData(page, ownerUrl, scrapeDate) {
  // ownerUrl and scrapeDate are Node-side values, so they are handed to the
  // browser-side callback explicitly instead of being referenced as free
  // variables (which throws a ReferenceError inside the page).
  return page.evaluate((context) => {
    const phoneContactSelectors = [
      '[data-person-id="people-contact-phone-1"]',
      '[data-person-id="people-contact-phone-2"]',
      '[data-person-id="people-contact-phone-3"]'
    ];

    // Trimmed text of the first matching element that has non-empty text.
    const firstText = (selectors) => {
      for (const selector of selectors) {
        const text = document.querySelector(selector)?.textContent?.trim();
        if (text) return text;
      }
      return '';
    };

    // First non-empty phone candidate: data-value, visible text, then tel: href.
    const firstPhone = (selectors) => {
      for (const selector of selectors) {
        const el = document.querySelector(selector);
        if (!el) continue;
        const phoneValue =
          el.getAttribute('data-value')?.trim() ||
          el.textContent?.trim() ||
          el.getAttribute('href')?.replace(/^tel:/, '').trim();
        if (phoneValue) return phoneValue;
      }
      return '';
    };

    // NOTE(review): the name, location and property-count lookups below reuse
    // the phone-contact selectors, so when such an element exists all of these
    // fields receive the same text. This looks like a copy-paste placeholder;
    // replace with the real selectors once the page markup is confirmed.
    return {
      scrapeDate: context.scrapeDate,
      ownerName: firstText([...phoneContactSelectors, '.owner-name', 'h1', '.h2', 'h3']),
      email: '',
      phone: firstPhone([...phoneContactSelectors, 'a[href^="tel:"]', '.phone-number']),
      propertyAddress: '',
      city: '',
      state: '',
      zip: '',
      propertyType: '',
      squareFootage: '',
      ownerLocation: firstText(phoneContactSelectors),
      propertyCount: firstText(phoneContactSelectors),
      ownerUrl: context.ownerUrl
    };
  }, { scrapeDate, ownerUrl });
}

/**
 * Main scraper function.
 *
 * Launches a browser, logs into Reonomy, visits up to MAX_LEADS owner pages,
 * and writes every lead that yielded some data to a local JSON file.
 *
 * @returns {Promise<{sheetId: string, leadCount: number}>} Summary of the run.
 * @throws {Error} Re-throws any scraping error (after logging it and trying to
 *   save a screenshot) so the caller can report failure and set the exit code.
 */
async function scrapeLeads() {
  log('🚀 Starting Reonomy Lead Scraper (Fixed Version)...\n');
  const browser = await puppeteer.launch({
    headless: process.env.HEADLESS === 'true' ? 'new' : false,
    args: ['--no-sandbox', '--disable-setuid-sandbox', '--window-size=1920,1080']
  });
  const leads = [];
  const scrapeDate = new Date().toISOString().split('T')[0];
  // Declared outside the try so the catch block can screenshot it, but created
  // inside so the browser is still closed if newPage() fails.
  let page;
  try {
    page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Step 1: Get or create sheet
    log('\n📍 Step 1: Preparing data collection...');
    const sheetId = 'local-json';
    log(`💾 Will save leads to: reonomy-leads.json`);

    // Step 2: Login
    log('\n📍 Step 2: Logging into Reonomy...');
    await loginToReonomy(page);
    log('✅ Successfully logged in!');

    // Step 3: Find owner links
    log('\n📍 Step 3: Finding owner links...');
    const ownerLinks = await collectOwnerLinks(page, MAX_LEADS);
    log(`👤 Found ${ownerLinks.length} owner links`);

    // Step 4: Extract data from owner pages
    log('\n📍 Step 4: Extracting contact info from owner pages...');
    for (const [index, { ownerUrl }] of ownerLinks.entries()) {
      log(`\n[${index + 1}/${ownerLinks.length}] Visiting owner: ${ownerUrl}`);
      await page.goto(ownerUrl, {
        waitUntil: 'networkidle2',
        timeout: 30000
      });
      await sleep(3000);
      // Extract ANY data available (owner name, phone, location, property count)
      const data = await extractOwnerData(page, ownerUrl, scrapeDate);
      const hasData =
        data.ownerName || data.phone || data.propertyCount ||
        data.ownerLocation || data.propertyType || data.squareFootage;
      if (hasData) {
        // We got at least some data!
        log(` ✅ Collected: ${data.ownerName || 'Owner info'} - ${data.ownerLocation || data.propertyAddress || data.propertyType || data.squareFootage}`);
        leads.push(data);
      } else {
        log(' ⚠️ No data found on this owner page');
      }
    }
    log(`\n✅ Found ${leads.length} total leads`);

    // Step 5: Save to JSON file
    log('\n📍 Step 5: Saving leads to JSON file...');
    const filename = '/Users/jakeshore/.clawdbot/workspace/reonomy-leads.json';
    const output = {
      scrapeDate,
      leadCount: leads.length,
      location: SEARCH_LOCATION,
      leads
    };
    try {
      fs.writeFileSync(filename, JSON.stringify(output, null, 2));
      log('💾 Saved leads to ' + filename);
    } catch (error) {
      // Best effort, as in the original: a failed save is reported but does
      // not abort the run.
      log('❌ Error saving to JSON: ' + error.message);
    }
    log('\n✅ Scraping complete!');
    log('📝 Log file: ' + LOG_FILE);
    log('📊 Total leads collected: ' + leads.length);
    return { sheetId, leadCount: leads.length };
  } catch (error) {
    log('\n❌ Error: ' + error.message);
    log(error.stack);
    // Save error screenshot (only possible once a page exists)
    if (page) {
      try {
        await page.screenshot({ path: '/tmp/reonomy-fixed-error.png', fullPage: true });
        log('📸 Error screenshot saved: /tmp/reonomy-fixed-error.png');
      } catch (screenshotError) {
        // The screenshot is diagnostic only; never let it mask the real error.
        log('⚠️ Could not save error screenshot: ' + screenshotError.message);
      }
    }
    // Propagate the failure so the caller's rejection handler runs and the
    // process exits non-zero instead of reporting success.
    throw error;
  } finally {
    await browser.close();
    log('\n🔚 Browser closed');
  }
}
// Run scraper: report the lead count on success; on any failure log the
// reason and terminate with exit code 1.
(async () => {
  try {
    const result = await scrapeLeads();
    log(`\n🎉 Success! ${result.leadCount} leads scraped.`);
    console.log(`\n📊 View your leads in: ${'/Users/jakeshore/.clawdbot/workspace/reonomy-leads.json'}`);
  } catch (error) {
    log(`\n💥 Scraper failed: ${error.message}`);
    process.exit(1);
  }
})();