#!/usr/bin/env node

/**
 * Reonomy Lead Scraper - Fixed Version
 *
 * Focus: Capture ANY available data without Google Sheets dependency
 */

const puppeteer = require('puppeteer');
const fs = require('fs');
const { execSync } = require('child_process');

// Configuration
//
// Credentials are read from the environment ONLY. Hard-coded fallback
// credentials must never live in source control: they leak the account and
// they also make the validation below unreachable (the values would never be
// empty). Any credential that was previously committed here should be rotated.
const REONOMY_EMAIL = process.env.REONOMY_EMAIL;
const REONOMY_PASSWORD = process.env.REONOMY_PASSWORD;
// Only recorded in the output JSON metadata.
const SEARCH_LOCATION = process.env.REONOMY_LOCATION || 'New York, NY';
const MAX_LEADS = 2; // Just scrape 2 owner pages as user requested

// Validate credentials: fail fast with usage help when either one is missing.
if (!REONOMY_EMAIL || !REONOMY_PASSWORD) {
  console.error('❌ Error: REONOMY_EMAIL and REONOMY_PASSWORD environment variables are required.');
  console.error(' Set them like:');
  console.error(` REONOMY_EMAIL="your@email.com"`);
  console.error(` REONOMY_PASSWORD="yourpassword"`);
  console.error(' Or run: REONOMY_EMAIL="your@email.com" REONOMY_PASSWORD="yourpassword" node reonomy-scraper.js');
  process.exit(1);
}

// Log file: every log() call appends one entry to this path.
const LOG_FILE = '/Users/jakeshore/.clawdbot/workspace/reonomy-fixed.log';

/**
 * Prints a message to stdout and appends a timestamped copy to LOG_FILE.
 *
 * Each entry is written on its own line; without the trailing newline all
 * entries would run together into a single unreadable line in the log file.
 *
 * @param {string} message - Text to log.
 * @returns {void}
 */
function log(message) {
  const timestamp = new Date().toISOString();
  const logMessage = `[${timestamp}] ${message}\n`;

  // The console gets the raw message; only the file copy carries the timestamp.
  console.log(message);
  fs.appendFileSync(LOG_FILE, logMessage);
}

/**
 * Returns a promise that resolves (with no value) after a delay.
 *
 * @param {number} ms - Delay in milliseconds, passed straight to setTimeout.
 * @returns {Promise<void>} Promise that settles once the timer fires.
 */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}

/**
 * Collects owner ("/person/") links from the page that is currently loaded.
 *
 * The limit is passed as an argument because the callback given to
 * page.evaluate() runs inside the browser, where Node-side variables such as
 * MAX_LEADS do not exist.
 *
 * @param {object} page - Puppeteer page to inspect.
 * @param {number} limit - Maximum number of links to return.
 * @returns {Promise<Array<{ownerUrl: string, ownerId: string}>>} Found links.
 */
async function collectOwnerLinks(page, limit) {
  return page.evaluate((maxLinks) => {
    const links = [];

    document.querySelectorAll('a[href*="/person/"]').forEach((anchor) => {
      const href = anchor.getAttribute('href');
      if (href && href.includes('/person/')) {
        links.push({
          // anchor.href is the browser-resolved absolute URL; page.goto()
          // cannot navigate to a relative href.
          ownerUrl: anchor.href || href,
          ownerId: href.split('/').pop()
        });
      }
    });

    return links.slice(0, maxLinks);
  }, limit);
}

/**
 * Extracts whatever owner details are present on the currently loaded page.
 *
 * scrapeDate and ownerUrl are passed in as arguments because the callback runs
 * in the browser context and cannot see Node-side variables.
 *
 * @param {object} page - Puppeteer page already showing the owner.
 * @param {string} scrapeDate - Date string stored on the lead record.
 * @param {string} ownerUrl - URL of the owner page, stored on the lead record.
 * @returns {Promise<object>} Lead record; fields that were not found are ''.
 */
async function extractOwnerData(page, scrapeDate, ownerUrl) {
  return page.evaluate((date, url) => {
    // Returns the trimmed text of the first selector that yields non-empty text.
    const firstText = (selectors) => {
      for (const selector of selectors) {
        const text = document.querySelector(selector)?.textContent?.trim();
        if (text) return text;
      }
      return '';
    };

    const contactSelectors = [
      '[data-person-id="people-contact-phone-1"]',
      '[data-person-id="people-contact-phone-2"]',
      '[data-person-id="people-contact-phone-3"]'
    ];

    const result = {
      scrapeDate: date,
      ownerName: '',
      email: '',
      phone: '',
      propertyAddress: '',
      city: '',
      state: '',
      zip: '',
      propertyType: '',
      squareFootage: '',
      ownerLocation: '',
      propertyCount: '',
      ownerUrl: url
    };

    // NOTE(review): name, location and property count are looked up with the
    // same "people-contact-phone-*" selectors as the phone number, so they may
    // all end up holding phone text. Confirm the intended selectors against
    // the live page markup.
    result.ownerName = firstText([...contactSelectors, '.owner-name', 'h1', '.h2', 'h3']);

    // Phone: prefer an explicit data-value, then visible text, then a tel: href.
    for (const selector of [...contactSelectors, 'a[href^="tel:"]', '.phone-number']) {
      const el = document.querySelector(selector);
      if (!el) continue;

      const phoneValue =
        el.getAttribute('data-value') ||
        el.textContent?.trim() ||
        el.getAttribute('href')?.replace(/^tel:/, '');

      if (phoneValue) {
        result.phone = phoneValue;
        break;
      }
    }

    result.ownerLocation = firstText(contactSelectors);
    result.propertyCount = firstText(contactSelectors);

    return result;
  }, scrapeDate, ownerUrl);
}

/**
 * Main scraper function.
 *
 * Logs into Reonomy, visits up to MAX_LEADS owner pages found after login,
 * extracts whatever contact data is available and writes the leads to a JSON
 * file.
 *
 * @returns {Promise<{sheetId: string, leadCount: number}>} Summary of the run.
 * @throws {Error} Rethrows any failure (after logging it and attempting a
 *   screenshot) so the caller can report it and exit with a non-zero status.
 */
async function scrapeLeads() {
  log('🚀 Starting Reonomy Lead Scraper (Fixed Version)...\n');

  const browser = await puppeteer.launch({
    headless: process.env.HEADLESS === 'true' ? 'new' : false,
    args: ['--no-sandbox', '--disable-setuid-sandbox', '--window-size=1920,1080']
  });

  const leads = [];
  const scrapeDate = new Date().toISOString().split('T')[0];
  // Declared outside the try so the catch block can take a screenshot.
  let page;

  try {
    // Created inside the try so the browser is closed even if this fails.
    page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Step 1: Get or create sheet
    log('\n📍 Step 1: Preparing data collection...');
    const sheetId = 'local-json';
    log(`💾 Will save leads to: reonomy-leads.json`);

    // Step 2: Login
    log('\n📍 Step 2: Logging into Reonomy...');
    await page.goto('https://app.reonomy.com/#!/account', {
      waitUntil: 'domcontentloaded',
      timeout: 60000
    });

    await sleep(2000);

    // Fill credentials
    await page.type('input[type="email"]', REONOMY_EMAIL, { delay: 100 });
    await page.type('input[type="password"]', REONOMY_PASSWORD, { delay: 100 });

    // Submit login
    await page.click('button[type="submit"]');
    log('⏳ Logging in...');

    // Wait for redirect
    await sleep(8000);

    // Check if we're logged in: still being on a login/auth URL means failure.
    const currentUrl = page.url();
    if (currentUrl.includes('login') || currentUrl.includes('auth')) {
      throw new Error('Login failed. Please check credentials.');
    }

    log('✅ Successfully logged in!');

    // Step 3: Find owner links
    log('\n📍 Step 3: Finding owner links...');
    const ownerLinks = await collectOwnerLinks(page, MAX_LEADS);
    log(`👤 Found ${ownerLinks.length} owner links`);

    // Step 4: Extract data from owner pages
    log('\n📍 Step 4: Extracting contact info from owner pages...');

    for (const [index, { ownerUrl }] of ownerLinks.entries()) {
      log(`\n[${index + 1}/${ownerLinks.length}] Visiting owner: ${ownerUrl}`);

      try {
        await page.goto(ownerUrl, {
          waitUntil: 'networkidle2',
          timeout: 30000
        });

        await sleep(3000);

        // Extract ANY data available (owner name, phone, location, property count)
        const lead = await extractOwnerData(page, scrapeDate, ownerUrl);

        if (lead.ownerName || lead.phone || lead.propertyCount || lead.ownerLocation || lead.propertyType || lead.squareFootage) {
          // We got at least some data!
          log(` ✅ Collected: ${lead.ownerName || 'Owner info'} - ${lead.ownerLocation || lead.propertyAddress || lead.propertyType || lead.squareFootage}`);
          leads.push(lead);
        }
      } catch (ownerError) {
        // One broken owner page must not discard the leads already collected.
        log(` ⚠️ Skipping owner page: ${ownerError.message}`);
      }
    }

    log(`\n✅ Found ${leads.length} total leads`);

    // Step 5: Save to JSON file
    log('\n📍 Step 5: Saving leads to JSON file...');

    const filename = '/Users/jakeshore/.clawdbot/workspace/reonomy-leads.json';
    const output = {
      scrapeDate,
      leadCount: leads.length,
      location: SEARCH_LOCATION,
      leads: leads
    };

    try {
      fs.writeFileSync(filename, JSON.stringify(output, null, 2));
      log('💾 Saved leads to ' + filename);
    } catch (error) {
      log('❌ Error saving to JSON: ' + error.message);
    }

    log('\n✅ Scraping complete!');
    log('📝 Log file: ' + LOG_FILE);
    log('📊 Total leads collected: ' + leads.length);

    return { sheetId, leadCount: leads.length };
  } catch (error) {
    log('\n❌ Error: ' + error.message);
    log(error.stack);

    // Save error screenshot (best effort; only possible once a page exists).
    if (page) {
      try {
        await page.screenshot({ path: '/tmp/reonomy-fixed-error.png', fullPage: true });
        log('📸 Error screenshot saved: /tmp/reonomy-fixed-error.png');
      } catch (screenshotError) {
        log('⚠️ Could not save error screenshot: ' + screenshotError.message);
      }
    }

    // Propagate the failure so the caller can exit with a non-zero status
    // instead of the run being reported as successful.
    throw error;
  } finally {
    await browser.close();
    log('\n🔚 Browser closed');
  }
}

// Run scraper: report the outcome and exit non-zero on failure.
scrapeLeads()
  .then((result) => {
    // Tolerate a bare leads array as well as the { leadCount } summary so the
    // message never reads "undefined leads scraped".
    const leadCount = Array.isArray(result) ? result.length : (result?.leadCount ?? 0);
    log('\n🎉 Success! ' + leadCount + ' leads scraped.');
    console.log('\n📊 View your leads in: ' + '/Users/jakeshore/.clawdbot/workspace/reonomy-leads.json');
  })
  .catch((error) => {
    log('\n💥 Scraper failed: ' + error.message);
    process.exit(1);
  });