#!/usr/bin/env node
/**
 * Reonomy Lead Scraper - Working JSON Fallback Version
 *
 * Extracts property and owner leads from Reonomy dashboard/search
 * and saves to JSON (no Google Sheets dependency).
 */
// Browser automation + Node stdlib dependencies.
const puppeteer = require('puppeteer');
const fs = require('fs');
const path = require('path');

// Configuration (each value can be overridden via environment variables).
// SECURITY NOTE(review): hard-coded fallback credentials are committed to
// source control here — these should be removed and supplied exclusively
// through REONOMY_EMAIL / REONOMY_PASSWORD env vars or a secrets store.
const REONOMY_EMAIL = process.env.REONOMY_EMAIL || 'henry@realestateenhanced.com';
const REONOMY_PASSWORD = process.env.REONOMY_PASSWORD || '9082166532';
const SEARCH_LOCATION = process.env.REONOMY_LOCATION || 'New York, NY';
// Headless only when HEADLESS is exactly the string 'true'; anything else
// (including unset) launches a visible browser window.
const HEADLESS = process.env.HEADLESS === 'true';

// Output file paths, resolved next to this script.
const OUTPUT_FILE = path.join(__dirname, 'reonomy-leads.json');
const LOG_FILE = path.join(__dirname, 'reonomy-scraper.log');
/**
 * Log a message to stdout and append it (with an ISO timestamp) to LOG_FILE.
 *
 * A failure to write the log file must never abort the scrape, so file
 * errors are reported to the console and otherwise swallowed. Previously an
 * unwritable log file would throw out of every log() call and crash the run.
 *
 * @param {string} message - Text to log.
 */
function log(message) {
  const timestamp = new Date().toISOString();
  const logMessage = `[${timestamp}] ${message}\n`;
  console.log(message);
  try {
    fs.appendFileSync(LOG_FILE, logMessage);
  } catch (error) {
    // Best-effort only: keep scraping even if the log file is unwritable.
    console.error(`Could not write to log file: ${error.message}`);
  }
}
/**
 * Resolve after roughly `ms` milliseconds.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
/**
 * Persist the scraped leads to OUTPUT_FILE as pretty-printed JSON.
 *
 * The payload wraps the leads in scrape metadata (timestamp, lead count,
 * and the configured search location).
 *
 * @param {Array<Object>} leads - Lead records to write.
 * @returns {string|null} The output path on success, or null on failure.
 */
function saveLeads(leads) {
  const payload = {
    scrapeDate: new Date().toISOString(),
    leadCount: leads.length,
    location: SEARCH_LOCATION,
    leads,
  };

  try {
    fs.writeFileSync(OUTPUT_FILE, JSON.stringify(payload, null, 2));
    log(`💾 Saved ${leads.length} leads to ${OUTPUT_FILE}`);
    return OUTPUT_FILE;
  } catch (error) {
    log(`❌ Error saving to JSON: ${error.message}`);
    return null;
  }
}
/**
 * Extract property leads from the current Reonomy results page.
 *
 * Scans every link pointing at a property page, parses a
 * "street, city, ST zipcode" prefix out of its visible text, and turns
 * each hit into a normalized lead record (owner fields left blank).
 *
 * @param {import('puppeteer').Page} page - Logged-in Puppeteer page.
 * @returns {Promise<Array<Object>>} One lead object per matched property link.
 */
async function extractProperties(page) {
  log('🔍 Extracting property data...');

  // Runs in the browser context: collect and parse every property link.
  const properties = await page.evaluate(() => {
    const parsed = [];
    const links = document.querySelectorAll('a[href*="/property/"]');

    for (const link of links) {
      const text = (link.innerText || link.textContent || '').trim();
      const match = text.match(/^(\d+.+),\s*([A-Za-z\s]+),\s*([A-Z]{2})\s*(\d{5})/);
      if (!match) continue;

      parsed.push({
        fullText: text,
        address: match[1].trim(),
        city: match[2].trim(),
        state: match[3].trim(),
        zip: match[4].trim(),
        url: link.href,
        remainingText: text.substring(match[0].length).trim(),
      });
    }

    return parsed;
  });

  const scrapeDate = new Date().toISOString().split('T')[0];

  const leads = properties.map((prop) => {
    // Pull a square-footage token (e.g. "12.5k SF") out of the leftover
    // text; whatever remains is treated as the property type.
    const sqFtMatch = prop.remainingText.match(/(\d+\.?\d*)\s*k?\s*SF/i);
    const sqFt = sqFtMatch ? sqFtMatch[0] : '';
    const propertyType = prop.remainingText.replace(sqFt, '').trim() || '';

    return {
      scrapeDate,
      ownerName: '',
      propertyAddress: prop.address,
      city: prop.city,
      state: prop.state,
      zip: prop.zip,
      propertyType,
      squareFootage: sqFt,
      ownerLocation: '',
      propertyCount: '',
      propertyUrl: prop.url,
      ownerUrl: '',
      email: '',
      phone: '',
    };
  });

  log(`✅ Extracted ${leads.length} properties`);
  return leads;
}
/**
 * Extract owner leads from the current Reonomy results page.
 *
 * Scans every link pointing at a person/owner profile, pulls the owner
 * name, a free-form location line, and a property count out of the link
 * text, then normalizes each into a lead record (property fields blank).
 *
 * @param {import('puppeteer').Page} page - Logged-in Puppeteer page.
 * @returns {Promise<Array<Object>>} One lead object per matched owner link.
 */
async function extractOwners(page) {
  log('🔍 Extracting owner data...');

  // Runs in the browser context: gather raw text from each owner link.
  const owners = await page.evaluate(() => {
    const results = [];

    const ownerLinks = Array.from(document.querySelectorAll('a[href*="/person/"]'));

    ownerLinks.forEach(link => {
      const text = (link.innerText || link.textContent || '').trim();

      // Link text is multi-line: the first line is assumed to be the owner
      // name, and the first line containing a comma is taken as a location.
      const lines = text.split('\n').map(l => l.trim()).filter(l => l);

      if (lines.length >= 2) {
        const ownerName = lines[0];
        const location = lines.find(l => l.includes(',')) || '';
        // Matches e.g. "12 properties" / "1 property" -> "12" / "1".
        const propertyCountMatch = text.match(/(\d+)\s*propert/i);
        const propertyCount = propertyCountMatch ? propertyCountMatch[1] : '';

        results.push({
          ownerName,
          location,
          propertyCount,
          url: link.href,
          fullText: text
        });
      }
    });

    return results;
  });

  // Date-only stamp (YYYY-MM-DD) shared by all leads from this scrape.
  const scrapeDate = new Date().toISOString().split('T')[0];
  const leads = [];

  for (const owner of owners) {
    let city = '';
    let state = '';
    let ownerLocation = owner.location;

    // Best-effort "City, ST" parsing of the free-form location string.
    if (ownerLocation.includes(',')) {
      const parts = ownerLocation.split(',').map(p => p.trim());

      if (parts.length >= 2 && /^[A-Z]{2}$/.test(parts[parts.length - 1])) {
        // Last part is a two-letter state code. The part before it may
        // carry leading text (presumably a street address — TODO confirm
        // against real page text), so keep only the trailing run of
        // capitalized words as the city name.
        state = parts[parts.length - 1];
        const cityWithPrefix = parts[parts.length - 2];
        const cityMatch = cityWithPrefix.match(/(\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)$/);
        city = cityMatch ? cityMatch[1] : '';
      } else if (parts.length === 2) {
        // Plain "city, state" fallback when the last part is not a
        // two-letter state code.
        city = parts[0];
        state = parts[1];
      }
    }

    const lead = {
      scrapeDate,
      ownerName: owner.ownerName,
      propertyAddress: '',
      city,
      state,
      zip: '',
      propertyType: '',
      squareFootage: '',
      ownerLocation: owner.location,
      propertyCount: owner.propertyCount,
      propertyUrl: '',
      ownerUrl: owner.url,
      email: '',
      phone: ''
    };

    leads.push(lead);
  }

  log(`✅ Extracted ${leads.length} owners`);
  return leads;
}
/**
 * Main scraper.
 *
 * Launches a Chromium instance, logs into Reonomy, runs a location search,
 * extracts property and owner leads from the results page, and saves them
 * to OUTPUT_FILE as JSON. The browser is always closed, even on failure.
 *
 * @returns {Promise<{leadCount: number, outputFile: string}>}
 * @throws {Error} If login fails or any navigation/extraction step errors.
 */
async function scrapeLeads() {
  log('🚀 Starting Reonomy Lead Scraper (JSON Fallback Mode)...\n');

  const browser = await puppeteer.launch({
    // 'new' selects Chrome's new headless mode; false opens a visible window.
    headless: HEADLESS ? 'new' : false,
    args: ['--no-sandbox', '--disable-setuid-sandbox', '--window-size=1920,1080']
  });

  const page = await browser.newPage();
  await page.setViewport({ width: 1920, height: 1080 });

  try {
    // Login
    log('\n📍 Step 1: Logging into Reonomy...');
    await page.goto('https://app.reonomy.com/#!/account', {
      waitUntil: 'domcontentloaded',
      timeout: 60000
    });

    // NOTE(review): fixed sleeps rather than element waits throughout —
    // brittle if the page is slow; consider page.waitForSelector instead.
    await sleep(2000);

    await page.type('input[type="email"]', REONOMY_EMAIL, { delay: 100 });
    await page.type('input[type="password"]', REONOMY_PASSWORD, { delay: 100 });

    await page.click('button[type="submit"]');
    log('⏳ Logging in...');

    await sleep(8000);

    // Heuristic login check: still being on a login/auth URL means failure.
    const url = page.url();
    if (url.includes('login') || url.includes('auth')) {
      throw new Error('Login failed. Please check credentials.');
    }

    log('✅ Successfully logged in!');

    // Navigate to search
    log('\n📍 Step 2: Navigating to search...');
    await page.goto('https://app.reonomy.com/#!/search', {
      waitUntil: 'networkidle2',
      timeout: 60000
    });

    await sleep(3000);
    log('✅ On search page');

    // Search
    log(`\n📍 Step 3: Searching for: ${SEARCH_LOCATION}...`);

    // Prefer a labeled search box; fall back to any text input if the
    // placeholder-based selectors time out.
    const searchInput = await page.waitForSelector('input[placeholder*="address"], input[placeholder*="Search"]', {
      timeout: 10000
    }).catch(() => {
      return page.waitForSelector('input[type="text"]', { timeout: 5000 });
    });

    if (searchInput) {
      // Triple-click selects any existing text so typing replaces it.
      await searchInput.click({ clickCount: 3 });
      await searchInput.type(SEARCH_LOCATION, { delay: 100 });
      await sleep(1000);
      await page.keyboard.press('Enter');
      log('⏳ Searching...');
      await sleep(5000);
    }

    // Extract leads (both property-centric and owner-centric records).
    log('\n📍 Step 4: Extracting lead data...');
    const allLeads = [];

    const properties = await extractProperties(page);
    allLeads.push(...properties);

    const owners = await extractOwners(page);
    allLeads.push(...owners);

    log(`\n✅ Total leads extracted: ${allLeads.length}`);

    if (allLeads.length === 0) {
      // Keep a screenshot for debugging selector/layout changes.
      log('\n⚠️ No leads found. Taking screenshot for debugging...');
      await page.screenshot({ path: '/tmp/reonomy-no-leads.png', fullPage: true });
      log('📸 Screenshot saved: /tmp/reonomy-no-leads.png');
    } else {
      // Save to JSON
      log('\n📍 Step 5: Saving leads to JSON file...');
      saveLeads(allLeads);
    }

    log('\n✅ Scraping complete!');
    log(`💾 Leads saved to: ${OUTPUT_FILE}`);
    log(`📝 Log file: ${LOG_FILE}`);

    return { leadCount: allLeads.length, outputFile: OUTPUT_FILE };

  } catch (error) {
    log(`\n❌ Error: ${error.message}`);
    log(error.stack);

    // Best-effort error screenshot; ignore failures (page may be gone).
    try {
      await page.screenshot({ path: '/tmp/reonomy-error.png', fullPage: true });
      log('📸 Error screenshot saved: /tmp/reonomy-error.png');
    } catch (e) {}

    throw error;

  } finally {
    await browser.close();
    log('\n🔚 Browser closed');
  }
}
// Entry point: run the scraper, then exit 0 on success or 1 on failure.
(async () => {
  try {
    const result = await scrapeLeads();
    log(`\n🎉 Success! ${result.leadCount} leads scraped.`);
    console.log(`\n💾 View your leads at: ${result.outputFile}`);
    process.exit(0);
  } catch (error) {
    log(`\n💥 Scraper failed: ${error.message}`);
    process.exit(1);
  }
})();
|