clawdbot-workspace/reonomy-scraper-v9-working.js

321 lines
9.4 KiB
JavaScript
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env node
const puppeteer = require('puppeteer');
const fs = require('fs');
const path = require('path');
// ---------------------------------------------------------------------------
// Runtime configuration (all values come from environment variables).
// ---------------------------------------------------------------------------

// Credentials are read from the environment only. Secrets must never be
// committed to source control as fallback defaults.
const REONOMY_EMAIL = process.env.REONOMY_EMAIL;
const REONOMY_PASSWORD = process.env.REONOMY_PASSWORD;
if (!REONOMY_EMAIL || !REONOMY_PASSWORD) {
  // Fail fast with a clear message instead of typing "undefined" into the login form.
  console.error('Missing credentials: set the REONOMY_EMAIL and REONOMY_PASSWORD environment variables.');
  process.exit(1);
}

// Identifier of the search whose results are scraped; overridable via REONOMY_SEARCH_ID.
const SEARCH_ID = process.env.REONOMY_SEARCH_ID || '504a2d13-d88f-4213-9ac6-a7c8bc7c20c6';

// Maximum number of properties to process. Falls back to 20 when the variable
// is unset, non-numeric, zero, or negative (a negative value would otherwise
// make Array.prototype.slice drop items from the end of the list).
const requestedMaxProperties = Number.parseInt(process.env.MAX_PROPERTIES, 10);
const MAX_PROPERTIES = requestedMaxProperties > 0 ? requestedMaxProperties : 20;

// Run the browser headless unless HEADLESS is exactly the string 'false'.
const HEADLESS = process.env.HEADLESS !== 'false';

// Output artifacts are written next to this script.
const OUTPUT_FILE = path.join(__dirname, 'reonomy-leads-v9-working.json');
const LOG_FILE = path.join(__dirname, 'reonomy-scraper-v9-working.log');
/**
 * Write a message to stdout and append the same message, prefixed with an
 * ISO-8601 timestamp in square brackets, to LOG_FILE.
 *
 * @param {string} message - Text to print and persist.
 */
function log(message) {
  // Build the file line first so the timestamp reflects when log() was called.
  const stampedLine = `[${new Date().toISOString()}] ${message}\n`;
  console.log(message);
  // Synchronous append: the line is on disk before the function returns.
  fs.appendFileSync(LOG_FILE, stampedLine);
}
/**
 * Pause for a fixed amount of time.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>} Promise that resolves (with no value) after the delay.
 */
function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
/**
 * Split a heading such as "123 Main St, Springfield, IL 62704" into its parts.
 *
 * @param {string} text - Trimmed heading text.
 * @returns {{propertyAddress: string, city: string, state: string, zip: string} | null}
 *   Parsed parts, or null when the text does not start with a street address.
 */
function parseAddressHeading(text) {
  const match = text.match(/^(\d+[^,]+),\s*([A-Za-z\s,]+),\s*([A-Z]{2})\s*(\d{5})/);
  if (!match) {
    return null;
  }
  // Capture groups: 1 = street, 2 = city, 3 = state, 4 = ZIP.
  return {
    propertyAddress: match[0],
    city: match[2].trim(),
    state: match[3],
    zip: match[4],
  };
}

/**
 * Collect unique owner names that follow an "Owns N properties" label.
 *
 * NOTE(review): assumes the page text places the owner name directly after the
 * "Owns N properties" label — confirm against the live Owner tab.
 *
 * @param {string} text - Visible text of the page.
 * @returns {string[]} De-duplicated owner names in order of first appearance.
 */
function parseOwnerNames(text) {
  // A name is one or more capitalised words on a single line, optionally
  // followed by an entity suffix (needed for all-caps ones like LLC / LLP).
  // Words are joined with spaces/tabs only so the match cannot run on into
  // the next line of the page.
  const ownerPattern = /Owns\s+\d+\s+propert(?:y|ies)\s*([A-Z][a-z]+(?:[ \t]+[A-Z][a-z]+)*(?:[ \t]+(?:LLC|LLP|Inc|Corp|Co|Ltd|Partners|Housing|Properties|Realty|Estate|Investments|Management))?)/g;
  const names = new Set();
  for (const match of text.matchAll(ownerPattern)) {
    const name = match[1].trim();
    // Very short captures are treated as noise, as in the original filter.
    if (name.length > 3) {
      names.add(name);
    }
  }
  return [...names];
}

/**
 * Read property and owner details from the property page currently loaded in `page`.
 *
 * The property ID comes from the page URL, the address from the first h1/h2/h3
 * whose text looks like a street address, and square footage, property type
 * and owner names from the page's visible body text.
 *
 * `emails` and `phones` are always returned as empty arrays: contact
 * extraction is not implemented here.
 *
 * @param {object} page - Puppeteer page (only `url`, `$` and `evaluate` are used).
 * @returns {Promise<{propertyId: string, propertyAddress: string, city: string,
 *   state: string, zip: string, squareFootage: string, propertyType: string,
 *   emails: string[], phones: string[], ownerNames: string[]}>}
 *   Missing values are empty strings / empty arrays.
 */
async function extractOwnerTabData(page) {
  log('📊 Extracting Owner tab data...');

  const propIdMatch = page.url().match(/property\/([a-f0-9-]+)/);
  const propertyId = propIdMatch ? propIdMatch[1] : '';

  // Only the first element for each heading level is inspected.
  let address = { propertyAddress: '', city: '', state: '', zip: '' };
  for (const selector of ['h1', 'h2', 'h3']) {
    const heading = await page.$(selector);
    if (!heading) {
      continue;
    }
    const text = ((await page.evaluate((el) => el.textContent, heading)) || '').trim();
    const parsed = parseAddressHeading(text);
    if (parsed) {
      address = parsed;
      log(` 📍 Address: ${text}`);
      break;
    }
  }

  // Visible text of the whole page; every pattern below is matched against it.
  const bodyTextContent =
    (await page.evaluate(() => (document.body ? document.body.innerText : ''))) || '';

  const sfMatch = bodyTextContent.match(/(\d+\.?\d*\s*k?\s*SF)/i);
  const squareFootage = sfMatch ? sfMatch[0] : '';

  // Order matters: more specific labels come before labels they contain
  // ('General Industrial' must be tested before 'Industrial').
  const typePatterns = [
    'Warehouse', 'Office Building', 'Retail Stores', 'General Industrial',
    'Industrial', 'Medical Building', 'School', 'Religious',
    'Supermarket', 'Financial Building',
  ];
  const propertyType = typePatterns.find((type) => bodyTextContent.includes(type)) || '';
  if (propertyType) {
    log(` 🏢 Property Type: ${propertyType}`);
  }

  const ownerNames = parseOwnerNames(bodyTextContent);
  log(` 👤 Owners found: ${ownerNames.length}`);

  return {
    propertyId,
    propertyAddress: address.propertyAddress,
    city: address.city,
    state: address.state,
    zip: address.zip,
    squareFootage,
    propertyType,
    emails: [],
    phones: [],
    ownerNames,
  };
}
/**
 * Collect the unique property IDs linked from the search-results page
 * currently loaded in `page`, paired with a direct URL for each.
 *
 * @param {object} page - Puppeteer page showing the search results.
 * @param {string} searchId - Search ID used to build the direct property URLs.
 * @returns {Promise<Array<{id: string, url: string}>>} One entry per distinct ID, in page order.
 */
async function collectPropertyLinks(page, searchId) {
  const ids = await page.evaluate(() => {
    const found = [];
    document.querySelectorAll('a[href*="/property/"]').forEach((link) => {
      const match = link.href.match(/property\/([a-f0-9-]+)/);
      if (match) {
        found.push(match[1]);
      }
    });
    return found;
  });
  // The same property can be linked by several anchors; scrape each ID once.
  return [...new Set(ids)].map((id) => ({
    id,
    url: `https://app.reonomy.com/#!/search/${searchId}/property/${id}`,
  }));
}

/**
 * Try to open a property by clicking the button that wraps its link.
 *
 * @param {object} page - Puppeteer page showing the search results.
 * @param {string} propertyId - ID of the property to open.
 * @returns {Promise<boolean>} true when a matching button was found and clicked.
 */
async function clickPropertyCard(page, propertyId) {
  try {
    // page.evaluate (not evaluateHandle) so the boolean itself is returned to
    // Node rather than a JSHandle wrapping it.
    return await page.evaluate((id) => {
      const target = Array.from(document.querySelectorAll('button')).find((button) => {
        const link = button.querySelector('a[href*="/property/"]');
        return Boolean(link && link.href.includes(id));
      });
      if (!target) {
        return false;
      }
      target.scrollIntoView({ behavior: 'smooth', block: 'center' });
      target.click();
      return true;
    }, propertyId);
  } catch (error) {
    log(` ⚠️ Click attempt failed: ${error.message}`);
    return false;
  }
}

/**
 * Log into Reonomy, open the configured search, visit up to MAX_PROPERTIES
 * properties from its results and write the collected leads to OUTPUT_FILE.
 *
 * On any failure the error is logged, a screenshot is attempted, the browser
 * is closed and the error is re-thrown. The process exit code is left to the
 * caller.
 *
 * @returns {Promise<{leadCount: number, outputFile: string}>} Number of leads
 *   collected and the path of the JSON output file.
 * @throws {Error} When login appears to have failed, the search ID cannot be
 *   read from the URL, no properties are found, or a browser operation fails.
 */
async function scrapeLeads() {
  log('🚀 Starting Reonomy Scraper v9.1 (FIXED EDITION)...\n');
  const browser = await puppeteer.launch({
    headless: HEADLESS ? 'new' : false,
    args: ['--no-sandbox', '--disable-setuid-sandbox', '--window-size=1920,1080']
  });
  // Created inside the try block so the browser is still closed if this fails.
  let page;
  try {
    page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    log('\n🔐 Step 1: Logging into Reonomy...\n');
    await page.goto('https://app.reonomy.com/#!/account', {
      waitUntil: 'domcontentloaded',
      timeout: 60000
    });
    await sleep(2000);
    // Make sure the login form is actually present before typing into it.
    await page.waitForSelector('input[type="email"]', { timeout: 30000 });
    await page.type('input[type="email"]', REONOMY_EMAIL, { delay: 100 });
    await page.type('input[type="password"]', REONOMY_PASSWORD, { delay: 100 });
    await page.click('button[type="submit"]');
    log('⏳ Waiting for login...');
    await sleep(15000);
    const urlAfterLogin = page.url();
    // Still being on a login/auth URL after the wait is treated as a failed login.
    if (urlAfterLogin.includes('login') || urlAfterLogin.includes('auth')) {
      throw new Error('Login failed. Please check credentials.');
    }
    log('✅ Successfully logged in!');

    log('\n📍 Step 2: Navigating to search...\n');
    await page.goto(`https://app.reonomy.com/#!/search/${SEARCH_ID}`, {
      waitUntil: 'networkidle2',
      timeout: 60000
    });
    await sleep(3000);
    const urlMatch = page.url().match(/search\/([a-f0-9-]+)/);
    if (!urlMatch) {
      throw new Error('Could not extract search ID from URL');
    }
    const searchId = urlMatch[1];
    log(`✅ Search ID: ${searchId}`);

    log('\n📍 Step 3: Extracting property IDs...\n');
    const propertyIds = await collectPropertyLinks(page, searchId);
    log(`✅ Found ${propertyIds.length} property IDs`);
    if (propertyIds.length === 0) {
      log('⚠️ No property IDs found.');
      throw new Error('No properties found on search page.');
    }

    const propertiesToScrape = propertyIds.slice(0, MAX_PROPERTIES);
    log(`\n📍 Step 4: Processing ${propertiesToScrape.length} properties...\n`);
    const leads = [];
    for (const [index, prop] of propertiesToScrape.entries()) {
      log(`\n[${index + 1}/${propertiesToScrape.length}] Property ID: ${prop.id}`);
      log(` 🔗 Clicking property...`);
      const clicked = await clickPropertyCard(page, prop.id);
      if (!clicked) {
        log(` ⚠️ Could not click property, trying to navigate directly...`);
        await page.goto(prop.url, {
          waitUntil: 'networkidle2',
          timeout: 30000
        });
      }
      log(` ⏳ Waiting for Owner tab to load...`);
      await sleep(8000);
      log(` 📊 Extracting data from Owner tab...`);
      const propertyData = await extractOwnerTabData(page);
      leads.push({
        scrapeDate: new Date().toISOString().split('T')[0],
        propertyId: prop.id,
        propertyUrl: page.url(),
        address: propertyData.propertyAddress || '',
        city: propertyData.city || '',
        state: propertyData.state || '',
        zip: propertyData.zip || '',
        squareFootage: propertyData.squareFootage || '',
        propertyType: propertyData.propertyType || '',
        ownerNames: propertyData.ownerNames.join('; '),
        emails: propertyData.emails,
        phones: propertyData.phones
      });
      log(` 📧 Emails: ${propertyData.emails.length}`);
      log(` 📞 Phones: ${propertyData.phones.length}`);
      log(` 👤 Owners: ${propertyData.ownerNames.length}`);
      log(` 📍 Address: ${propertyData.propertyAddress || 'N/A'}`);

      // Return to the results list so the next property's button can be found.
      log(` 🔙 Going back to search results...`);
      await page.goto(`https://app.reonomy.com/#!/search/${searchId}`, {
        waitUntil: 'networkidle2',
        timeout: 30000
      });
      await sleep(3000);
    }

    if (leads.length > 0) {
      log(`\n✅ Total leads scraped: ${leads.length}`);
      const outputData = {
        scrapeDate: new Date().toISOString(),
        searchId,
        leadCount: leads.length,
        leads
      };
      fs.writeFileSync(OUTPUT_FILE, JSON.stringify(outputData, null, 2));
      log(`💾 Saved to: ${OUTPUT_FILE}`);
    } else {
      log('\n⚠ No leads scraped.');
    }
    log('\n✅ Scraping complete!');
    return { leadCount: leads.length, outputFile: OUTPUT_FILE };
  } catch (error) {
    log(`\n❌ Error: ${error.message}`);
    log(error.stack);
    // The screenshot is best-effort diagnostics; a failure here must not mask
    // the original error, but it is logged rather than silently dropped.
    if (page) {
      try {
        await page.screenshot({ path: '/tmp/reonomy-v9-error.png', fullPage: true });
        log('📸 Error screenshot saved: /tmp/reonomy-v9-error.png');
      } catch (screenshotError) {
        log(`⚠️ Could not save error screenshot: ${screenshotError.message}`);
      }
    }
    throw error;
  } finally {
    // Do not call process.exit() here: that would force a failure exit code
    // even after a successful run and stop the caller's handlers from running.
    await browser.close();
    log('\n🔚 Browser closed');
  }
}
/**
 * Script entry point: run the scraper once, report the outcome, and end the
 * process with exit code 0 on success or 1 on failure.
 */
async function main() {
  try {
    const result = await scrapeLeads();
    log(`\n🎉 Success! ${result.leadCount} leads scraped.`);
    console.log(`\n💾 View your leads at: ${result.outputFile}`);
    process.exit(0);
  } catch (error) {
    log(`\n💥 Scraper failed: ${error.message}`);
    process.exit(1);
  }
}

main();