#!/bin/bash
#
# Quick validation script for the Reonomy scraper update
#
# Verifies that reonomy-scraper.js exists, parses cleanly under Node.js,
# and contains the expected extraction functions.

# Strict mode: abort on errors, unset variables, and pipeline failures.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
SCRAPER_FILE="$SCRIPT_DIR/reonomy-scraper.js"
readonly SCRIPT_DIR SCRAPER_FILE

# Color codes
readonly GREEN='\033[0;32m'
readonly RED='\033[0;31m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly NC='\033[0m'

# Logging helpers: one colored status line per message, to stdout.
log_info()    { echo -e "${BLUE}ℹ️ $1${NC}"; }
log_success() { echo -e "${GREEN}✅ $1${NC}"; }
log_error()   { echo -e "${RED}❌ $1${NC}"; }
log_warning() { echo -e "${YELLOW}⚠️ $1${NC}"; }

echo "🔍 Reonomy Scraper Validation"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo

# Check if scraper file exists
if [ ! -f "$SCRAPER_FILE" ]; then
  log_error "Scraper file not found: $SCRAPER_FILE"
  exit 1
fi
log_success "Scraper file found"

# Check Node.js syntax
log_info "Checking Node.js syntax..."
if node --check "$SCRAPER_FILE" 2>/dev/null; then
  log_success "Syntax is valid"
else
  log_error "Syntax errors found"
  # Re-run without suppression so the user sees the actual parse errors.
  # '|| true' prevents set -e from aborting before the explicit exit 1.
  node --check "$SCRAPER_FILE" || true
  exit 1
fi

# Check for new extraction functions; any missing one is a hard failure.
log_info "Checking for new extraction functions..."
for fn in extractPropertyContactInfo extractOwnerContactInfo extractLinksFromPage; do
  if grep -q "$fn" "$SCRAPER_FILE"; then
    log_success "$fn function found"
  else
    log_error "$fn function missing"
    exit 1
  fi
done

# Check for rate limiting configuration
log_info "Checking rate limiting configuration..."
# Rate-limiting configuration knobs: warn (don't fail) if one is missing.
if grep -q "MAX_PROPERTIES" "$SCRAPER_FILE"; then
  log_success "MAX_PROPERTIES limit configured"
else
  log_warning "MAX_PROPERTIES limit not found"
fi

if grep -q "MAX_OWNERS" "$SCRAPER_FILE"; then
  log_success "MAX_OWNERS limit configured"
else
  log_warning "MAX_OWNERS limit not found"
fi

if grep -q "PAGE_DELAY_MS" "$SCRAPER_FILE"; then
  log_success "PAGE_DELAY_MS configured"
else
  log_warning "PAGE_DELAY_MS not found"
fi

# Check for email/phone extraction patterns
log_info "Checking contact extraction patterns..."

# NOTE(review): these are grep BRE patterns, and the escaping in the
# regex-shaped entries looks suspect — '\[' matches a literal '[' and '\d'
# is not a BRE class — so they may never match the intended JS source.
# Verify against reonomy-scraper.js before relying on them.
email_patterns=(
  'a\[href\^="mailto:"\]'
  '\.email'
  '\[a-zA-Z0-9._%+-]+@\[a-zA-Z0-9.-]+\.\[a-zA-Z\]{2,\}'
)
phone_patterns=(
  'a\[href\^="tel:"\]'
  '\.phone'
  '\(\?\d{3}\)\)?\[-.\s\]?\(\d{3}\)\[-.\s\]?\(\d{4}\)'
)

# Report the first matching pattern. Previously a total miss was silent;
# now it produces an explicit warning. '--' protects odd pattern prefixes.
email_found=0
for pattern in "${email_patterns[@]}"; do
  if grep -q -- "$pattern" "$SCRAPER_FILE"; then
    log_success "Email extraction pattern found: $pattern"
    email_found=1
    break
  fi
done
if [ "$email_found" -eq 0 ]; then
  log_warning "No email extraction pattern found"
fi

phone_found=0
for pattern in "${phone_patterns[@]}"; do
  if grep -q -- "$pattern" "$SCRAPER_FILE"; then
    log_success "Phone extraction pattern found: $pattern"
    phone_found=1
    break
  fi
done
if [ "$phone_found" -eq 0 ]; then
  log_warning "No phone extraction pattern found"
fi

# Check main scraper loop
log_info "Checking main scraper loop..."
if grep -q "visit each property page" "$SCRAPER_FILE"; then
  log_success "Property page scraping logic found"
else
  log_warning "Property page scraping comment not found (may be present with different wording)"
fi

if grep -q "visit each owner page" "$SCRAPER_FILE"; then
  log_success "Owner page scraping logic found"
else
  log_warning "Owner page scraping comment not found (may be present with different wording)"
fi

# Show configuration values
log_info "Current configuration:"
echo
# '|| true' keeps set -e from aborting when a grep finds no match.
grep -E "^(const|let).*=.*//.*limit" "$SCRAPER_FILE" | sed 's/^/ /' || true
grep -E "^(const|let).*=.*PAGE_DELAY_MS" "$SCRAPER_FILE" | sed 's/^/ /' || true
echo

# Check dependencies
log_info "Checking dependencies..."
# Dependency checks: Node.js is mandatory; the rest only warn.
if ! command -v node > /dev/null 2>&1; then
  log_error "Node.js not found"
  exit 1
fi
NODE_VERSION="$(node --version)"
log_success "Node.js installed: $NODE_VERSION"

if [[ -f "$SCRIPT_DIR/package.json" ]]; then
  log_success "package.json found"
else
  log_warning "package.json not found (npm install may be needed)"
fi

if [[ -d "$SCRIPT_DIR/node_modules/puppeteer" ]]; then
  log_success "puppeteer installed"
else
  log_warning "puppeteer not found - run: npm install puppeteer"
fi

# Closing banner with usage hints.
echo
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
log_success "All validation checks passed!"
echo
log_info "To run the scraper:"
echo " cd $SCRIPT_DIR"
echo " ./scrape-reonomy.sh --location 'New York, NY'"
echo
log_info "Or with credentials:"
echo " export REONOMY_EMAIL='your@email.com'"
echo " export REONOMY_PASSWORD='yourpassword'"
echo " node reonomy-scraper.js"
echo