# (non-Python file-listing residue commented out: "158 lines / 6.1 KiB / Python")

# /// script
# requires-python = ">=3.10"
# dependencies = [
# "librosa>=0.10.0",
# "numpy",
# "scipy",
# "soundfile",
# ]
# ///
import librosa
import numpy as np
import sys
import json
def analyze_track(audio_path):
    """Analyze an audio track and extract features for Genre Universe positioning.

    Loads the first 3 minutes of the file at 22.05 kHz mono, computes a set of
    librosa features, and maps them onto rough 0-1 scales.

    Parameters
    ----------
    audio_path : str
        Path to any audio file readable by librosa/soundfile.

    Returns
    -------
    dict
        "raw_features"            -- unscaled feature values (tempo, RMS, ...)
        "genre_universe_position" -- valence / tempo / organic, each in [0, 1]
        "genre_universe_spikes"   -- energy / acousticness / danceability /
                                     production_density, each in [0, 1]
        "insights"                -- human-readable labels derived from the above
    """
    print(f"Loading: {audio_path}")
    # Analyze only the first 3 minutes to bound runtime and memory.
    y, sr = librosa.load(audio_path, sr=22050, duration=180)
    print("Analyzing audio features...")

    # === TEMPO / RHYTHM ===
    tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr)
    # librosa >= 0.10 may return tempo as a 1-element array; normalize to float.
    tempo = float(tempo) if not hasattr(tempo, '__len__') else float(tempo[0])

    # === ENERGY ===
    rms = librosa.feature.rms(y=y)[0]
    energy = float(np.mean(rms))
    energy_std = float(np.std(rms))  # Dynamic range indicator

    # === SPECTRAL FEATURES ===
    spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)[0]
    brightness = float(np.mean(spectral_centroid))  # Higher = brighter/more electronic
    spectral_rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)[0]
    rolloff = float(np.mean(spectral_rolloff))
    spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
    contrast = float(np.mean(spectral_contrast))

    # === ZERO CROSSING RATE (percussiveness) ===
    zcr = librosa.feature.zero_crossing_rate(y)[0]
    percussiveness = float(np.mean(zcr))

    # === MFCC (timbral texture) ===
    # FIX: mfcc_mean was previously computed but never reported (dead work);
    # it is now included in raw_features so the expensive pass is not wasted.
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    mfcc_mean = [float(np.mean(mfcc)) for mfcc in mfccs]

    # === CHROMA (harmonic content) ===
    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    chroma_mean = float(np.mean(chroma))

    # === ONSET STRENGTH (rhythmic density) ===
    onset_env = librosa.onset.onset_strength(y=y, sr=sr)
    rhythmic_density = float(np.mean(onset_env))

    # === HARMONIC/PERCUSSIVE SEPARATION ===
    y_harmonic, y_percussive = librosa.effects.hpss(y)
    # Epsilon in the denominator guards against a silent (all-zero) input.
    harmonic_ratio = float(np.sum(np.abs(y_harmonic)) / (np.sum(np.abs(y)) + 1e-6))

    # === NORMALIZE TO 0-1 SCALES ===
    # These normalizations are rough estimates based on typical ranges.
    # Tempo: 60-180 BPM typical range
    tempo_normalized = np.clip((tempo - 60) / 120, 0, 1)
    # Energy: RMS typically 0.01-0.3
    energy_normalized = np.clip(energy / 0.2, 0, 1)
    # Brightness: spectral centroid typically 1000-4000 Hz
    brightness_normalized = np.clip((brightness - 1000) / 3000, 0, 1)
    # Organic vs Electronic (harmonic ratio pulled down by brightness)
    organic_score = np.clip(harmonic_ratio * 1.5 - brightness_normalized * 0.3, 0, 1)
    # Valence estimation (very rough - higher brightness + major key tendencies = happier)
    # This is a simplification - real valence detection is complex
    valence_estimate = np.clip(0.3 + brightness_normalized * 0.3 + chroma_mean * 0.2, 0, 1)
    # Danceability (tempo in sweet spot + strong beats + rhythmic density).
    # The tempo factor peaks at 120 BPM and can go negative far from it; the
    # final clip absorbs that.
    dance_tempo_factor = 1 - abs(tempo - 120) / 60
    danceability = np.clip(dance_tempo_factor * 0.5 + rhythmic_density * 0.3 + energy_normalized * 0.2, 0, 1)
    # BUG FIX: production_density was the only spike value not clipped; with
    # energy_std * 5 it could exceed 1, violating the documented 0-1 scale
    # that the caller's bar-chart rendering relies on.
    production_density = float(np.clip(1 - harmonic_ratio + energy_std * 5, 0, 1))

    results = {
        "raw_features": {
            "tempo_bpm": round(tempo, 1),
            "energy_rms": round(energy, 4),
            "energy_std": round(energy_std, 4),
            "brightness_hz": round(brightness, 1),
            "spectral_rolloff": round(rolloff, 1),
            "spectral_contrast": round(contrast, 2),
            "percussiveness": round(percussiveness, 4),
            "rhythmic_density": round(rhythmic_density, 2),
            "harmonic_ratio": round(harmonic_ratio, 3),
            "chroma_mean": round(chroma_mean, 3),
            # New key (backward-compatible addition): per-coefficient MFCC means.
            "mfcc_mean": [round(m, 3) for m in mfcc_mean],
        },
        "genre_universe_position": {
            "valence": round(float(valence_estimate), 2),
            "tempo": round(float(tempo_normalized), 2),
            "organic": round(float(organic_score), 2),
        },
        "genre_universe_spikes": {
            "energy": round(float(energy_normalized), 2),
            "acousticness": round(float(organic_score * 0.8), 2),
            "danceability": round(float(danceability), 2),
            "production_density": round(production_density, 2),
        },
        "insights": {
            "tempo_feel": "slow" if tempo < 90 else "medium" if tempo < 130 else "fast",
            "energy_level": "low" if energy_normalized < 0.33 else "medium" if energy_normalized < 0.66 else "high",
            "sonic_character": "organic/warm" if organic_score > 0.6 else "electronic/bright" if organic_score < 0.4 else "balanced",
        }
    }
    return results
if __name__ == "__main__":
    # CLI entry point: analyze one audio file and pretty-print the results.
    if len(sys.argv) < 2:
        print("Usage: python analyze_track.py <audio_file>")
        sys.exit(1)
    audio_path = sys.argv[1]
    results = analyze_track(audio_path)

    print("\n" + "=" * 60)
    print("GENRE UNIVERSE AUDIO ANALYSIS")
    print("=" * 60)

    print("\n📊 RAW AUDIO FEATURES:")
    for key, value in results["raw_features"].items():
        print(f" {key}: {value}")

    print("\n🎯 GENRE UNIVERSE POSITION (0-1 scale):")
    pos = results["genre_universe_position"]
    print(f" X (Valence/Mood): {pos['valence']} {'← sad' if pos['valence'] < 0.4 else '→ happy' if pos['valence'] > 0.6 else '~ neutral'}")
    print(f" Y (Tempo): {pos['tempo']} {'← slow' if pos['tempo'] < 0.4 else '→ fast' if pos['tempo'] > 0.6 else '~ medium'}")
    print(f" Z (Organic): {pos['organic']} {'← electronic' if pos['organic'] < 0.4 else '→ organic' if pos['organic'] > 0.6 else '~ balanced'}")

    print("\n⚡ SPIKE VALUES (0-1 scale):")
    for key, value in results["genre_universe_spikes"].items():
        # BUG FIX: the gauge glyphs were empty strings ("" * n + "" * m), so
        # every bar rendered blank. Restored filled/empty block characters.
        filled = int(value * 20)
        bar = "█" * filled + "░" * (20 - filled)
        print(f" {key:20} [{bar}] {value}")

    print("\n💡 INSIGHTS:")
    for key, value in results["insights"].items():
        print(f" {key}: {value}")

    print("\n" + "=" * 60)
    # Also output JSON for programmatic use
    print("\n📄 JSON OUTPUT:")
    print(json.dumps(results, indent=2))