# /// script
# requires-python = ">=3.10"
# dependencies = [
#     "librosa>=0.10.0",
#     "numpy",
#     "scipy",
#     "soundfile",
# ]
# ///
import librosa
import numpy as np
import sys
import json


def analyze_track(audio_path):
    """
    Analyze an audio track and extract features for Genre Universe positioning.

    Parameters
    ----------
    audio_path : str
        Path to any audio file readable by librosa/soundfile.

    Returns
    -------
    dict
        JSON-serializable dict with four sections:
        "raw_features" (measured values), "genre_universe_position" and
        "genre_universe_spikes" (derived 0-1 scores), and "insights"
        (human-readable labels). All numbers are plain Python floats.
    """
    print(f"Loading: {audio_path}")
    # Analyze only the first 3 minutes at a reduced sample rate to bound
    # runtime/memory; 22050 Hz is sufficient for these summary features.
    y, sr = librosa.load(audio_path, sr=22050, duration=180)

    print("Analyzing audio features...")

    # === TEMPO / RHYTHM ===
    tempo, _beat_frames = librosa.beat.beat_track(y=y, sr=sr)
    # librosa >= 0.10 may return tempo as a 1-element ndarray; coerce to float.
    tempo = float(tempo) if not hasattr(tempo, '__len__') else float(tempo[0])

    # === ENERGY ===
    rms = librosa.feature.rms(y=y)[0]
    energy = float(np.mean(rms))
    energy_std = float(np.std(rms))  # Dynamic range indicator

    # === SPECTRAL FEATURES ===
    spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)[0]
    brightness = float(np.mean(spectral_centroid))  # Higher = brighter/more electronic

    spectral_rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)[0]
    rolloff = float(np.mean(spectral_rolloff))

    spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
    contrast = float(np.mean(spectral_contrast))

    # === ZERO CROSSING RATE (percussiveness) ===
    zcr = librosa.feature.zero_crossing_rate(y)[0]
    percussiveness = float(np.mean(zcr))

    # NOTE(review): the original also computed 13 MFCC means here
    # (librosa.feature.mfcc) but never used or reported them; the dead
    # computation was removed. Re-add if timbral texture becomes an output.

    # === CHROMA (harmonic content) ===
    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    chroma_mean = float(np.mean(chroma))

    # === ONSET STRENGTH (rhythmic density) ===
    onset_env = librosa.onset.onset_strength(y=y, sr=sr)
    rhythmic_density = float(np.mean(onset_env))

    # === HARMONIC/PERCUSSIVE SEPARATION ===
    y_harmonic, _y_percussive = librosa.effects.hpss(y)
    # Epsilon guards against division by zero on (near-)silent input.
    harmonic_ratio = float(np.sum(np.abs(y_harmonic)) / (np.sum(np.abs(y)) + 1e-6))

    # === NORMALIZE TO 0-1 SCALES ===
    # These normalizations are rough estimates based on typical ranges.
    # Tempo: 60-180 BPM typical range
    tempo_normalized = np.clip((tempo - 60) / 120, 0, 1)
    # Energy: RMS typically 0.01-0.3
    energy_normalized = np.clip(energy / 0.2, 0, 1)
    # Brightness: spectral centroid typically 1000-4000 Hz
    brightness_normalized = np.clip((brightness - 1000) / 3000, 0, 1)
    # Organic vs Electronic (inverse of brightness + harmonic ratio)
    organic_score = np.clip(harmonic_ratio * 1.5 - brightness_normalized * 0.3, 0, 1)
    # Valence estimation (very rough - higher brightness + major key tendencies = happier).
    # This is a simplification - real valence detection is complex.
    valence_estimate = np.clip(0.3 + brightness_normalized * 0.3 + chroma_mean * 0.2, 0, 1)
    # Danceability (tempo in sweet spot + strong beats + rhythmic density)
    dance_tempo_factor = 1 - abs(tempo - 120) / 60  # Peak at 120 BPM
    danceability = np.clip(
        dance_tempo_factor * 0.5 + rhythmic_density * 0.3 + energy_normalized * 0.2, 0, 1
    )
    # BUG FIX: production_density was previously unclipped, so it could exceed
    # 1.0 - violating the "0-1 scale" contract of the spikes section and
    # producing a negative repeat count in the bar rendering in __main__.
    production_density = np.clip(1 - harmonic_ratio + energy_std * 5, 0, 1)

    results = {
        "raw_features": {
            "tempo_bpm": round(tempo, 1),
            "energy_rms": round(energy, 4),
            "energy_std": round(energy_std, 4),
            "brightness_hz": round(brightness, 1),
            "spectral_rolloff": round(rolloff, 1),
            "spectral_contrast": round(contrast, 2),
            "percussiveness": round(percussiveness, 4),
            "rhythmic_density": round(rhythmic_density, 2),
            "harmonic_ratio": round(harmonic_ratio, 3),
            "chroma_mean": round(chroma_mean, 3),
        },
        "genre_universe_position": {
            "valence": round(float(valence_estimate), 2),
            "tempo": round(float(tempo_normalized), 2),
            "organic": round(float(organic_score), 2),
        },
        "genre_universe_spikes": {
            "energy": round(float(energy_normalized), 2),
            "acousticness": round(float(organic_score * 0.8), 2),
            "danceability": round(float(danceability), 2),
            "production_density": round(float(production_density), 2),
        },
        "insights": {
            "tempo_feel": "slow" if tempo < 90 else "medium" if tempo < 130 else "fast",
            "energy_level": "low" if energy_normalized < 0.33 else "medium" if energy_normalized < 0.66 else "high",
            "sonic_character": "organic/warm" if organic_score > 0.6 else "electronic/bright" if organic_score < 0.4 else "balanced",
        }
    }

    return results


if __name__ == "__main__":
    if len(sys.argv) < 2:
        # BUG FIX: the usage string was missing the <audio_file> placeholder.
        print("Usage: python analyze_track.py <audio_file>")
        sys.exit(1)

    audio_path = sys.argv[1]
    results = analyze_track(audio_path)

    print("\n" + "=" * 60)
    print("GENRE UNIVERSE AUDIO ANALYSIS")
    print("=" * 60)

    print("\nšŸ“Š RAW AUDIO FEATURES:")
    for key, value in results["raw_features"].items():
        print(f"  {key}: {value}")

    print("\nšŸŽÆ GENRE UNIVERSE POSITION (0-1 scale):")
    pos = results["genre_universe_position"]
    print(f"  X (Valence/Mood): {pos['valence']} {'← sad' if pos['valence'] < 0.4 else '→ happy' if pos['valence'] > 0.6 else '~ neutral'}")
    print(f"  Y (Tempo): {pos['tempo']} {'← slow' if pos['tempo'] < 0.4 else '→ fast' if pos['tempo'] > 0.6 else '~ medium'}")
    print(f"  Z (Organic): {pos['organic']} {'← electronic' if pos['organic'] < 0.4 else '→ organic' if pos['organic'] > 0.6 else '~ balanced'}")

    print("\n⚔ SPIKE VALUES (0-1 scale):")
    for key, value in results["genre_universe_spikes"].items():
        # All spike values are clipped to [0, 1], so the bar math is safe.
        bar = "ā–ˆ" * int(value * 20) + "ā–‘" * (20 - int(value * 20))
        print(f"  {key:20} [{bar}] {value}")

    print("\nšŸ’” INSIGHTS:")
    for key, value in results["insights"].items():
        print(f"  {key}: {value}")

    print("\n" + "=" * 60)

    # Also output JSON for programmatic use
    print("\nšŸ“„ JSON OUTPUT:")
    print(json.dumps(results, indent=2))