# (non-Python file-listing residue commented out: "158 lines / 6.1 KiB / Python")

# /// script
# requires-python = ">=3.10"
# dependencies = [
# "librosa>=0.10.0",
# "numpy",
# "scipy",
# "soundfile",
# ]
# ///
import librosa
import numpy as np
import sys
import json
def analyze_track(audio_path):
    """Analyze an audio track and extract features for Genre Universe positioning.

    Loads the first 3 minutes of the file at 22.05 kHz mono, computes a set of
    librosa features, and maps them onto rough 0-1 scales.

    Parameters
    ----------
    audio_path : str
        Path to any audio file readable by librosa/soundfile.

    Returns
    -------
    dict
        "raw_features"            -- unscaled feature values (tempo, RMS, ...)
        "genre_universe_position" -- valence / tempo / organic, each in [0, 1]
        "genre_universe_spikes"   -- energy / acousticness / danceability /
                                     production_density, each in [0, 1]
        "insights"                -- human-readable labels derived from the above
    """
    print(f"Loading: {audio_path}")
    # Analyze only the first 3 minutes to bound runtime and memory.
    y, sr = librosa.load(audio_path, sr=22050, duration=180)
    print("Analyzing audio features...")

    # === TEMPO / RHYTHM ===
    tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr)
    # librosa >= 0.10 may return tempo as a 1-element array; normalize to float.
    tempo = float(tempo) if not hasattr(tempo, '__len__') else float(tempo[0])

    # === ENERGY ===
    rms = librosa.feature.rms(y=y)[0]
    energy = float(np.mean(rms))
    energy_std = float(np.std(rms))  # Dynamic range indicator

    # === SPECTRAL FEATURES ===
    spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)[0]
    brightness = float(np.mean(spectral_centroid))  # Higher = brighter/more electronic
    spectral_rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)[0]
    rolloff = float(np.mean(spectral_rolloff))
    spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
    contrast = float(np.mean(spectral_contrast))

    # === ZERO CROSSING RATE (percussiveness) ===
    zcr = librosa.feature.zero_crossing_rate(y)[0]
    percussiveness = float(np.mean(zcr))

    # === MFCC (timbral texture) ===
    # FIX: mfcc_mean was previously computed but never reported (dead work);
    # it is now included in raw_features so the expensive pass is not wasted.
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    mfcc_mean = [float(np.mean(mfcc)) for mfcc in mfccs]

    # === CHROMA (harmonic content) ===
    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    chroma_mean = float(np.mean(chroma))

    # === ONSET STRENGTH (rhythmic density) ===
    onset_env = librosa.onset.onset_strength(y=y, sr=sr)
    rhythmic_density = float(np.mean(onset_env))

    # === HARMONIC/PERCUSSIVE SEPARATION ===
    y_harmonic, y_percussive = librosa.effects.hpss(y)
    # Epsilon in the denominator guards against a silent (all-zero) input.
    harmonic_ratio = float(np.sum(np.abs(y_harmonic)) / (np.sum(np.abs(y)) + 1e-6))

    # === NORMALIZE TO 0-1 SCALES ===
    # These normalizations are rough estimates based on typical ranges.
    # Tempo: 60-180 BPM typical range
    tempo_normalized = np.clip((tempo - 60) / 120, 0, 1)
    # Energy: RMS typically 0.01-0.3
    energy_normalized = np.clip(energy / 0.2, 0, 1)
    # Brightness: spectral centroid typically 1000-4000 Hz
    brightness_normalized = np.clip((brightness - 1000) / 3000, 0, 1)
    # Organic vs Electronic (harmonic ratio pulled down by brightness)
    organic_score = np.clip(harmonic_ratio * 1.5 - brightness_normalized * 0.3, 0, 1)
    # Valence estimation (very rough - higher brightness + major key tendencies = happier)
    # This is a simplification - real valence detection is complex
    valence_estimate = np.clip(0.3 + brightness_normalized * 0.3 + chroma_mean * 0.2, 0, 1)
    # Danceability (tempo in sweet spot + strong beats + rhythmic density).
    # The tempo factor peaks at 120 BPM and can go negative far from it; the
    # final clip absorbs that.
    dance_tempo_factor = 1 - abs(tempo - 120) / 60
    danceability = np.clip(dance_tempo_factor * 0.5 + rhythmic_density * 0.3 + energy_normalized * 0.2, 0, 1)
    # BUG FIX: production_density was the only spike value not clipped; with
    # energy_std * 5 it could exceed 1, violating the documented 0-1 scale
    # that the caller's bar-chart rendering relies on.
    production_density = float(np.clip(1 - harmonic_ratio + energy_std * 5, 0, 1))

    results = {
        "raw_features": {
            "tempo_bpm": round(tempo, 1),
            "energy_rms": round(energy, 4),
            "energy_std": round(energy_std, 4),
            "brightness_hz": round(brightness, 1),
            "spectral_rolloff": round(rolloff, 1),
            "spectral_contrast": round(contrast, 2),
            "percussiveness": round(percussiveness, 4),
            "rhythmic_density": round(rhythmic_density, 2),
            "harmonic_ratio": round(harmonic_ratio, 3),
            "chroma_mean": round(chroma_mean, 3),
            # New key (backward-compatible addition): per-coefficient MFCC means.
            "mfcc_mean": [round(m, 3) for m in mfcc_mean],
        },
        "genre_universe_position": {
            "valence": round(float(valence_estimate), 2),
            "tempo": round(float(tempo_normalized), 2),
            "organic": round(float(organic_score), 2),
        },
        "genre_universe_spikes": {
            "energy": round(float(energy_normalized), 2),
            "acousticness": round(float(organic_score * 0.8), 2),
            "danceability": round(float(danceability), 2),
            "production_density": round(production_density, 2),
        },
        "insights": {
            "tempo_feel": "slow" if tempo < 90 else "medium" if tempo < 130 else "fast",
            "energy_level": "low" if energy_normalized < 0.33 else "medium" if energy_normalized < 0.66 else "high",
            "sonic_character": "organic/warm" if organic_score > 0.6 else "electronic/bright" if organic_score < 0.4 else "balanced",
        }
    }
    return results
if __name__ == "__main__":
    # CLI entry point: analyze one audio file and pretty-print the results.
    if len(sys.argv) < 2:
        print("Usage: python analyze_track.py <audio_file>")
        sys.exit(1)
    audio_path = sys.argv[1]
    results = analyze_track(audio_path)

    print("\n" + "=" * 60)
    print("GENRE UNIVERSE AUDIO ANALYSIS")
    print("=" * 60)

    print("\n📊 RAW AUDIO FEATURES:")
    for key, value in results["raw_features"].items():
        print(f" {key}: {value}")

    print("\n🎯 GENRE UNIVERSE POSITION (0-1 scale):")
    pos = results["genre_universe_position"]
    print(f" X (Valence/Mood): {pos['valence']} {'← sad' if pos['valence'] < 0.4 else '→ happy' if pos['valence'] > 0.6 else '~ neutral'}")
    print(f" Y (Tempo): {pos['tempo']} {'← slow' if pos['tempo'] < 0.4 else '→ fast' if pos['tempo'] > 0.6 else '~ medium'}")
    print(f" Z (Organic): {pos['organic']} {'← electronic' if pos['organic'] < 0.4 else '→ organic' if pos['organic'] > 0.6 else '~ balanced'}")

    print("\n⚡ SPIKE VALUES (0-1 scale):")
    for key, value in results["genre_universe_spikes"].items():
        # BUG FIX: the gauge glyphs were empty strings ("" * n + "" * m), so
        # every bar rendered blank. Restored filled/empty block characters.
        filled = int(value * 20)
        bar = "█" * filled + "░" * (20 - filled)
        print(f" {key:20} [{bar}] {value}")

    print("\n💡 INSIGHTS:")
    for key, value in results["insights"].items():
        print(f" {key}: {value}")

    print("\n" + "=" * 60)
    # Also output JSON for programmatic use
    print("\n📄 JSON OUTPUT:")
    print(json.dumps(results, indent=2))