158 lines
6.1 KiB
Python
158 lines
6.1 KiB
Python
# /// script
# requires-python = ">=3.10"
# dependencies = [
#     "librosa>=0.10.0",
#     "numpy",
#     "scipy",
#     "soundfile",
# ]
# ///
|
|
|
|
import librosa
|
|
import numpy as np
|
|
import sys
|
|
import json
|
|
|
|
def analyze_track(audio_path):
    """
    Analyze an audio track and extract features for Genre Universe positioning.

    Loads at most the first 180 seconds of the file at a 22050 Hz sample
    rate, computes a set of librosa features, and maps them onto rough,
    heuristic 0-1 scores.

    Args:
        audio_path: Path to the audio file; passed straight to ``librosa.load``.

    Returns:
        A JSON-serializable dict with four sections:
        ``raw_features`` (rounded raw measurements),
        ``genre_universe_position`` (valence / tempo / organic, each 0-1),
        ``genre_universe_spikes`` (energy / acousticness / danceability /
        production_density, each 0-1) and
        ``insights`` (coarse text labels derived from the scores).

    Raises:
        ValueError: If the file decodes to zero audio samples.
    """
    print(f"Loading: {audio_path}")

    # Load the audio file
    y, sr = librosa.load(audio_path, sr=22050, duration=180)  # First 3 minutes
    if y.size == 0:
        # Fail with a clear message instead of an obscure error (or NaNs)
        # from the feature extractors further down.
        raise ValueError(f"No audio samples could be read from: {audio_path}")

    print("Analyzing audio features...")

    # === TEMPO / RHYTHM ===
    tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
    # Depending on the librosa version the tempo comes back as a scalar or as
    # an array; atleast_1d handles scalars, 0-d arrays and 1-d arrays alike.
    tempo = float(np.atleast_1d(tempo).ravel()[0])

    # === ENERGY ===
    rms = librosa.feature.rms(y=y)[0]
    energy = float(np.mean(rms))
    energy_std = float(np.std(rms))  # Dynamic range indicator

    # === SPECTRAL FEATURES ===
    spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)[0]
    brightness = float(np.mean(spectral_centroid))  # Higher = brighter/more electronic

    spectral_rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)[0]
    rolloff = float(np.mean(spectral_rolloff))

    spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
    contrast = float(np.mean(spectral_contrast))

    # === ZERO CROSSING RATE (percussiveness) ===
    zcr = librosa.feature.zero_crossing_rate(y)[0]
    percussiveness = float(np.mean(zcr))

    # === CHROMA (harmonic content) ===
    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    chroma_mean = float(np.mean(chroma))

    # === ONSET STRENGTH (rhythmic density) ===
    onset_env = librosa.onset.onset_strength(y=y, sr=sr)
    rhythmic_density = float(np.mean(onset_env))

    # === HARMONIC/PERCUSSIVE SEPARATION ===
    y_harmonic, _ = librosa.effects.hpss(y)
    # The small epsilon keeps the ratio finite for all-silent input.
    harmonic_ratio = float(np.sum(np.abs(y_harmonic)) / (np.sum(np.abs(y)) + 1e-6))

    # === NORMALIZE TO 0-1 SCALES ===
    # These normalizations are rough estimates based on typical ranges

    # Tempo: 60-180 BPM typical range
    tempo_normalized = np.clip((tempo - 60) / 120, 0, 1)

    # Energy: RMS typically 0.01-0.3
    energy_normalized = np.clip(energy / 0.2, 0, 1)

    # Brightness: spectral centroid typically 1000-4000 Hz
    brightness_normalized = np.clip((brightness - 1000) / 3000, 0, 1)

    # Organic vs Electronic (inverse of brightness + harmonic ratio)
    organic_score = np.clip(harmonic_ratio * 1.5 - brightness_normalized * 0.3, 0, 1)

    # Valence estimation (very rough - higher brightness + major key tendencies = happier)
    # This is a simplification - real valence detection is complex
    valence_estimate = np.clip(0.3 + brightness_normalized * 0.3 + chroma_mean * 0.2, 0, 1)

    # Danceability (tempo in sweet spot + strong beats + rhythmic density)
    dance_tempo_factor = 1 - abs(tempo - 120) / 60  # Peak at 120 BPM
    danceability = np.clip(
        dance_tempo_factor * 0.5 + rhythmic_density * 0.3 + energy_normalized * 0.2, 0, 1
    )

    # Production density is reported alongside the other 0-1 spikes, so it is
    # clipped like them; the raw sum can leave that range.
    production_density = np.clip(1 - harmonic_ratio + energy_std * 5, 0, 1)

    results = {
        "raw_features": {
            "tempo_bpm": round(tempo, 1),
            "energy_rms": round(energy, 4),
            "energy_std": round(energy_std, 4),
            "brightness_hz": round(brightness, 1),
            "spectral_rolloff": round(rolloff, 1),
            "spectral_contrast": round(contrast, 2),
            "percussiveness": round(percussiveness, 4),
            "rhythmic_density": round(rhythmic_density, 2),
            "harmonic_ratio": round(harmonic_ratio, 3),
            "chroma_mean": round(chroma_mean, 3),
        },
        "genre_universe_position": {
            "valence": round(float(valence_estimate), 2),
            "tempo": round(float(tempo_normalized), 2),
            "organic": round(float(organic_score), 2),
        },
        "genre_universe_spikes": {
            "energy": round(float(energy_normalized), 2),
            "acousticness": round(float(organic_score * 0.8), 2),
            "danceability": round(float(danceability), 2),
            "production_density": round(float(production_density), 2),
        },
        "insights": {
            "tempo_feel": "slow" if tempo < 90 else "medium" if tempo < 130 else "fast",
            "energy_level": "low" if energy_normalized < 0.33 else "medium" if energy_normalized < 0.66 else "high",
            "sonic_character": "organic/warm" if organic_score > 0.6 else "electronic/bright" if organic_score < 0.4 else "balanced",
        },
    }

    return results
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: analyze the file named by the first argument
    # and print a human-readable report followed by the same data as JSON.
    if len(sys.argv) < 2:
        print("Usage: python analyze_track.py <audio_file>")
        sys.exit(1)

    audio_path = sys.argv[1]
    results = analyze_track(audio_path)

    print("\n" + "="*60)
    print("GENRE UNIVERSE AUDIO ANALYSIS")
    print("="*60)

    print("\n📊 RAW AUDIO FEATURES:")
    for key, value in results["raw_features"].items():
        print(f" {key}: {value}")

    print("\n🎯 GENRE UNIVERSE POSITION (0-1 scale):")
    pos = results["genre_universe_position"]
    # Each axis is labelled low (< 0.4), high (> 0.6) or in-between.
    print(f" X (Valence/Mood): {pos['valence']} {'← sad' if pos['valence'] < 0.4 else '→ happy' if pos['valence'] > 0.6 else '~ neutral'}")
    print(f" Y (Tempo): {pos['tempo']} {'← slow' if pos['tempo'] < 0.4 else '→ fast' if pos['tempo'] > 0.6 else '~ medium'}")
    print(f" Z (Organic): {pos['organic']} {'← electronic' if pos['organic'] < 0.4 else '→ organic' if pos['organic'] > 0.6 else '~ balanced'}")

    print("\n⚡ SPIKE VALUES (0-1 scale):")
    bar_width = 20
    for key, value in results["genre_universe_spikes"].items():
        # Clamp the filled portion to the bar width so a value outside 0-1
        # cannot draw a bar that is longer than bar_width cells.
        filled = min(max(int(value * bar_width), 0), bar_width)
        bar = "█" * filled + "░" * (bar_width - filled)
        print(f" {key:20} [{bar}] {value}")

    print("\n💡 INSIGHTS:")
    for key, value in results["insights"].items():
        print(f" {key}: {value}")

    print("\n" + "="*60)

    # Also output JSON for programmatic use
    print("\n📄 JSON OUTPUT:")
    print(json.dumps(results, indent=2))
|