Spaces:

Pavithiran
/

SAGAN

Sleeping

App Files Files Community

Pavithiran committed on May 20

Commit

c98cb1d

verified ·

1 Parent(s): 9ee0fa2

Create sagan_inference.py

Browse files

Files changed (1) hide show

sagan_inference.py +65 -0

sagan_inference.py ADDED Viewed

	@@ -0,0 +1,65 @@

import torch
import numpy as np
import librosa
from huggingface_hub import hf_hub_download

from sagan_model import SAGANModel  # your model definition

### 1) Download & load your SAGAN weights from your HF repo ###
# NOTE: everything below runs at import time, so importing this module
# triggers the Hub download (or a cache lookup) and the model construction.
SAGAN_WEIGHTS_PATH = hf_hub_download(
    repo_id="YOUR_USERNAME/sagan-space",   # ← replace with your HF namespace
    filename="sagan_weights.pth",
)
model = SAGANModel()
# The checkpoint is a downloaded file, and plain torch.load() unpickles
# arbitrary objects. weights_only=True restricts loading to tensors and
# primitive containers, which is all load_state_dict() needs.
state_dict = torch.load(
    SAGAN_WEIGHTS_PATH,
    map_location="cpu",
    weights_only=True,
)
model.load_state_dict(state_dict)
model.eval()
### 2) Age-group Z-score stats (proxy values from literature) ###
import math

# Reference mean ("mu") and standard deviation ("sigma") for each metric,
# keyed first by age group and then by metric name.
STATS = dict(
    kindergarten=dict(
        pitch=dict(mu=30.0, sigma=29.0),   # Wise & Sloboda (2008)
        rhythm=dict(mu=60.0, sigma=15.0),  # Demorest & Pfordresher (2015)
        timbre=dict(mu=0.65, sigma=0.10),
    ),
    grade_6=dict(
        pitch=dict(mu=43.0, sigma=26.0),
        rhythm=dict(mu=75.0, sigma=10.0),
        timbre=dict(mu=0.75, sigma=0.08),
    ),
    adult=dict(
        pitch=dict(mu=32.0, sigma=19.0),
        rhythm=dict(mu=80.0, sigma=8.0),
        timbre=dict(mu=0.85, sigma=0.05),
    ),
)
def sigmoid(z: float) -> float:
    """Return the logistic function ``1 / (1 + e**-z)`` of *z*.

    The computation is split by sign so the argument passed to ``math.exp``
    is never positive. ``math.exp`` raises ``OverflowError`` for arguments
    above roughly 709, which a single-formula version hits for very
    negative *z*; here such inputs simply evaluate to (nearly) 0.0.
    """
    if z >= 0:
        return 1 / (1 + math.exp(-z))
    # z < 0: e**z lies in (0, 1), so this algebraically equal form is safe.
    exp_z = math.exp(z)
    return exp_z / (1 + exp_z)
def z_score_standardize(raw_metrics: dict, age_group: str) -> dict:
    """Map raw metric values to 0-1 scores relative to an age group.

    Each value is converted to a z-score using the ``mu`` and ``sigma``
    stored in ``STATS[age_group]`` for that metric, passed through
    ``sigmoid``, and rounded to three decimal places.

    Args:
        raw_metrics: Metric name -> raw value. Every name must be present
            in ``STATS[age_group]``; otherwise the lookup raises ``KeyError``.
        age_group: One of the keys of ``STATS``.

    Returns:
        A new dict with the same keys as ``raw_metrics`` and the
        standardized scores as values.

    Raises:
        ValueError: If ``age_group`` is not a key of ``STATS``.
    """
    reference = STATS.get(age_group)
    if reference is None:
        raise ValueError(f"Unknown age_group '{age_group}'")

    def _score(metric, value):
        # Read the mean first, then the spread, for this metric.
        mean = reference[metric]["mu"]
        spread = reference[metric]["sigma"]
        return round(sigmoid((value - mean) / spread), 3)

    return {metric: _score(metric, value) for metric, value in raw_metrics.items()}
def _select_metric(metrics, name: str, position: int):
    """Return the metric stored under *name*, else the one at *position*."""
    absent = object()  # sentinel so a stored falsy/None value is not mistaken for "missing"
    lookup = getattr(metrics, "get", None)
    if lookup is not None:
        value = lookup(name, absent)
        if value is not absent:
            return value
    # No ``.get`` (e.g. tuple/list) or the name is missing: fall back to position.
    return metrics[position]


def run_sagan(wav_path: str) -> dict:
    """Load an audio file, evaluate it with the model, and return raw metrics.

    Steps:
        1) Load the audio at 16 kHz, mono.
        2) Call ``model.evaluate(y, sr)`` under ``torch.no_grad()``.
        3) Normalize the result to the keys ``pitch``, ``rhythm``, ``timbre``.

    The positional fallback is resolved lazily. Evaluating ``metrics[0]``
    up front as the default of ``.get`` fails for a plain dict result
    (``KeyError``), and calling ``.get`` fails for a tuple/list result
    (``AttributeError``).

    NOTE(review): the return type of ``model.evaluate`` is not visible in
    this file. Results are accepted either keyed by ``pitch_accuracy`` /
    ``rhythm_consistency`` / ``timbre_score`` or indexed by 0 / 1 / 2.

    Args:
        wav_path: Path to the audio file to evaluate.

    Returns:
        Dict with float values under ``pitch``, ``rhythm`` and ``timbre``.

    Raises:
        KeyError, IndexError: If a metric is available neither by name nor
            by position.
    """
    y, sr = librosa.load(wav_path, sr=16000, mono=True)
    with torch.no_grad():
        metrics = model.evaluate(y, sr)
    # Ensure keys:
    return {
        "pitch": float(_select_metric(metrics, "pitch_accuracy", 0)),
        "rhythm": float(_select_metric(metrics, "rhythm_consistency", 1)),
        "timbre": float(_select_metric(metrics, "timbre_score", 2)),
    }