Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -17,7 +17,7 @@ except ImportError as e:
|
|
17 |
raise ImportError("Could not import SNAC. Make sure 'snac' is listed in requirements.txt and installed correctly.") from e
|
18 |
|
19 |
# --- Configuration ---
|
20 |
-
TARGET_SR =
|
21 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
22 |
print(f"Using device: {DEVICE}")
|
23 |
|
@@ -26,7 +26,7 @@ snac_model = None
|
|
26 |
try:
|
27 |
print("Loading SNAC model...")
|
28 |
start_time = time.time()
|
29 |
-
snac_model = SNAC.from_pretrained("hubertsiuzdak/
|
30 |
snac_model = snac_model.to(DEVICE)
|
31 |
snac_model.eval() # Set model to evaluation mode
|
32 |
end_time = time.time()
|
@@ -80,7 +80,7 @@ def process_audio(audio_filepath):
|
|
80 |
waveform_to_encode = resampler(original_waveform)
|
81 |
logs.append(f"Resampling complete. New Shape: {waveform_to_encode.shape}")
|
82 |
else:
|
83 |
-
logs.append("Waveform is already at the target sample rate (
|
84 |
waveform_to_encode = original_waveform
|
85 |
resample_end = time.time()
|
86 |
logs.append(f"Resampling time: {resample_end - resample_start:.2f}s")
|
@@ -141,12 +141,12 @@ def process_audio(audio_filepath):
|
|
141 |
|
142 |
# --- Gradio Interface ---
|
143 |
DESCRIPTION = """
|
144 |
-
This Space demonstrates the **SNAC (Scalable Neural Audio Codec)** model (`hubertsiuzdak/
|
145 |
1. Upload an audio file (wav, mp3, flac, etc.).
|
146 |
-
2. The audio will be automatically resampled to
|
147 |
-
3. The
|
148 |
4. These codes are then decoded back into audio by SNAC.
|
149 |
-
5. You can listen to the original, the
|
150 |
|
151 |
**Note:** Processing happens on the server. Larger files will take longer. If the input is stereo, only the first channel is processed.
|
152 |
"""
|
@@ -156,11 +156,11 @@ iface = gr.Interface(
|
|
156 |
inputs=gr.Audio(type="filepath", label="Upload Audio File"),
|
157 |
outputs=[
|
158 |
gr.Audio(label="Original Audio"),
|
159 |
-
gr.Audio(label="Resampled Audio (
|
160 |
gr.Audio(label="Reconstructed Audio (Output from SNAC)"),
|
161 |
gr.Textbox(label="Log Output", lines=15)
|
162 |
],
|
163 |
-
title="SNAC Audio Codec Demo (
|
164 |
description=DESCRIPTION,
|
165 |
examples=[
|
166 |
# Add paths to example audio files if you upload some to your Space repo
|
|
|
17 |
raise ImportError("Could not import SNAC. Make sure 'snac' is listed in requirements.txt and installed correctly.") from e
|
18 |
|
19 |
# --- Configuration ---
|
20 |
+
TARGET_SR = 32000 # SNAC operates at 32kHz
|
21 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
22 |
print(f"Using device: {DEVICE}")
|
23 |
|
|
|
26 |
try:
|
27 |
print("Loading SNAC model...")
|
28 |
start_time = time.time()
|
29 |
+
snac_model = SNAC.from_pretrained("hubertsiuzdak/snac_32khz")
|
30 |
snac_model = snac_model.to(DEVICE)
|
31 |
snac_model.eval() # Set model to evaluation mode
|
32 |
end_time = time.time()
|
|
|
80 |
waveform_to_encode = resampler(original_waveform)
|
81 |
logs.append(f"Resampling complete. New Shape: {waveform_to_encode.shape}")
|
82 |
else:
|
83 |
+
logs.append("Waveform is already at the target sample rate (32kHz).")
|
84 |
waveform_to_encode = original_waveform
|
85 |
resample_end = time.time()
|
86 |
logs.append(f"Resampling time: {resample_end - resample_start:.2f}s")
|
|
|
141 |
|
142 |
# --- Gradio Interface ---
|
143 |
DESCRIPTION = """
|
144 |
+
This Space demonstrates the **SNAC (Scalable Neural Audio Codec)** model (`hubertsiuzdak/snac_32khz`).
|
145 |
1. Upload an audio file (wav, mp3, flac, etc.).
|
146 |
+
2. The audio will be automatically resampled to 32kHz if needed.
|
147 |
+
3. The 32kHz audio is encoded into discrete codes by SNAC.
|
148 |
4. These codes are then decoded back into audio by SNAC.
|
149 |
+
5. You can listen to the original, the 32kHz version (if resampled), and the final reconstructed audio.
|
150 |
|
151 |
**Note:** Processing happens on the server. Larger files will take longer. If the input is stereo, only the first channel is processed.
|
152 |
"""
|
|
|
156 |
inputs=gr.Audio(type="filepath", label="Upload Audio File"),
|
157 |
outputs=[
|
158 |
gr.Audio(label="Original Audio"),
|
159 |
+
gr.Audio(label="Resampled Audio (32kHz Input to SNAC)"),
|
160 |
gr.Audio(label="Reconstructed Audio (Output from SNAC)"),
|
161 |
gr.Textbox(label="Log Output", lines=15)
|
162 |
],
|
163 |
+
title="SNAC Audio Codec Demo (32kHz)",
|
164 |
description=DESCRIPTION,
|
165 |
examples=[
|
166 |
# Add paths to example audio files if you upload some to your Space repo
|