Gapeleon committed on
Commit
ff11e39
·
verified ·
1 Parent(s): d54f19d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -9
app.py CHANGED
@@ -17,7 +17,7 @@ except ImportError as e:
17
  raise ImportError("Could not import SNAC. Make sure 'snac' is listed in requirements.txt and installed correctly.") from e
18
 
19
  # --- Configuration ---
20
- TARGET_SR = 24000 # SNAC operates at 24kHz
21
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
22
  print(f"Using device: {DEVICE}")
23
 
@@ -26,7 +26,7 @@ snac_model = None
26
  try:
27
  print("Loading SNAC model...")
28
  start_time = time.time()
29
- snac_model = SNAC.from_pretrained("hubertsiuzdak/snac_24khz")
30
  snac_model = snac_model.to(DEVICE)
31
  snac_model.eval() # Set model to evaluation mode
32
  end_time = time.time()
@@ -80,7 +80,7 @@ def process_audio(audio_filepath):
80
  waveform_to_encode = resampler(original_waveform)
81
  logs.append(f"Resampling complete. New Shape: {waveform_to_encode.shape}")
82
  else:
83
- logs.append("Waveform is already at the target sample rate (24kHz).")
84
  waveform_to_encode = original_waveform
85
  resample_end = time.time()
86
  logs.append(f"Resampling time: {resample_end - resample_start:.2f}s")
@@ -141,12 +141,12 @@ def process_audio(audio_filepath):
141
 
142
  # --- Gradio Interface ---
143
  DESCRIPTION = """
144
- This Space demonstrates the **SNAC (Scalable Neural Audio Codec)** model (`hubertsiuzdak/snac_24khz`).
145
  1. Upload an audio file (wav, mp3, flac, etc.).
146
- 2. The audio will be automatically resampled to 24kHz if needed.
147
- 3. The 24kHz audio is encoded into discrete codes by SNAC.
148
  4. These codes are then decoded back into audio by SNAC.
149
- 5. You can listen to the original, the 24kHz version (if resampled), and the final reconstructed audio.
150
 
151
  **Note:** Processing happens on the server. Larger files will take longer. If the input is stereo, only the first channel is processed.
152
  """
@@ -156,11 +156,11 @@ iface = gr.Interface(
156
  inputs=gr.Audio(type="filepath", label="Upload Audio File"),
157
  outputs=[
158
  gr.Audio(label="Original Audio"),
159
- gr.Audio(label="Resampled Audio (24kHz Input to SNAC)"),
160
  gr.Audio(label="Reconstructed Audio (Output from SNAC)"),
161
  gr.Textbox(label="Log Output", lines=15)
162
  ],
163
- title="SNAC Audio Codec Demo (24kHz)",
164
  description=DESCRIPTION,
165
  examples=[
166
  # Add paths to example audio files if you upload some to your Space repo
 
17
  raise ImportError("Could not import SNAC. Make sure 'snac' is listed in requirements.txt and installed correctly.") from e
18
 
19
  # --- Configuration ---
20
+ TARGET_SR = 32000 # SNAC operates at 32kHz
21
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
22
  print(f"Using device: {DEVICE}")
23
 
 
26
  try:
27
  print("Loading SNAC model...")
28
  start_time = time.time()
29
+ snac_model = SNAC.from_pretrained("hubertsiuzdak/snac_32khz")
30
  snac_model = snac_model.to(DEVICE)
31
  snac_model.eval() # Set model to evaluation mode
32
  end_time = time.time()
 
80
  waveform_to_encode = resampler(original_waveform)
81
  logs.append(f"Resampling complete. New Shape: {waveform_to_encode.shape}")
82
  else:
83
+ logs.append("Waveform is already at the target sample rate (32kHz).")
84
  waveform_to_encode = original_waveform
85
  resample_end = time.time()
86
  logs.append(f"Resampling time: {resample_end - resample_start:.2f}s")
 
141
 
142
  # --- Gradio Interface ---
143
  DESCRIPTION = """
144
+ This Space demonstrates the **SNAC (Scalable Neural Audio Codec)** model (`hubertsiuzdak/snac_32khz`).
145
  1. Upload an audio file (wav, mp3, flac, etc.).
146
+ 2. The audio will be automatically resampled to 32kHz if needed.
147
+ 3. The 32kHz audio is encoded into discrete codes by SNAC.
148
  4. These codes are then decoded back into audio by SNAC.
149
+ 5. You can listen to the original, the 32kHz version (if resampled), and the final reconstructed audio.
150
 
151
  **Note:** Processing happens on the server. Larger files will take longer. If the input is stereo, only the first channel is processed.
152
  """
 
156
  inputs=gr.Audio(type="filepath", label="Upload Audio File"),
157
  outputs=[
158
  gr.Audio(label="Original Audio"),
159
+ gr.Audio(label="Resampled Audio (32kHz Input to SNAC)"),
160
  gr.Audio(label="Reconstructed Audio (Output from SNAC)"),
161
  gr.Textbox(label="Log Output", lines=15)
162
  ],
163
+ title="SNAC Audio Codec Demo (32kHz)",
164
  description=DESCRIPTION,
165
  examples=[
166
  # Add paths to example audio files if you upload some to your Space repo