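Remove whisperx transcription (comment out the transcribe function, its API branch, its tab name, and the whisperX dependency)
Browse files
- app.py +52 -52
- requirements.txt +1 -1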
app.py
CHANGED
@@ -11,7 +11,7 @@ from PIL import Image, ImageOps
|
|
11 |
import numpy as np
|
12 |
from simple_lama_inpainting import SimpleLama
|
13 |
from contextlib import contextmanager
|
14 |
-
import whisperx
|
15 |
import gc
|
16 |
|
17 |
@contextmanager
|
@@ -174,61 +174,61 @@ def erase(image=None, mask=None):
|
|
174 |
return simple_lama(image, mask)
|
175 |
|
176 |
|
177 |
-
def transcribe(audio):
|
178 |
-
|
179 |
-
|
180 |
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
|
224 |
-
|
225 |
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
|
233 |
|
234 |
@spaces.GPU(duration=120)
|
@@ -245,8 +245,8 @@ def main(*args):
|
|
245 |
# return mask_generation(*args)
|
246 |
elif api_num == 5:
|
247 |
return erase(*args)
|
248 |
-
elif api_num == 6:
|
249 |
-
|
250 |
|
251 |
|
252 |
rmbg_tab = gr.Interface(
|
@@ -367,7 +367,7 @@ demo = gr.TabbedInterface(
|
|
367 |
"inpainting",
|
368 |
# "sam2",
|
369 |
"erase",
|
370 |
-
"transcribe",
|
371 |
],
|
372 |
title="Utilities that require GPU",
|
373 |
)
|
|
|
11 |
import numpy as np
|
12 |
from simple_lama_inpainting import SimpleLama
|
13 |
from contextlib import contextmanager
|
14 |
+
# import whisperx
|
15 |
import gc
|
16 |
|
17 |
@contextmanager
|
|
|
174 |
return simple_lama(image, mask)
|
175 |
|
176 |
|
177 |
+
# def transcribe(audio):
|
178 |
+
# if audio is None:
|
179 |
+
# raise gr.Error("No audio file submitted!")
|
180 |
|
181 |
+
# device = "cuda" if torch.cuda.is_available() else "cpu"
|
182 |
+
# compute_type = "float16"
|
183 |
+
# batch_size = 8 # reduced batch size to be conservative with memory
|
184 |
|
185 |
+
# try:
|
186 |
+
# # 1. Load model and transcribe
|
187 |
+
# model = whisperx.load_model("large-v2", device, compute_type=compute_type)
|
188 |
+
# audio_input = whisperx.load_audio(audio)
|
189 |
+
# result = model.transcribe(audio_input, batch_size=batch_size)
|
190 |
|
191 |
+
# # Clear GPU memory
|
192 |
+
# del model
|
193 |
+
# gc.collect()
|
194 |
+
# torch.cuda.empty_cache()
|
195 |
+
|
196 |
+
# # 2. Align whisper output
|
197 |
+
# model_a, metadata = whisperx.load_align_model(language_code=result["language"], device=device)
|
198 |
+
# result = whisperx.align(result["segments"], model_a, metadata, audio_input, device, return_char_alignments=False)
|
199 |
+
|
200 |
+
# # Clear GPU memory
|
201 |
+
# del model_a
|
202 |
+
# gc.collect()
|
203 |
+
# torch.cuda.empty_cache()
|
204 |
+
|
205 |
+
# # 3. Assign speaker labels
|
206 |
+
# diarize_model = whisperx.DiarizationPipeline(device=device)
|
207 |
+
# diarize_segments = diarize_model(audio_input)
|
208 |
|
209 |
+
# # Combine transcription with speaker diarization
|
210 |
+
# result = whisperx.assign_word_speakers(diarize_segments, result)
|
211 |
+
|
212 |
+
# # Format output with speaker labels and timestamps
|
213 |
+
# formatted_text = []
|
214 |
+
# for segment in result["segments"]:
|
215 |
+
# if not isinstance(segment, dict):
|
216 |
+
# continue
|
217 |
|
218 |
+
# speaker = f"[Speaker {segment.get('speaker', 'Unknown')}]"
|
219 |
+
# start_time = f"{float(segment.get('start', 0)):.2f}"
|
220 |
+
# end_time = f"{float(segment.get('end', 0)):.2f}"
|
221 |
+
# text = segment.get('text', '').strip()
|
222 |
+
# formatted_text.append(f"[{start_time}s - {end_time}s] {speaker}: {text}")
|
223 |
|
224 |
+
# return "\n".join(formatted_text)
|
225 |
|
226 |
+
# except Exception as e:
|
227 |
+
# raise gr.Error(f"Transcription failed: {str(e)}")
|
228 |
+
# finally:
|
229 |
+
# # Ensure GPU memory is cleared even if an error occurs
|
230 |
+
# gc.collect()
|
231 |
+
# torch.cuda.empty_cache()
|
232 |
|
233 |
|
234 |
@spaces.GPU(duration=120)
|
|
|
245 |
# return mask_generation(*args)
|
246 |
elif api_num == 5:
|
247 |
return erase(*args)
|
248 |
+
# elif api_num == 6:
|
249 |
+
# return transcribe(*args)
|
250 |
|
251 |
|
252 |
rmbg_tab = gr.Interface(
|
|
|
367 |
"inpainting",
|
368 |
# "sam2",
|
369 |
"erase",
|
370 |
+
# "transcribe",
|
371 |
],
|
372 |
title="Utilities that require GPU",
|
373 |
)
|
requirements.txt
CHANGED
@@ -22,4 +22,4 @@ einops
|
|
22 |
# git+https://github.com/facebookresearch/sam2.git
|
23 |
matplotlib
|
24 |
simple-lama-inpainting
|
25 |
-
git+https://github.com/m-bain/whisperX.git
|
|
|
22 |
# git+https://github.com/facebookresearch/sam2.git
|
23 |
matplotlib
|
24 |
simple-lama-inpainting
|
25 |
+
# git+https://github.com/m-bain/whisperX.git
|