ras0k committed on
Commit
9d171cc
·
1 Parent(s): e9e7628
Files changed (2) hide show
  1. __pycache__/app.cpython-310.pyc +0 -0
  2. app.py +5 -10
__pycache__/app.cpython-310.pyc ADDED
Binary file (808 Bytes). View file
 
app.py CHANGED
@@ -3,7 +3,7 @@ import whisperx
3
  import whisper
4
 
5
  def transcribe(audio_file):
6
- device = "cuda"
7
 
8
  # Transcribe with original Whisper
9
  model = whisper.load_model("large", device)
@@ -15,14 +15,9 @@ def transcribe(audio_file):
15
  # Align Whisper output
16
  result_aligned = whisperx.align(result["segments"], model_a, metadata, audio_file, device)
17
 
18
- return result_aligned["segments"], result_aligned["word_segments"]
19
 
20
- # Define Gradio interface
21
- inputs = gr.inputs.Audio(source="upload", type="file")
22
- outputs = [
23
- gr.outputs.Textbox(label="Segments (before alignment)"),
24
- gr.outputs.Textbox(label="Segments (after alignment)"),
25
- ]
26
 
27
- iface = gr.Interface(fn=transcribe, inputs=inputs, outputs=outputs, title="WhisperX Transcription")
28
- iface.launch()
 
3
  import whisper
4
 
5
  def transcribe(audio_file):
6
+ device = "cuda" if torch.cuda.is_available() else "cpu"
7
 
8
  # Transcribe with original Whisper
9
  model = whisper.load_model("large", device)
 
15
  # Align Whisper output
16
  result_aligned = whisperx.align(result["segments"], model_a, metadata, audio_file, device)
17
 
18
+ return {"aligned": result_aligned["segments"], "word_segments": result_aligned["word_segments"]}
19
 
20
+ inputs = gr.inputs.Audio(source="upload", type="filepath")
21
+ outputs = gr.outputs.JSON(type="auto")
 
 
 
 
22
 
23
+ gr.Interface(fn=transcribe, inputs=inputs, outputs=outputs).launch()