DarwinAnim8or committed on
Commit
253867d
·
1 Parent(s): 02ad7fc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -16
app.py CHANGED
@@ -1,12 +1,24 @@
1
  import gradio as gr
2
- from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, WhisperForConditionalGeneration, WhisperProcessor
3
 
4
  # Load the model and processor
5
  model_id = "openai/whisper-medium"
6
- processor = WhisperProcessor.from_pretrained(model_id)
7
 
8
- model = WhisperForConditionalGeneration.from_pretrained(model_id)
9
- model.config.forced_decoder_ids = None
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
  def transcribelocal(microphone, file_upload):
12
  # Check which input is not None
@@ -15,18 +27,8 @@ def transcribelocal(microphone, file_upload):
15
  else:
16
  audio = file_upload
17
 
18
- # Use the processor to transcribe the audio
19
- transcription = processor.transcribe(audio, 48)
20
-
21
- # Extract the confidence score and the duration from the transcription
22
- confidence = transcription.confidence
23
- duration = transcription.duration
24
-
25
- # Remove the special tokens from the transcription text
26
- text = transcription.text.replace("<|startoftranscript|>", "").replace("<|endoftranscript|>", "")
27
-
28
- # Return the text, confidence and duration as outputs
29
- return text, confidence, duration
30
 
31
  # Create a Gradio interface with two modes: realtime and file upload
32
  iface = gr.Interface(
 
1
  import gradio as gr
2
+ from transformers import pipeline
3
 
4
  # Load the model and processor
5
  model_id = "openai/whisper-medium"
 
6
 
7
+ device = 0 if torch.cuda.is_available() else "cpu"
8
+ BATCH_SIZE = 8
9
+ pipe = pipeline(
10
+ task="automatic-speech-recognition",
11
+ model=model_id,
12
+ chunk_length_s=30,
13
+ device=device,
14
+ )
15
+
16
+ def transcribe(inputs, task):
17
+ if inputs is None:
18
+ raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
19
+
20
+ text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
21
+ return text
22
 
23
  def transcribelocal(microphone, file_upload):
24
  # Check which input is not None
 
27
  else:
28
  audio = file_upload
29
 
30
+ text = pipe(audio, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
31
+ return text
 
 
 
 
 
 
 
 
 
 
32
 
33
  # Create a Gradio interface with two modes: realtime and file upload
34
  iface = gr.Interface(