Shubham09 committed
Commit b9fbb26 · 1 Parent(s): cb420c2

Update app.py

Files changed (1): app.py (+18 -14)
app.py CHANGED
@@ -4,6 +4,7 @@ import torch
 import gradio as gr
 from transformers import WhisperProcessor, WhisperForConditionalGeneration, WhisperTokenizer
 nltk.download("punkt")
+from transformers import pipeline
 
 
 
@@ -28,22 +29,25 @@ def load_data(input_file):
 
 # sentences = nltk.sent_tokenize(input_sentence)
 # return (' '.join([s.replace(s[0],s[0].capitalize(),1) for s in sentences]))
+pipe = pipeline(model="Shubham09/whisper31filescheck")  # change to "your-username/the-name-you-picked"
 
 def asr_transcript(input_file):
-
-    speech = load_data(input_file)
-    #Tokenize
-    input_features = processor(speech).input_features #, padding="longest" , return_tensors="pt"
-    #input_values = tokenizer(speech, return_tensors="pt").input_values
-    #Take logits
-    logits = model(input_features).logits
-    #Take argmax
-    predicted_ids = torch.argmax(logits, dim=-1)
-    #Get the words from predicted word ids
-    transcription = processor.batch_decode(predicted_ids)
-    #Correcting the letter casing
-    #transcription = correct_casing(transcription.lower())
-    return transcription
+    text = pipe(input_file)["text"]
+    return text
+
+    # speech = load_data(input_file)
+    # #Tokenize
+    # input_features = processor(speech).input_features #, padding="longest" , return_tensors="pt"
+    # #input_values = tokenizer(speech, return_tensors="pt").input_values
+    # #Take logits
+    # logits = model(input_features).logits
+    # #Take argmax
+    # predicted_ids = torch.argmax(logits, dim=-1)
+    # #Get the words from predicted word ids
+    # transcription = processor.batch_decode(predicted_ids)
+    # #Correcting the letter casing
+    # #transcription = correct_casing(transcription.lower())
+    # return transcription
 
 gr.Interface(asr_transcript,
     inputs = gr.inputs.Audio(source="microphone", type="filepath", optional=True, label="Speaker"),
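
For context, this commit swaps the manual WhisperProcessor/WhisperForConditionalGeneration decoding path for a transformers pipeline, which handles audio loading, resampling, feature extraction, and token decoding internally. A minimal standalone sketch of the resulting app follows. The model id Shubham09/whisper31filescheck and the gr.inputs.Audio call come from the diff; the explicit "automatic-speech-recognition" task name, the None guard, and the outputs="text" / launch() lines are assumptions, since the diff cuts off before the end of the gr.Interface call.

# Sketch of app.py after this commit: Whisper ASR served through a
# transformers pipeline behind a Gradio microphone demo.
import gradio as gr
from transformers import pipeline

# Model id taken from the diff; swap in your own fine-tuned checkpoint.
pipe = pipeline("automatic-speech-recognition", model="Shubham09/whisper31filescheck")

def asr_transcript(input_file):
    # Gradio passes the recording as a file path (type="filepath");
    # the pipeline reads and resamples the audio itself.
    if input_file is None:  # microphone input is optional=True, so guard against no recording
        return ""
    return pipe(input_file)["text"]

# gr.inputs.Audio matches the older Gradio API used in the diff (deprecated in
# Gradio 3.x, removed in 4.x); outputs and launch() are assumed, not shown in the diff.
gr.Interface(asr_transcript,
             inputs=gr.inputs.Audio(source="microphone", type="filepath", optional=True, label="Speaker"),
             outputs="text").launch()

Letting the pipeline own preprocessing also sidesteps a bug in the removed code: Whisper is a seq2seq model, so argmax over model(input_features).logits is not a valid decoding strategy; the pipeline uses generate() under the hood.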