Shubham09 committed
Commit b9fbb26 · 1 Parent(s): cb420c2

Update app.py

Files changed (1): app.py (+18 -14)
app.py CHANGED
@@ -4,6 +4,7 @@ import torch
 import gradio as gr
 from transformers import WhisperProcessor, WhisperForConditionalGeneration, WhisperTokenizer
 nltk.download("punkt")
+from transformers import pipeline
 
 
 
@@ -28,22 +29,25 @@ def load_data(input_file):
 
 # sentences = nltk.sent_tokenize(input_sentence)
 # return (' '.join([s.replace(s[0],s[0].capitalize(),1) for s in sentences]))
+pipe = pipeline(model="Shubham09/whisper31filescheck")  # change to "your-username/the-name-you-picked"
 
 def asr_transcript(input_file):
-
-    speech = load_data(input_file)
-    #Tokenize
-    input_features = processor(speech).input_features #, padding="longest" , return_tensors="pt"
-    #input_values = tokenizer(speech, return_tensors="pt").input_values
-    #Take logits
-    logits = model(input_features).logits
-    #Take argmax
-    predicted_ids = torch.argmax(logits, dim=-1)
-    #Get the words from predicted word ids
-    transcription = processor.batch_decode(predicted_ids)
-    #Correcting the letter casing
-    #transcription = correct_casing(transcription.lower())
-    return transcription
+    text = pipe(input_file)["text"]
+    return text
+
+    # speech = load_data(input_file)
+    # #Tokenize
+    # input_features = processor(speech).input_features #, padding="longest" , return_tensors="pt"
+    # #input_values = tokenizer(speech, return_tensors="pt").input_values
+    # #Take logits
+    # logits = model(input_features).logits
+    # #Take argmax
+    # predicted_ids = torch.argmax(logits, dim=-1)
+    # #Get the words from predicted word ids
+    # transcription = processor.batch_decode(predicted_ids)
+    # #Correcting the letter casing
+    # #transcription = correct_casing(transcription.lower())
+    # return transcription
 
 gr.Interface(asr_transcript,
     inputs = gr.inputs.Audio(source="microphone", type="filepath", optional=True, label="Speaker"),
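
For context, this commit swaps the manual WhisperProcessor/WhisperForConditionalGeneration decoding path for a transformers pipeline, which handles audio loading, resampling, feature extraction, and token decoding internally. A minimal standalone sketch of the resulting app follows. The model id Shubham09/whisper31filescheck and the gr.inputs.Audio call come from the diff; the explicit "automatic-speech-recognition" task name, the None guard, and the outputs="text" / launch() lines are assumptions, since the diff cuts off before the end of the gr.Interface call.

# Sketch of app.py after this commit: Whisper ASR served through a
# transformers pipeline behind a Gradio microphone demo.
import gradio as gr
from transformers import pipeline

# Model id taken from the diff; swap in your own fine-tuned checkpoint.
pipe = pipeline("automatic-speech-recognition", model="Shubham09/whisper31filescheck")

def asr_transcript(input_file):
    # Gradio passes the recording as a file path (type="filepath");
    # the pipeline reads and resamples the audio itself.
    if input_file is None:  # microphone input is optional=True, so guard against no recording
        return ""
    return pipe(input_file)["text"]

# gr.inputs.Audio matches the older Gradio API used in the diff (deprecated in
# Gradio 3.x, removed in 4.x); outputs and launch() are assumed, not shown in the diff.
gr.Interface(asr_transcript,
             inputs=gr.inputs.Audio(source="microphone", type="filepath", optional=True, label="Speaker"),
             outputs="text").launch()

Letting the pipeline own preprocessing also sidesteps a bug in the removed code: Whisper is a seq2seq model, so argmax over model(input_features).logits is not a valid decoding strategy; the pipeline uses generate() under the hood.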