Bishan committed on
Commit
977912d
·
1 Parent(s): 540f9af

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +115 -0
  2. packages.txt +3 -0
  3. requirements.txt +8 -0
app.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import soundfile as sf
2
+ import torch
3
+ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor,Wav2Vec2ProcessorWithLM
4
+ import gradio as gr
5
+ import sox
6
+ import subprocess
7
+
8
+
9
# Pretrained checkpoint identifier for every supported language.  The same
# identifier is passed to ``from_pretrained`` for both the processor and the
# CTC model.
MODEL_IDS = {
    "Hindi": "Harveenchadha/vakyansh-wav2vec2-hindi-him-4200",
    "Odia": "Harveenchadha/vakyansh-wav2vec2-odia-orm-100",
    "Assamese": "Harveenchadha/vakyansh-wav2vec2-assamese-asm-8",
    "Sanskrit": "Harveenchadha/vakyansh-wav2vec2-sanskrit-sam-60",
    "Punjabi": "Harveenchadha/vakyansh-wav2vec2-punjabi-pam-10",
    "Urdu": "Harveenchadha/vakyansh-wav2vec2-urdu-urm-60",
    "Rajasthani": "Harveenchadha/vakyansh-wav2vec2-rajasthani-raj-45",
    "Marathi": "Harveenchadha/vakyansh-wav2vec2-marathi-mrm-100",
    "Malayalam": "Harveenchadha/vakyansh-wav2vec2-malayalam-mlm-8",
    "Maithili": "Harveenchadha/vakyansh-wav2vec2-maithili-maim-50",
    "Dogri": "Harveenchadha/vakyansh-wav2vec2-dogri-doi-55",
    "Bhojpuri": "Harveenchadha/vakyansh-wav2vec2-bhojpuri-bhom-60",
    "Tamil": "Harveenchadha/vakyansh-wav2vec2-tamil-tam-250",
    "Telugu": "Harveenchadha/vakyansh-wav2vec2-telugu-tem-100",
    "Nepali": "Harveenchadha/vakyansh-wav2vec2-nepali-nem-130",
    "Kannada": "Harveenchadha/vakyansh-wav2vec2-kannada-knm-560",
    "Gujarati": "Harveenchadha/vakyansh-wav2vec2-gujarati-gnm-100",
    "Bengali": "Harveenchadha/vakyansh-wav2vec2-bengali-bnm-200",
    "English": "Harveenchadha/vakyansh-wav2vec2-indian-english-enm-700",
}


def read_file_and_process(wav_file, processor=None):
    """Resample an audio file to 16 kHz mono and run it through a processor.

    The resampled copy is written next to the input as ``<name>16k.wav``.

    Args:
        wav_file: Path of the audio file to transcribe.
        processor: Callable processor (as loaded in ``parse``) used to turn
            the samples into model inputs.  When omitted, a module-level
            ``processor`` is used if one exists, which keeps older call
            sites working.

    Returns:
        Whatever ``processor`` returns for the samples, requested with
        ``return_tensors="pt"`` and ``padding=True``.

    Raises:
        NameError: If no processor is supplied and no module-level
            ``processor`` is defined.
        subprocess.CalledProcessError: If the ffmpeg conversion fails.
    """
    import os  # local import so this block does not depend on file-level imports

    if processor is None:
        processor = globals().get("processor")
        if processor is None:
            raise NameError(
                "no processor supplied and no module-level 'processor' is defined"
            )

    # splitext only strips the final extension; splitting on the first "."
    # mangled paths such as "./clip.wav" or "/tmp/a.b/clip.wav".
    root, _ = os.path.splitext(wav_file)
    filename_16k = root + "16k.wav"

    resampler(wav_file, filename_16k)
    speech, _ = sf.read(filename_16k)
    return processor(speech, sampling_rate=16_000, return_tensors="pt", padding=True)


def resampler(input_file_path, output_file_path):
    """Convert an audio file to 16 kHz, mono, 16-bit using ffmpeg.

    Args:
        input_file_path: Path of the source audio file.
        output_file_path: Path the converted file is written to; an existing
            file at this path is overwritten.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters intact and prevents command injection via file names.
    command = [
        "ffmpeg", "-hide_banner", "-loglevel", "panic",
        "-y",  # overwrite without prompting, so a stale output cannot stall the call
        "-i", input_file_path,
        "-ar", "16000", "-ac", "1", "-bits_per_raw_sample", "16", "-vn",
        output_file_path,
    ]
    subprocess.run(command, check=True)


def parse_transcription(logits, processor):
    """Greedy-decode model logits into text.

    Args:
        logits: Tensor of scores whose last dimension is the vocabulary;
            only the first item along the leading dimension is decoded.
        processor: Object providing ``decode(ids, skip_special_tokens=...)``.

    Returns:
        The value returned by ``processor.decode`` for the arg-max ids.
    """
    predicted_ids = torch.argmax(logits, dim=-1)
    transcription = processor.decode(predicted_ids[0], skip_special_tokens=True)
    return transcription


def parse(wav_file, language):
    """Transcribe an audio file with the model for the chosen language.

    Args:
        wav_file: Path of the audio file to transcribe.
        language: One of the keys of ``MODEL_IDS``.

    Returns:
        The decoded transcription.

    Raises:
        ValueError: If no audio path is given or the language is unsupported.
    """
    if not wav_file:
        raise ValueError("No audio file was provided.")
    try:
        model_id = MODEL_IDS[language]
    except KeyError:
        raise ValueError(f"Unsupported language: {language!r}") from None

    processor = Wav2Vec2Processor.from_pretrained(model_id)
    model = Wav2Vec2ForCTC.from_pretrained(model_id)

    # Pass the processor explicitly: it is local to this function, so the
    # helper cannot find it as a module-level name.
    input_values = read_file_and_process(wav_file, processor)
    with torch.no_grad():
        logits = model(**input_values).logits

    return parse_transcription(logits, processor)
100
+
101
+
102
# Languages offered in the drop-down; each one has a matching branch in parse().
options = [
    'Hindi', 'Odia', 'Assamese', 'Sanskrit', 'Punjabi', 'Urdu', 'Rajasthani',
    'Marathi', 'Malayalam', 'Maithili', 'Dogri', 'Bhojpuri', 'Tamil', 'Telugu',
    'Nepali', 'Kannada', 'Gujarati', 'Bengali', 'English',
]

# Input widgets: the language selector (Hindi preselected) and an audio
# upload that hands parse() a file path.
language = gr.Dropdown(options, label="Select language", value="Hindi")
input_ = gr.Audio(source="upload", type="filepath")

# Output widget that displays the transcription returned by parse().
txtbox = gr.Textbox(label="Output from model will appear here:", lines=5)

# Build the interface around parse(audio_path, language) and start serving.
gr.Interface(
    parse,
    inputs=[input_, language],
    outputs=txtbox,
    streaming=True,
    interactive=True,
    analytics_enabled=False,
    show_tips=False,
    enable_queue=True,
).launch(inline=False)
packages.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ libsndfile1
2
+ sox
3
+ ffmpeg
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ gradio
2
+ https://github.com/kpu/kenlm/archive/master.zip
3
+ pyctcdecode
4
+ soundfile
5
+ torch
6
+ transformers
7
+ sox
8
+ scipy