File size: 5,525 Bytes
5baf1ba
 
4da13bc
a13d2fb
4da13bc
 
5baf1ba
eaa3add
6dd1216
5baf1ba
 
 
6dd1216
5baf1ba
eaa3add
 
 
 
 
5baf1ba
 
 
 
a875242
 
cb9e139
a875242
 
cb9e139
 
a875242
cb9e139
eaa3add
 
 
 
a875242
 
cb9e139
a875242
 
 
 
 
cb9e139
5baf1ba
 
 
 
 
 
 
 
 
b54b57f
5baf1ba
eaa3add
 
 
 
9fc1cf9
 
 
a875242
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9fc1cf9
cb9e139
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9fc1cf9
 
 
 
 
 
cb9e139
 
 
 
9fc1cf9
 
 
 
 
 
 
 
71ca02a
 
 
 
 
 
 
 
 
cb9e139
a875242
 
9fc1cf9
 
 
5baf1ba
 
6dd1216
5baf1ba
6dd1216
eaa3add
b4d2c26
 
6dd1216
5baf1ba
 
 
ea5222d
5baf1ba
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
import gradio as gr
from transformers import pipeline
import os
import re
# Fetch and install the indic-trans package at import time; the
# transliterating transcribers below import `indictrans` from it.
# NOTE(review): the os.system() return codes are ignored, so a failed clone
# or install is not reported here - confirm that is acceptable.
os.system('git clone https://github.com/irshadbhat/indic-trans.git')
os.system('pip install ./indic-trans/.')
# Speech-recognition pipelines created once at module load:
# p1 uses the Odia checkpoint, p2 the Hindi checkpoint.
p1= pipeline(task="automatic-speech-recognition", model="cdactvm/w2v-bert-2.0-odia_v1")
p2=pipeline(task="automatic-speech-recognition", model="cdactvm/w2v-bert-2.0-hindi_v1")

def transcribe_odiya(speech):
    """Run the Odia ASR pipeline ``p1`` on *speech* and return its text.

    Args:
        speech: Audio input, forwarded unchanged to ``p1``.

    Returns:
        The value stored under the ``"text"`` key of the pipeline result.
    """
    recognised = p1(speech)
    return recognised["text"]
def transcribe_hindi(speech):
    """Run the Hindi ASR pipeline ``p2`` on *speech* and return its text.

    Args:
        speech: Audio input, forwarded unchanged to ``p2``.

    Returns:
        The value stored under the ``"text"`` key of the pipeline result.
    """
    recognised = p2(speech)
    return recognised["text"]
def transcribe_odiya_eng(speech):
    """Transcribe Odia speech and return it transliterated to Latin script.

    The text produced by the ``p1`` pipeline is transliterated from ``ori``
    to ``eng`` and then passed through ``replace_words``,
    ``process_doubles`` and ``process_transcription`` in that order.

    Args:
        speech: Audio input, forwarded unchanged to ``p1``.

    Returns:
        The post-processed string, or an ``"Error: ..."`` message when the
        ASR step or the transliteration step yields ``None``.
    """
    # Imported lazily because the package is installed at module start-up.
    from indictrans import Transliterator

    # Reuse one transliterator across requests instead of constructing a new
    # one (and its lookup, requested via build_lookup=True) on every call.
    trn = getattr(transcribe_odiya_eng, "_transliterator", None)
    if trn is None:
        trn = Transliterator(source='ori', target='eng', build_lookup=True)
        transcribe_odiya_eng._transliterator = trn

    text = p1(speech)["text"]
    if text is None:
        return "Error: ASR returned None"
    sentence = trn.transform(text)
    if sentence is None:
        return "Error: Transliteration returned None"
    replaced_words = replace_words(sentence)
    processed_sentence = process_doubles(replaced_words)
    return process_transcription(processed_sentence)

def transcribe_hin_eng(speech):
    """Transcribe Hindi speech and return it transliterated to Latin script.

    The text produced by the ``p2`` pipeline is transliterated from ``hin``
    to ``eng`` and then passed through ``replace_words``,
    ``process_doubles`` and ``process_transcription`` in that order.

    Args:
        speech: Audio input, forwarded unchanged to ``p2``.

    Returns:
        The post-processed string, or an ``"Error: ..."`` message when the
        ASR step or the transliteration step yields ``None``.
    """
    # Imported lazily because the package is installed at module start-up.
    from indictrans import Transliterator

    # Reuse one transliterator across requests instead of constructing a new
    # one (and its lookup, requested via build_lookup=True) on every call.
    trn = getattr(transcribe_hin_eng, "_transliterator", None)
    if trn is None:
        trn = Transliterator(source='hin', target='eng', build_lookup=True)
        transcribe_hin_eng._transliterator = trn

    text = p2(speech)["text"]
    if text is None:
        return "Error: ASR returned None"
    sentence = trn.transform(text)
    if sentence is None:
        return "Error: Transliteration returned None"
    replaced_words = replace_words(sentence)
    processed_sentence = process_doubles(replaced_words)
    return process_transcription(processed_sentence)
    
def sel_lng(lng, mic=None, file=None):
    """Send the supplied audio to the transcriber selected by *lng*.

    Args:
        lng: One of ``"Hindi"``, ``"Hindi-trans"``, ``"Odiya"`` or
            ``"Odiya-trans"``.
        mic: Audio from the microphone; used in preference to *file* when
            both are given.
        file: Uploaded audio; used only when *mic* is ``None``.

    Returns:
        Whatever the chosen transcriber returns, or an explanatory message
        when no audio was supplied or *lng* is not a known option.
    """
    audio = mic if mic is not None else file
    if audio is None:
        return "You must either provide a mic recording or a file"

    if lng == "Odiya":
        return transcribe_odiya(audio)
    if lng == "Odiya-trans":
        return transcribe_odiya_eng(audio)
    if lng == "Hindi-trans":
        return transcribe_hin_eng(audio)
    if lng == "Hindi":
        return transcribe_hindi(audio)

    # An unknown selection used to fall off the end and return None, which
    # gave the caller no hint about what went wrong.
    return f"Unsupported language: {lng!r}"

#####################################################

def process_transcription(input_sentence):
    """Encode a sentence, convert its number words, and decode it again.

    The sentence is passed to ``sentence_to_transcript`` together with an
    empty dict, the result goes through ``text2int``, and the output is
    handed to ``transcript_to_sentence`` along with the inverse of that
    dict.
    NOTE(review): ``sentence_to_transcript`` and ``transcript_to_sentence``
    are not defined in the visible part of this file - confirm where they
    come from.

    Args:
        input_sentence: The sentence to process.

    Returns:
        The value returned by ``transcript_to_sentence``, or an
        ``"Error: ..."`` message when either intermediate step yields
        ``None``.
    """
    # Presumably filled in by sentence_to_transcript; it is inverted below.
    word_to_code = {}

    coded = sentence_to_transcript(input_sentence, word_to_code)
    if coded is None:
        return "Error: Transcript conversion returned None"

    converted = text2int(coded)
    if converted is None:
        return "Error: Text to number conversion returned None"

    code_to_word = {code: word for word, code in word_to_code.items()}
    return transcript_to_sentence(converted, code_to_word)

def replace_words(sentence):
    """Rewrite known mis-spelt number words in *sentence* to English ones.

    Each entry is applied in order as a whole-word, case-sensitive regex
    substitution (e.g. ``jiro`` -> ``zero``, ``dublseven`` ->
    ``double seven``).

    Args:
        sentence: Text to normalise.

    Returns:
        The sentence with every listed variant replaced.
    """
    heard_to_canonical = (
        ('jiro', 'zero'),
        ('jero', 'zero'),
        ('nn', 'one'),
        ('n', 'one'),
        ('na', 'one'),
        ('tu', 'two'),
        ('too', 'two'),
        ('thiri', 'three'),
        ('for', 'four'),
        ('fore', 'four'),
        ('fib', 'five'),
        ('dublseven', 'double seven'),
        ('dubalathri', 'double three'),
        ('nineeit', 'nine eight'),
        ('fipeit', 'five eight'),
        ('dubal', 'double'),
        ('sevenatu', 'seven two'),
    )
    fixed = sentence
    for heard, canonical in heard_to_canonical:
        # \b on both sides restricts the match to whole words.
        fixed = re.sub(r'\b' + heard + r'\b', canonical, fixed)
    return fixed

def process_doubles(sentence):
    """Expand ``double X`` (or ``dubal X``) into ``X X``.

    The sentence is split on whitespace. A marker word consumes the token
    after it and emits that token twice; a marker with nothing after it is
    kept unchanged.

    Args:
        sentence: Whitespace-separated text.

    Returns:
        The expanded tokens joined by single spaces.
    """
    words = iter(sentence.split())
    expanded = []
    for word in words:
        if word not in ("double", "dubal"):
            expanded.append(word)
            continue
        # Pull the following token off the same iterator so it is not
        # visited again by the loop.
        following = next(words, None)
        if following is None:
            expanded.append(word)
        else:
            expanded.extend((following, following))
    return ' '.join(expanded)

def soundex(word):
    """Return the four-character American Soundex code of *word*.

    Non-alphabetic characters are discarded before encoding. The first
    letter is kept as is; the remaining letters are mapped to digits, with
    adjacent letters sharing a digit collapsed into one (the first letter's
    own digit counts). ``H`` and ``W`` are skipped without breaking such a
    run, while vowels and any other unmapped letter do break it.

    The previous version padded with ``'000'`` inside the loop, so the
    padding always sat between two consecutive digits and duplicates were
    never collapsed (``"Lloyd"`` gave ``L430`` rather than ``L300``).

    Args:
        word: Text to encode.

    Returns:
        A four-character code padded with zeros, or ``None`` when *word*
        contains no alphabetic characters.
    """
    letters = ''.join(filter(str.isalpha, word.upper()))
    if not letters:
        return None
    soundex_mapping = {
        'B': '1', 'F': '1', 'P': '1', 'V': '1',
        'C': '2', 'G': '2', 'J': '2', 'K': '2', 'Q': '2', 'S': '2', 'X': '2', 'Z': '2',
        'D': '3', 'T': '3', 'L': '4', 'M': '5', 'N': '5', 'R': '6'
    }
    first = letters[0]
    digits = []
    # Digit of the most recent letter; '' stands for a vowel/unmapped letter.
    previous = soundex_mapping.get(first, '')
    for char in letters[1:]:
        if char in ('H', 'W'):
            # Skipped entirely: equal digits on either side still collapse.
            continue
        digit = soundex_mapping.get(char, '')
        if digit and digit != previous:
            digits.append(digit)
        previous = digit
    return (first + ''.join(digits) + '000')[:4]

def is_number(x):
    """Report whether *x* can be converted to ``float``.

    Commas are removed from string input first, so ``"1,234"`` counts as a
    number.

    Args:
        x: Any value.

    Returns:
        ``True`` when ``float(x)`` succeeds, ``False`` otherwise.
    """
    # isinstance also covers str subclasses, which type(x) == str missed.
    if isinstance(x, str):
        x = x.replace(',', '')
    try:
        float(x)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures are caught; a bare except would also
        # swallow KeyboardInterrupt and SystemExit.
        return False
    return True

def text2int(textnum, numwords={}):
    """Return ``None`` for falsy *textnum*; no other logic is visible here.

    Presumably meant to convert number words to digits (inferred from the
    name and from its use in ``process_transcription``), but only the
    empty-input guard is present in this file, so any other input also
    falls off the end and returns ``None``.
    """
    # NOTE(review): the body looks truncated - confirm against the original
    # source before relying on this function.
    # NOTE(review): `numwords={}` is a mutable default shared between calls;
    # it is not used in the visible body.
    if not textnum:
        return None

    
######################################################     
# Build the web UI and start serving it straight away. `demo` is bound to
# the return value of launch(), not to the Interface object.
demo = gr.Interface(
    fn=sel_lng,
    inputs=[
        gr.Dropdown(
            ["Hindi", "Hindi-trans", "Odiya", "Odiya-trans"],
            value="Hindi",
            label="Select Language",
        ),
        # One audio widget covers both microphone and upload; its value is
        # passed to sel_lng as the second positional argument (`mic`).
        gr.Audio(sources=["microphone", "upload"], type="filepath"),
    ],
    outputs=["textbox"],
    title="Automatic Speech Recognition",
    description="Demo for Automatic Speech Recognition. Use microphone to record speech. Please press Record button. Initially it will take some time to load the model. The recognized text will appear in the output textbox",
).launch()