mutisya committed
Commit 7caf28a · verified · 1 Parent(s): bdc5eb6

Update app.py

Files changed (1):
  1. app.py +2 -145
app.py CHANGED
@@ -11,148 +11,5 @@ import numpy as np
 auth_token = os.environ.get("key")
 os.environ["HUGGING_FACE_HUB_TOKEN"] = auth_token
 
-
-# set up transcription pipeline
-device = "cuda:0" if torch.cuda.is_available() else "cpu"
-transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en", device=device)
-
-
-# set up translation pipeline
-translation_model_path = "mutisya/m2m100_418M-en-kik-v24.03.2"
-
-def update_tokenizer_settings(tokenizer):
-    new_langTokens = { k: tokenizer.convert_tokens_to_ids(k) for k in tokenizer.additional_special_tokens }
-    all_lang_tokens = dict(list(tokenizer.lang_token_to_id.items()) + list(new_langTokens.items()))
-
-    tokenizer.id_to_lang_token = { v: k for k, v in all_lang_tokens.items() }
-    tokenizer.lang_token_to_id = { k: v for k, v in all_lang_tokens.items() }
-    tokenizer.lang_code_to_token = { k.replace("_", ""): k for k in all_lang_tokens.keys() }
-    tokenizer.lang_code_to_id = { k.replace("_", ""): v for k, v in all_lang_tokens.items() }
-
-
-translation_model = M2M100ForConditionalGeneration.from_pretrained(translation_model_path)
-translation_tokenizer = M2M100Tokenizer.from_pretrained(translation_model_path)
-
-update_tokenizer_settings(translation_tokenizer)
-
-# set translation direction
-src_lang = "en"
-tgt_lang = "kik"
-
-translation_tokenizer.src_lang = src_lang
-translation_tokenizer.tgt_lang = tgt_lang
-
-
-translation_device = 0 if torch.cuda.is_available() else -1
-translator = pipeline('translation', model=translation_model, tokenizer=translation_tokenizer, device=translation_device)
-
-
-# transcribe sections while keeping state
-chunk_tracker = []
-ready_to_translate = []
-text_at_chunk_end = ""
-chunk_index = 0
-translated_text = ""
-transcribed_text = ""
-
-
-def get_next_translation_block():
-    global text_at_chunk_end
-    global chunk_tracker
-    global ready_to_translate
-    global translated_text
-    global transcribed_text
-
-    last_stop = text_at_chunk_end[0:-1].rfind('.')
-    ready_sentences = text_at_chunk_end[0:last_stop+1]
-    chunks_to_remove = []
-
-    if len(ready_sentences) > 0:
-        print("Trying to match: " + ready_sentences)
-        found_match = False
-        for i in range(0, len(chunk_tracker)):
-            curr_chunk = chunk_tracker[i]
-            chunks_to_remove.append(curr_chunk)
-            if curr_chunk["text_at_begining"] == curr_chunk["text_at_end"] and curr_chunk["text_at_begining"] == ready_sentences:
-                found_match = True
-                break
-
-        if found_match == False:
-            print("ERROR: no match found for " + ready_sentences)
-            chunks_to_remove = []
-        else:
-            transcribed_text += ready_sentences
-            translated_text += translator(ready_sentences, src_lang=src_lang, tgt_lang=tgt_lang)[0]['translation_text']
-            print("TRANSLATED: " + translated_text)
-
-    return ready_sentences, chunks_to_remove
-
-def transcribe(stream, new_chunk):
-    global text_at_chunk_end
-    global chunk_tracker
-    global ready_to_translate
-    global chunk_index
-    global translated_text
-    global transcribed_text
-
-    chunk_index += 1
-
-    sr, y = new_chunk
-    y = y.astype(np.float32)
-    y /= np.max(np.abs(y))
-
-    chunk_value = y
-    chunk_length = len(y)
-
-    if stream is not None:
-        stream = np.concatenate([stream, y])
-    else:
-        stream = y
-
-    text_at_chunk_begining = text_at_chunk_end
-    text_at_chunk_end = transcriber({"sampling_rate": sr, "raw": stream})["text"]
-
-    curr_chunk = {
-        "value": chunk_value,
-        "length": chunk_length,
-        "text_at_begining": text_at_chunk_begining,
-        "text_at_end": text_at_chunk_end
-    }
-
-    # print(curr_chunk)
-    chunk_tracker.append(curr_chunk)
-
-    # get translation block
-    if chunk_index % 5 == 0:
-        ready_sentences, chunks_to_remove = get_next_translation_block()
-        if len(chunks_to_remove) > 0:
-            ready_to_translate.append(ready_sentences)
-            total_trim_length = 0
-            for i in range(0, len(chunks_to_remove)):
-                total_trim_length += chunks_to_remove[i]["length"]
-                removed = chunk_tracker.pop(0)
-                # print("REMOVED: " + removed["text_at_begining"] + " -> " + removed["text_at_end"])
-
-            # set up new stream with remaining chunks
-            new_stream = chunk_tracker[0]["value"]
-            for i in range(1, len(chunk_tracker)):
-                new_stream = np.concatenate([new_stream, chunk_tracker[i]["value"]])
-
-            stream = new_stream
-
-    return stream, text_at_chunk_end, transcribed_text, translated_text
-
-
-# set up UI
-demo = gr.Interface(
-    transcribe,
-    ["state", gr.Audio(sources=["microphone"], streaming=True)],
-    ["state", gr.Textbox(label="in progress"), gr.Textbox(label="Transcribed text"), gr.Textbox(label="Translated text")],
-    live=True,
-    allow_flagging="never"
-)
-
-demo.dependencies[0]["show_progress"] = False  # this should hide the progress report?
-
-if __name__ == "__main__":
-    demo.launch(debug=True)
+iface = gr.load(name="mutisya/transcribe-api-feb24", hf_token=hf_token, src="spaces")
+iface.queue(api_open=False).launch(show_api=False)
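
After this change, app.py no longer builds the Whisper and M2M100 pipelines locally; it proxies the hosted mutisya/transcribe-api-feb24 Space via gr.load. A minimal sketch of how the resulting file could read end to end, assuming the usual os and gradio imports (they sit above this hunk and are not shown) and a single hf_token name (the diff reads the secret into auth_token but passes hf_token to gr.load):

import os
import gradio as gr

# Hub token for the private Space; read from the "key" secret.
# The diff stores this in auth_token and passes hf_token to gr.load,
# so a single name is assumed here.
hf_token = os.environ.get("key")
os.environ["HUGGING_FACE_HUB_TOKEN"] = hf_token

# Mirror the hosted transcription/translation Space instead of running
# the speech-recognition and translation pipelines in this Space.
iface = gr.load(name="mutisya/transcribe-api-feb24", hf_token=hf_token, src="spaces")

# Serve with the queue enabled but keep the programmatic API closed.
iface.queue(api_open=False).launch(show_api=False)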