Spaces:
Runtime error
Runtime error
Zeimoto
committed on
Commit
·
e25d134
1
Parent(s):
2d84f70
add translation
Browse files
- app.py +22 -15
- speech2text.py +3 -3
- translation.py +20 -0
app.py
CHANGED
@@ -3,6 +3,7 @@ from st_audiorec import st_audiorec
|
|
3 |
|
4 |
from nameder import init_model_ner, get_entity_labels
|
5 |
from speech2text import init_model_trans, transcribe
|
|
|
6 |
from resources import audit_elapsedtime, set_start
|
7 |
import subprocess
|
8 |
|
@@ -10,24 +11,30 @@ def main ():
|
|
10 |
print("------------------------------")
|
11 |
print(f"Running main")
|
12 |
|
13 |
-
print(subprocess.Popen('pip freeze > requirements_hug.txt', shell=True))
|
14 |
-
|
15 |
-
|
|
|
|
|
|
|
|
|
16 |
|
17 |
-
print("Rendering UI...")
|
18 |
-
start_render = set_start()
|
19 |
-
wav_audio_data = st_audiorec()
|
20 |
-
audit_elapsedtime(function="Rendering UI", start=start_render)
|
21 |
|
22 |
-
if wav_audio_data is not None and s2t is not None:
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
|
|
|
|
27 |
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
|
32 |
if __name__ == "__main__":
|
33 |
print("IN __name__")
|
|
|
3 |
|
4 |
from nameder import init_model_ner, get_entity_labels
|
5 |
from speech2text import init_model_trans, transcribe
|
6 |
+
from translation import get_translation
|
7 |
from resources import audit_elapsedtime, set_start
|
8 |
import subprocess
|
9 |
|
|
|
11 |
print("------------------------------")
|
12 |
print(f"Running main")
|
13 |
|
14 |
+
#print(subprocess.Popen('pip freeze > requirements_hug.txt', shell=True))
|
15 |
+
text = "Tenho uma proposta para a Caixa Geral de Depositos, para 3 consultores outsystems, 300 euros por dia e um periodo de seis meses."
|
16 |
+
st.write(text)
|
17 |
+
traducao = get_translation(text_to_translate=text, languageCode="pt")
|
18 |
+
st.write(traducao)
|
19 |
+
# s2t = init_model_trans()
|
20 |
+
# ner = init_model_ner() #async
|
21 |
|
22 |
+
# print("Rendering UI...")
|
23 |
+
# start_render = set_start()
|
24 |
+
# wav_audio_data = st_audiorec()
|
25 |
+
# audit_elapsedtime(function="Rendering UI", start=start_render)
|
26 |
|
27 |
+
# if wav_audio_data is not None and s2t is not None:
|
28 |
+
# print("Loading data...")
|
29 |
+
# start_loading = set_start()
|
30 |
+
# st.audio(wav_audio_data, format='audio/wav')
|
31 |
+
# text = transcribe(wav_audio_data, s2t)
|
32 |
+
# print("translating audio...")
|
33 |
+
# translation = get_translation("pt")
|
34 |
|
35 |
+
# if text is not None and ner is not None:
|
36 |
+
# st.write('Entities: ', get_entity_labels(model=ner, text=text))
|
37 |
+
# audit_elapsedtime(function="Loading data", start=start_loading)
|
38 |
|
39 |
if __name__ == "__main__":
|
40 |
print("IN __name__")
|
speech2text.py
CHANGED
@@ -43,7 +43,7 @@ def transcribe (audio_sample: bytes, pipe) -> str:
|
|
43 |
# sample = dataset[0]["audio"]
|
44 |
result = pipe(audio_sample)
|
45 |
audit_elapsedtime(function="Transcription", start=start)
|
46 |
-
print(result)
|
47 |
-
|
48 |
-
st.write('trancription: ', result["text"])
|
49 |
return result["text"]
|
|
|
43 |
# sample = dataset[0]["audio"]
|
44 |
result = pipe(audio_sample)
|
45 |
audit_elapsedtime(function="Transcription", start=start)
|
46 |
+
print("transcription result",result)
|
47 |
+
|
48 |
+
#st.write('trancription: ', result["text"])
|
49 |
return result["text"]
|
translation.py
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from transformers import MarianMTModel, MarianTokenizer
|
2 |
+
|
3 |
+
def get_model_name(languageCode: str) -> str:
    """Return the Hugging Face model id translating *languageCode* into English.

    Helsinki-NLP publishes its MarianMT checkpoints under the pattern
    ``Helsinki-NLP/opus-mt-{source}-{target}``; the target is fixed to
    English here.

    Args:
        languageCode: Source-language code, e.g. ``"pt"``. Surrounding
            whitespace is ignored. An empty (or missing) code falls back to
            ``"pt"``, the only language this function previously supported.

    Returns:
        The model identifier, e.g. ``"Helsinki-NLP/opus-mt-pt-en"``.
    """
    # Case is preserved on purpose: some OPUS-MT checkpoints use upper-case
    # language-group names.
    source = (languageCode or "").strip() or "pt"
    return f"Helsinki-NLP/opus-mt-{source}-en"
|
6 |
+
|
7 |
+
def init_translation_model():
    """Load the MarianMT model and its tokenizer for translation.

    The checkpoint name is resolved with ``get_model_name("pt")``.

    Returns:
        A ``(model, tokenizer)`` tuple, both loaded from the same checkpoint.
    """
    checkpoint = get_model_name("pt")
    # Tuple elements are evaluated left to right: model first, then tokenizer.
    return (
        MarianMTModel.from_pretrained(checkpoint),
        MarianTokenizer.from_pretrained(checkpoint),
    )
|
12 |
+
|
13 |
+
# Lazily loaded (model, tokenizer) pair shared by every call, so the weights
# are loaded once per process instead of once per translation.
_translation_resources = None


def get_translation(text_to_translate: str, languageCode: str) -> str:
    """Translate *text_to_translate* and return the translated string.

    Args:
        text_to_translate: Text to translate. Empty or whitespace-only input
            yields ``""`` without loading or running the model.
        languageCode: Source-language code supplied by the caller. It is
            accepted for interface compatibility but not forwarded, because
            ``init_translation_model()`` takes no arguments.

    Returns:
        The decoded translation of the first generated sequence, with special
        tokens removed.
    """
    global _translation_resources

    # Nothing to translate: skip the expensive model load and generation.
    if not text_to_translate or not text_to_translate.strip():
        return ""

    if _translation_resources is None:
        _translation_resources = init_translation_model()
    model, tokenizer = _translation_resources

    # return_tensors="pt" selects PyTorch tensors (unrelated to Portuguese).
    # truncation=True cuts input that exceeds the tokenizer's maximum length.
    inputs = tokenizer(text_to_translate, return_tensors="pt", truncation=True, padding=True)
    translated_ids = model.generate(**inputs)
    translated_text = tokenizer.decode(translated_ids[0], skip_special_tokens=True)

    print("Translated text:", translated_text)
    return translated_text
|