Spaces:
Runtime error
Runtime error
Zeimoto
committed on
Commit
·
ecb1d96
1
Parent(s):
636e850
different translation model
Browse files
- app.py +5 -4
- translation.py +6 -2
app.py
CHANGED
@@ -2,8 +2,8 @@ import streamlit as st
|
|
2 |
from st_audiorec import st_audiorec
|
3 |
|
4 |
from nameder import init_model_ner, get_entity_labels
|
5 |
-
from speech2text import init_model_trans, transcribe
|
6 |
-
|
7 |
from resources import audit_elapsedtime, set_start
|
8 |
import subprocess
|
9 |
|
@@ -29,9 +29,10 @@ def main ():
|
|
29 |
start_loading = set_start()
|
30 |
st.audio(wav_audio_data, format='audio/wav')
|
31 |
original = transcribe(wav_audio_data, s2t)
|
|
|
32 |
print("translating audio...")
|
33 |
-
translation = translate(original
|
34 |
-
st.write(f"
|
35 |
|
36 |
# if text is not None and ner is not None:
|
37 |
# st.write('Entities: ', get_entity_labels(model=ner, text=text))
|
|
|
2 |
from st_audiorec import st_audiorec
|
3 |
|
4 |
from nameder import init_model_ner, get_entity_labels
|
5 |
+
from speech2text import init_model_trans, transcribe
|
6 |
+
from translation import translate
|
7 |
from resources import audit_elapsedtime, set_start
|
8 |
import subprocess
|
9 |
|
|
|
29 |
start_loading = set_start()
|
30 |
st.audio(wav_audio_data, format='audio/wav')
|
31 |
original = transcribe(wav_audio_data, s2t)
|
32 |
+
st.write(f"Original: {original}")
|
33 |
print("translating audio...")
|
34 |
+
translation = translate(original)
|
35 |
+
st.write(f"Transcription: {translation}")
|
36 |
|
37 |
# if text is not None and ner is not None:
|
38 |
# st.write('Entities: ', get_entity_labels(model=ner, text=text))
|
translation.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
from transformers import MarianMTModel, MarianTokenizer
|
2 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
|
|
|
3 |
|
4 |
def get_model_name(languageCode: str) -> str:
|
5 |
match languageCode:
|
@@ -8,12 +9,15 @@ def get_model_name(languageCode: str) -> str:
|
|
8 |
|
9 |
return model_name
|
10 |
|
11 |
-
def
|
12 |
-
|
|
|
|
|
13 |
tokenizer = AutoTokenizer.from_pretrained("unicamp-dl/translation-pt-en-t5")
|
14 |
model = AutoModelForSeq2SeqLM.from_pretrained("unicamp-dl/translation-pt-en-t5")
|
15 |
pten_pipeline = pipeline('text2text-generation', model=model, tokenizer=tokenizer)
|
16 |
translated_text = pten_pipeline(text_to_translate)
|
17 |
|
|
|
18 |
print("Translated text:", translated_text)
|
19 |
return translated_text
|
|
|
1 |
from transformers import MarianMTModel, MarianTokenizer
|
2 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
|
3 |
+
from resources import set_start, audit_elapsedtime
|
4 |
|
5 |
def get_model_name(languageCode: str) -> str:
|
6 |
match languageCode:
|
|
|
9 |
|
10 |
return model_name
|
11 |
|
12 |
+
def translate(text_to_translate: str) -> str:
|
13 |
+
|
14 |
+
start = set_start()
|
15 |
+
print("Initiating translation model...")
|
16 |
tokenizer = AutoTokenizer.from_pretrained("unicamp-dl/translation-pt-en-t5")
|
17 |
model = AutoModelForSeq2SeqLM.from_pretrained("unicamp-dl/translation-pt-en-t5")
|
18 |
pten_pipeline = pipeline('text2text-generation', model=model, tokenizer=tokenizer)
|
19 |
translated_text = pten_pipeline(text_to_translate)
|
20 |
|
21 |
+
audit_elapsedtime(function="Finished translation", start=start)
|
22 |
print("Translated text:", translated_text)
|
23 |
return translated_text
|