Zeimoto committed on
Commit
ecb1d96
·
1 Parent(s): 636e850

different translation model

Browse files
Files changed (2) hide show
  1. app.py +5 -4
  2. translation.py +6 -2
app.py CHANGED
@@ -2,8 +2,8 @@ import streamlit as st
2
  from st_audiorec import st_audiorec
3
 
4
  from nameder import init_model_ner, get_entity_labels
5
- from speech2text import init_model_trans, transcribe, translate
6
- # from translation import get_translation
7
  from resources import audit_elapsedtime, set_start
8
  import subprocess
9
 
@@ -29,9 +29,10 @@ def main ():
29
  start_loading = set_start()
30
  st.audio(wav_audio_data, format='audio/wav')
31
  original = transcribe(wav_audio_data, s2t)
 
32
  print("translating audio...")
33
- translation = translate(original, s2t)
34
- st.write(f"Original: {original}/nTranscription: {translation}")
35
 
36
  # if text is not None and ner is not None:
37
  # st.write('Entities: ', get_entity_labels(model=ner, text=text))
 
2
  from st_audiorec import st_audiorec
3
 
4
  from nameder import init_model_ner, get_entity_labels
5
+ from speech2text import init_model_trans, transcribe
6
+ from translation import translate
7
  from resources import audit_elapsedtime, set_start
8
  import subprocess
9
 
 
29
  start_loading = set_start()
30
  st.audio(wav_audio_data, format='audio/wav')
31
  original = transcribe(wav_audio_data, s2t)
32
+ st.write(f"Original: {original}")
33
  print("translating audio...")
34
+ translation = translate(original)
35
+ st.write(f"Transcription: {translation}")
36
 
37
  # if text is not None and ner is not None:
38
  # st.write('Entities: ', get_entity_labels(model=ner, text=text))
translation.py CHANGED
@@ -1,5 +1,6 @@
1
  from transformers import MarianMTModel, MarianTokenizer
2
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
 
3
 
4
  def get_model_name(languageCode: str) -> str:
5
  match languageCode:
@@ -8,12 +9,15 @@ def get_model_name(languageCode: str) -> str:
8
 
9
  return model_name
10
 
11
- def get_translation(text_to_translate: str, languageCode: str) -> str:
12
-
 
 
13
  tokenizer = AutoTokenizer.from_pretrained("unicamp-dl/translation-pt-en-t5")
14
  model = AutoModelForSeq2SeqLM.from_pretrained("unicamp-dl/translation-pt-en-t5")
15
  pten_pipeline = pipeline('text2text-generation', model=model, tokenizer=tokenizer)
16
  translated_text = pten_pipeline(text_to_translate)
17
 
 
18
  print("Translated text:", translated_text)
19
  return translated_text
 
1
  from transformers import MarianMTModel, MarianTokenizer
2
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
3
+ from resources import set_start, audit_elapsedtime
4
 
5
  def get_model_name(languageCode: str) -> str:
6
  match languageCode:
 
9
 
10
  return model_name
11
 
12
+ def translate(text_to_translate: str) -> str:
13
+
14
+ start = set_start()
15
+ print("Initiating translation model...")
16
  tokenizer = AutoTokenizer.from_pretrained("unicamp-dl/translation-pt-en-t5")
17
  model = AutoModelForSeq2SeqLM.from_pretrained("unicamp-dl/translation-pt-en-t5")
18
  pten_pipeline = pipeline('text2text-generation', model=model, tokenizer=tokenizer)
19
  translated_text = pten_pipeline(text_to_translate)
20
 
21
+ audit_elapsedtime(function="Finished translation", start=start)
22
  print("Translated text:", translated_text)
23
  return translated_text