IsmayilMasimov36 committed on
Commit
e89cc5c
·
1 Parent(s): e74b3dc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -20
app.py CHANGED
@@ -1,6 +1,5 @@
1
  import streamlit as st
2
  from transformers import T5Tokenizer, T5ForConditionalGeneration
3
- from pathlib import Path
4
  from pdfminer.high_level import extract_text
5
 
6
  def main():
@@ -8,11 +7,11 @@ def main():
8
  st.write("Upload a PDF file and we will translate the text inside to German and French.")
9
 
10
  # Upload the pdf
11
- uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
12
 
13
  if uploaded_file is not None:
14
  # Extract text from pdf
15
- documents = extract_text(uploaded_file)
16
  tokenizer = T5Tokenizer.from_pretrained("t5-small")
17
  model = T5ForConditionalGeneration.from_pretrained("t5-small")
18
 
@@ -22,7 +21,7 @@ def main():
22
  "french": "translate English to French: "
23
  }
24
 
25
- # Generate translations for each language for each document
26
  translations = {}
27
 
28
  # Buttons to trigger translation
@@ -30,30 +29,25 @@ def main():
30
  translate_french = st.button("Translate to French")
31
 
32
  for language, prefix in translation_prefixes.items():
33
- document_translations = []
34
-
35
- for idx, document in enumerate(documents, 1):
36
- text = prefix + document.text
37
- input_ids = tokenizer(text, return_tensors="pt").input_ids
38
- outputs = model.generate(input_ids=input_ids, max_length=50, num_beams=4, no_repeat_ngram_size=2)
39
- translated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
40
- document_translations.append(translated_text)
41
-
42
- translations[language] = document_translations
43
 
44
  # Display the translations based on the button clicked
45
  if translate_german:
46
- display_translations(translations["german"], "German")
47
 
48
  if translate_french:
49
- display_translations(translations["french"], "French")
50
 
51
 
52
- def display_translations(translations, language):
53
  st.write(f"\nLanguage: {language}")
54
- for idx, translation in enumerate(translations, 1):
55
- st.write(f"Page {idx}: {translation}")
56
 
57
 
58
  if __name__ == "__main__":
59
- main()
 
1
  import streamlit as st
2
  from transformers import T5Tokenizer, T5ForConditionalGeneration
 
3
  from pdfminer.high_level import extract_text
4
 
5
  def main():
 
7
  st.write("Upload a PDF file and we will translate the text inside to German and French.")
8
 
9
  # Upload the pdf
10
+ uploaded_file = st.file_uploader("", type=["pdf"])
11
 
12
  if uploaded_file is not None:
13
  # Extract text from pdf
14
+ text = extract_text(uploaded_file)
15
  tokenizer = T5Tokenizer.from_pretrained("t5-small")
16
  model = T5ForConditionalGeneration.from_pretrained("t5-small")
17
 
 
21
  "french": "translate English to French: "
22
  }
23
 
24
+ # Generate translations for each language
25
  translations = {}
26
 
27
  # Buttons to trigger translation
 
29
  translate_french = st.button("Translate to French")
30
 
31
  for language, prefix in translation_prefixes.items():
32
+ # Translate the entire text, not page by page
33
+ text_to_translate = prefix + text
34
+ input_ids = tokenizer(text_to_translate, return_tensors="pt").input_ids
35
+ outputs = model.generate(input_ids=input_ids, max_length=150, num_beams=4, no_repeat_ngram_size=2)
36
+ translated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
37
+ translations[language] = translated_text
 
 
 
 
38
 
39
  # Display the translations based on the button clicked
40
  if translate_german:
41
+ display_translation(translations["german"], "German")
42
 
43
  if translate_french:
44
+ display_translation(translations["french"], "French")
45
 
46
 
47
+ def display_translation(translation, language):
48
  st.write(f"\nLanguage: {language}")
49
+ st.write(f"Translation: {translation}")
 
50
 
51
 
52
  if __name__ == "__main__":
53
+ main()