Aytaj committed on
Commit
4f58b0b
·
1 Parent(s): e33bbd1

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -0
app.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from transformers import T5Tokenizer, T5ForConditionalGeneration
3
+ from pdfminer.high_level import extract_text
4
+
5
def main():
    """Render the Streamlit page that translates an uploaded PDF with T5.

    The page shows a title and a PDF uploader. Once a file is uploaded, its
    text is extracted and two buttons ("Translate to German" / "Translate to
    French") are shown. Clicking a button translates the extracted text with
    the ``t5-small`` checkpoint and displays the result through
    ``display_translation``.

    Translation work is done only for the language whose button was clicked,
    and the tokenizer/model are loaded only when a translation is actually
    requested, so a rerun without a click does no model work.
    """
    st.title("PDF Translation")
    # Upload the pdf
    uploaded_file = st.file_uploader(
        "Upload a PDF file and we will translate the text inside to German and French",
        type=["pdf"],
    )
    if uploaded_file is None:
        return

    # Extract text from pdf
    text = extract_text(uploaded_file)
    if not text or not text.strip():
        # Nothing to translate (for example a PDF without a text layer);
        # sending only the task prefix to the model would produce noise.
        st.warning("No extractable text was found in the uploaded PDF.")
        return

    # (task prefix understood by T5, label shown to the user, button state)
    # Buttons are created in the same order as before: German, then French.
    requests = [
        (
            "translate English to German: ",
            "German",
            st.button("Translate to German"),
        ),
        (
            "translate English to French: ",
            "French",
            st.button("Translate to French"),
        ),
    ]

    # Keep only the languages whose button triggered this rerun.
    pending = [(prefix, label) for prefix, label, clicked in requests if clicked]
    if not pending:
        return

    # Load the model lazily: only once a translation has been requested.
    tokenizer = T5Tokenizer.from_pretrained("t5-small")
    model = T5ForConditionalGeneration.from_pretrained("t5-small")

    for prefix, label in pending:
        # Translate the entire text, not page by page. truncation=True caps
        # the input at the tokenizer's configured maximum length so a long
        # PDF cannot produce an unbounded input sequence.
        input_ids = tokenizer(
            prefix + text,
            return_tensors="pt",
            truncation=True,
        ).input_ids
        # The generated output is limited to 150 tokens by max_length.
        outputs = model.generate(
            input_ids=input_ids,
            max_length=150,
            num_beams=4,
            no_repeat_ngram_size=2,
        )
        translated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        display_translation(translated_text, label)
43
+
44
+
45
def display_translation(translation, language):
    """Write a language heading followed by the translated text to the page.

    Args:
        translation: The translated text to show.
        language: Human-readable name of the target language.
    """
    heading = f"\nLanguage: {language}"
    body = f"Translation:\n {translation}"
    # Emit the heading first, then the translation, as two separate writes.
    for chunk in (heading, body):
        st.write(chunk)
48
+
49
+
50
# Build the page only when this file is executed as the main script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()