Spaces:

IsmayilMasimov36
/

test

Sleeping

App Files Community

IsmayilMasimov36 committed on Jan 4, 2024

Commit

e89cc5c

1 Parent(s): e74b3dc

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -20

app.py CHANGED Viewed

@@ -1,6 +1,5 @@
 import streamlit as st
 from transformers import T5Tokenizer, T5ForConditionalGeneration
-from pathlib import Path
 from pdfminer.high_level import extract_text
 def main():
@@ -8,11 +7,11 @@ def main():
     st.write("Upload a PDF file and we will translate the text inside to German and French.")
     # Upload the pdf
-    uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
     if uploaded_file is not None:
         # Extract text from pdf
-        documents = extract_text(uploaded_file)
         tokenizer = T5Tokenizer.from_pretrained("t5-small")
         model = T5ForConditionalGeneration.from_pretrained("t5-small")
@@ -22,7 +21,7 @@ def main():
             "french": "translate English to French: "
         }
-        # Generate translations for each language for each document
         translations = {}
         # Buttons to trigger translation
@@ -30,30 +29,25 @@ def main():
         translate_french = st.button("Translate to French")
         for language, prefix in translation_prefixes.items():
-            document_translations = []
-            for idx, document in enumerate(documents, 1):
-                text = prefix + document.text
-                input_ids = tokenizer(text, return_tensors="pt").input_ids
-                outputs = model.generate(input_ids=input_ids, max_length=50, num_beams=4, no_repeat_ngram_size=2)
-                translated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-                document_translations.append(translated_text)
-            translations[language] = document_translations
         # Display the translations based on the button clicked
         if translate_german:
-            display_translations(translations["german"], "German")
         if translate_french:
-            display_translations(translations["french"], "French")
-def display_translations(translations, language):
     st.write(f"\nLanguage: {language}")
-    for idx, translation in enumerate(translations, 1):
-        st.write(f"Page {idx}: {translation}")
 if __name__ == "__main__":
-    main()

 import streamlit as st
 from transformers import T5Tokenizer, T5ForConditionalGeneration
 from pdfminer.high_level import extract_text
 def main():
     st.write("Upload a PDF file and we will translate the text inside to German and French.")
     # Upload the pdf
+    uploaded_file = st.file_uploader("", type=["pdf"])
     if uploaded_file is not None:
         # Extract text from pdf
+        text = extract_text(uploaded_file)
         tokenizer = T5Tokenizer.from_pretrained("t5-small")
         model = T5ForConditionalGeneration.from_pretrained("t5-small")
             "french": "translate English to French: "
         }
+        # Generate translations for each language
         translations = {}
         # Buttons to trigger translation
         translate_french = st.button("Translate to French")
         for language, prefix in translation_prefixes.items():
+            # Translate the entire text, not page by page
+            text_to_translate = prefix + text
+            input_ids = tokenizer(text_to_translate, return_tensors="pt").input_ids
+            outputs = model.generate(input_ids=input_ids, max_length=150, num_beams=4, no_repeat_ngram_size=2)
+            translated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+            translations[language] = translated_text
         # Display the translations based on the button clicked
         if translate_german:
+            display_translation(translations["german"], "German")
         if translate_french:
+            display_translation(translations["french"], "French")
+def display_translation(translation, language):
     st.write(f"\nLanguage: {language}")
+    st.write(f"Translation: {translation}")
 if __name__ == "__main__":
+    main()