Spaces:

KalbeDigitalLab
/

dossier-translation

Running

App Files Files Community

fadliaulawi committed on Dec 9, 2024

Commit

cdda69e

1 Parent(s): c52dbb5

Implement displaying file (pdf)

Browse files

Files changed (1) hide show

app.py +25 -10

app.py CHANGED Viewed

@@ -7,7 +7,9 @@ import zipfile
 from azure.core.credentials import AzureKeyCredential
 from azure.ai.translation.document import DocumentTranslationClient
 from dotenv import load_dotenv
 from utils import blob_service_client, upload_to_azure, download_from_azure, delete_from_azure
 load_dotenv()
@@ -44,10 +46,7 @@ lang = st.selectbox('Target language selection:', langs, key='lang')
 lang_id = lang.split()[0]  # Get language code (e.g., 'en')
 lang_name = lang.split()[-1]  # Get language name (e.g., 'English')
-def process_sync(uploaded_file):
-    file_name = uploaded_file.name
-    file_content = uploaded_file.read()
     # Set up Azure Translator API headers
     headers = {
@@ -67,10 +66,7 @@ def process_sync(uploaded_file):
     return response.status_code == 200, response.content
-def process_async(uploaded_file):
-    file_name = uploaded_file.name
-    file_content = uploaded_file.read()
     # Upload the original file to Azure Blob Storage source container
     upload_to_azure(blob_service_client, "source", file_content, file_name)
@@ -91,6 +87,13 @@ def process_async(uploaded_file):
     for document in result:
         return document.status == 'Succeeded', downloaded_file_content
 if uploaded_files:
     submit = st.button("Get Result", key='submit')
@@ -104,11 +107,14 @@ if uploaded_files and submit:
             # Start timing
             start_time = time.time()
             # Check file extension to determine translation method
             if uploaded_file.name.split('.')[-1] in ['txt', 'tsv', 'tab', 'csv', 'html', 'htm', 'mthml', 'mht', 'pptx', 'xlsx', 'docx', 'msg', 'xlf', 'xliff']:
-                result, response = process_sync(uploaded_file)
             elif uploaded_file.name.split('.')[-1] in ['pdf', 'odt', 'odp', 'ods', 'rtf']:
-                result, response = process_async(uploaded_file)
             # Calculate duration
             duration = time.time() - start_time
@@ -121,6 +127,15 @@ if uploaded_files and submit:
             else:
                 st.error(f"Failed to translate {uploaded_file.name} with status code {response.status_code}: {response.text} (Time taken: {duration:.2f} seconds)")
             # Update progress bar based on completed translations
             progress = (idx + 1) / len(uploaded_files)
             progress_bar.progress(progress)

 from azure.core.credentials import AzureKeyCredential
 from azure.ai.translation.document import DocumentTranslationClient
+from docx import Document
 from dotenv import load_dotenv
+from streamlit_pdf_viewer import pdf_viewer
 from utils import blob_service_client, upload_to_azure, download_from_azure, delete_from_azure
 load_dotenv()
 lang_id = lang.split()[0]  # Get language code (e.g., 'en')
 lang_name = lang.split()[-1]  # Get language name (e.g., 'English')
+def process_sync(file_name, file_content):
     # Set up Azure Translator API headers
     headers = {
     return response.status_code == 200, response.content
+def process_async(file_name, file_content):
     # Upload the original file to Azure Blob Storage source container
     upload_to_azure(blob_service_client, "source", file_content, file_name)
     for document in result:
         return document.status == 'Succeeded', downloaded_file_content
+def read_word_file(file):
+    doc = Document(file)
+    full_text = []
+    for para in doc.paragraphs:
+        full_text.append(para.text)
+    return '\n'.join(full_text)
 if uploaded_files:
     submit = st.button("Get Result", key='submit')
             # Start timing
             start_time = time.time()
+            file_name = uploaded_file.name
+            file_content = uploaded_file.read()
             # Check file extension to determine translation method
             if uploaded_file.name.split('.')[-1] in ['txt', 'tsv', 'tab', 'csv', 'html', 'htm', 'mthml', 'mht', 'pptx', 'xlsx', 'docx', 'msg', 'xlf', 'xliff']:
+                result, response = process_sync(file_name, file_content)
             elif uploaded_file.name.split('.')[-1] in ['pdf', 'odt', 'odp', 'ods', 'rtf']:
+                result, response = process_async(file_name, file_content)
             # Calculate duration
             duration = time.time() - start_time
             else:
                 st.error(f"Failed to translate {uploaded_file.name} with status code {response.status_code}: {response.text} (Time taken: {duration:.2f} seconds)")
+            # Display the original and translated files side by side
+            col1, col2 = st.columns(2)
+            with col1:
+                st.write(f"Original File: {uploaded_file.name}")
+                pdf_viewer(file_content)
+            with col2:
+                st.write(f"Translated File: {lang_name}-translated-{uploaded_file.name}")
+                pdf_viewer(response)
             # Update progress bar based on completed translations
             progress = (idx + 1) / len(uploaded_files)
             progress_bar.progress(progress)