Spaces:
Sleeping
Sleeping
Commit
·
cdda69e
1
Parent(s):
c52dbb5
Implement displaying file (pdf)
Browse files
app.py
CHANGED
@@ -7,7 +7,9 @@ import zipfile
|
|
7 |
|
8 |
from azure.core.credentials import AzureKeyCredential
|
9 |
from azure.ai.translation.document import DocumentTranslationClient
|
|
|
10 |
from dotenv import load_dotenv
|
|
|
11 |
from utils import blob_service_client, upload_to_azure, download_from_azure, delete_from_azure
|
12 |
|
13 |
load_dotenv()
|
@@ -44,10 +46,7 @@ lang = st.selectbox('Target language selection:', langs, key='lang')
|
|
44 |
lang_id = lang.split()[0] # Get language code (e.g., 'en')
|
45 |
lang_name = lang.split()[-1] # Get language name (e.g., 'English')
|
46 |
|
47 |
-
def process_sync(
|
48 |
-
|
49 |
-
file_name = uploaded_file.name
|
50 |
-
file_content = uploaded_file.read()
|
51 |
|
52 |
# Set up Azure Translator API headers
|
53 |
headers = {
|
@@ -67,10 +66,7 @@ def process_sync(uploaded_file):
|
|
67 |
|
68 |
return response.status_code == 200, response.content
|
69 |
|
70 |
-
def process_async(
|
71 |
-
|
72 |
-
file_name = uploaded_file.name
|
73 |
-
file_content = uploaded_file.read()
|
74 |
|
75 |
# Upload the original file to Azure Blob Storage source container
|
76 |
upload_to_azure(blob_service_client, "source", file_content, file_name)
|
@@ -91,6 +87,13 @@ def process_async(uploaded_file):
|
|
91 |
for document in result:
|
92 |
return document.status == 'Succeeded', downloaded_file_content
|
93 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
94 |
if uploaded_files:
|
95 |
submit = st.button("Get Result", key='submit')
|
96 |
|
@@ -104,11 +107,14 @@ if uploaded_files and submit:
|
|
104 |
# Start timing
|
105 |
start_time = time.time()
|
106 |
|
|
|
|
|
|
|
107 |
# Check file extension to determine translation method
|
108 |
if uploaded_file.name.split('.')[-1] in ['txt', 'tsv', 'tab', 'csv', 'html', 'htm', 'mthml', 'mht', 'pptx', 'xlsx', 'docx', 'msg', 'xlf', 'xliff']:
|
109 |
-
result, response = process_sync(
|
110 |
elif uploaded_file.name.split('.')[-1] in ['pdf', 'odt', 'odp', 'ods', 'rtf']:
|
111 |
-
result, response = process_async(
|
112 |
|
113 |
# Calculate duration
|
114 |
duration = time.time() - start_time
|
@@ -121,6 +127,15 @@ if uploaded_files and submit:
|
|
121 |
else:
|
122 |
st.error(f"Failed to translate {uploaded_file.name} with status code {response.status_code}: {response.text} (Time taken: {duration:.2f} seconds)")
|
123 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
124 |
# Update progress bar based on completed translations
|
125 |
progress = (idx + 1) / len(uploaded_files)
|
126 |
progress_bar.progress(progress)
|
|
|
7 |
|
8 |
from azure.core.credentials import AzureKeyCredential
|
9 |
from azure.ai.translation.document import DocumentTranslationClient
|
10 |
+
from docx import Document
|
11 |
from dotenv import load_dotenv
|
12 |
+
from streamlit_pdf_viewer import pdf_viewer
|
13 |
from utils import blob_service_client, upload_to_azure, download_from_azure, delete_from_azure
|
14 |
|
15 |
load_dotenv()
|
|
|
46 |
lang_id = lang.split()[0] # Get language code (e.g., 'en')
|
47 |
lang_name = lang.split()[-1] # Get language name (e.g., 'English')
|
48 |
|
49 |
+
def process_sync(file_name, file_content):
|
|
|
|
|
|
|
50 |
|
51 |
# Set up Azure Translator API headers
|
52 |
headers = {
|
|
|
66 |
|
67 |
return response.status_code == 200, response.content
|
68 |
|
69 |
+
def process_async(file_name, file_content):
|
|
|
|
|
|
|
70 |
|
71 |
# Upload the original file to Azure Blob Storage source container
|
72 |
upload_to_azure(blob_service_client, "source", file_content, file_name)
|
|
|
87 |
for document in result:
|
88 |
return document.status == 'Succeeded', downloaded_file_content
|
89 |
|
90 |
+
def read_word_file(file):
    """Return the paragraph text of a Word document as one string.

    Args:
        file: Whatever ``Document`` accepts; it is passed through unchanged.

    Returns:
        The ``text`` of every entry in the document's ``paragraphs``
        collection, in iteration order, joined with newline characters.
    """
    paragraphs = Document(file).paragraphs
    return '\n'.join(paragraph.text for paragraph in paragraphs)
|
96 |
+
|
97 |
if uploaded_files:
|
98 |
submit = st.button("Get Result", key='submit')
|
99 |
|
|
|
107 |
# Start timing
|
108 |
start_time = time.time()
|
109 |
|
110 |
+
file_name = uploaded_file.name
|
111 |
+
file_content = uploaded_file.read()
|
112 |
+
|
113 |
# Check file extension to determine translation method
|
114 |
if uploaded_file.name.split('.')[-1] in ['txt', 'tsv', 'tab', 'csv', 'html', 'htm', 'mthml', 'mht', 'pptx', 'xlsx', 'docx', 'msg', 'xlf', 'xliff']:
|
115 |
+
result, response = process_sync(file_name, file_content)
|
116 |
elif uploaded_file.name.split('.')[-1] in ['pdf', 'odt', 'odp', 'ods', 'rtf']:
|
117 |
+
result, response = process_async(file_name, file_content)
|
118 |
|
119 |
# Calculate duration
|
120 |
duration = time.time() - start_time
|
|
|
127 |
else:
|
128 |
st.error(f"Failed to translate {uploaded_file.name} with status code {response.status_code}: {response.text} (Time taken: {duration:.2f} seconds)")
|
129 |
|
130 |
+
# Display the original and translated files side by side
|
131 |
+
col1, col2 = st.columns(2)
|
132 |
+
with col1:
|
133 |
+
st.write(f"Original File: {uploaded_file.name}")
|
134 |
+
pdf_viewer(file_content)
|
135 |
+
with col2:
|
136 |
+
st.write(f"Translated File: {lang_name}-translated-{uploaded_file.name}")
|
137 |
+
pdf_viewer(response)
|
138 |
+
|
139 |
# Update progress bar based on completed translations
|
140 |
progress = (idx + 1) / len(uploaded_files)
|
141 |
progress_bar.progress(progress)
|