import io
import os
import time
import zipfile

import requests
import streamlit as st
from azure.ai.translation.document import DocumentTranslationClient
from azure.core.credentials import AzureKeyCredential
from dotenv import load_dotenv
from streamlit_msal import Msal
from streamlit_pdf_viewer import pdf_viewer

from utils import blob_service_client, upload_to_azure, download_from_azure, delete_from_azure

# Pull settings from a local .env file before the os.environ lookups below.
load_dotenv()
st.set_page_config(layout="wide")

# Sign-in widget lives in the sidebar. `auth_data` is tested for truthiness
# right after this block to decide whether the page may continue.
with st.sidebar:
    auth_data = Msal.initialize_ui(
        client_id=os.environ["AZURE_CLIENT_ID"],
        authority=os.environ["AZURE_AUTHORITY_URL"],
        scopes=[],
        connecting_label="Connecting",
        disconnected_label="Disconnected",
        sign_in_label="Sign in",
        sign_out_label="Sign out",
    )

# Gate the page behind sign-in. st.stop() ends this script run, so every
# statement after this guard only executes when auth_data is truthy and no
# explicit else-branch is needed.
if not auth_data:
    st.warning("Please login to continue")
    st.stop()

st.title("Azure Translation Tools")
uploaded_files = st.file_uploader(
    "Upload files to start the process",
    accept_multiple_files=True,
)

# Client for the container-to-container translation path (see process_async).
client = DocumentTranslationClient(
    os.environ["AZURE_AI_ENDPOINT_URL"],
    AzureKeyCredential(os.environ["AZURE_AI_TRANSLATOR_KEY"]),
)

# Blob container URLs handed to client.begin_translation as source/target.
sourceUri = "https://cbdtranslation.blob.core.windows.net/source"
targetUri = "https://cbdtranslation.blob.core.windows.net/target"

# Selectable target languages, each written as "<code> - <name>".
langs = (
    "id - Indonesian", "en - English", "es - Spanish", "zh - Chinese",
    "ar - Arabic", "fr - French", "ru - Russian", "hi - Hindi",
    "pt - Portuguese", "de - German", "ms - Malay", "ta - Tamil",
    "ko - Korean", "th - Thai",
)

lang = st.selectbox("Target language selection:", langs, key="lang")

# First whitespace-separated token is the language code, last one the
# display name; split once and index both ends.
_lang_tokens = lang.split()
lang_id = _lang_tokens[0]
lang_name = _lang_tokens[-1]

def process_sync(file_name, file_content, timeout=(10, 300)):
    """Translate one document via the synchronous ``document:translate`` endpoint.

    The target language is taken from the module-level ``lang_id``; the
    endpoint, API version and subscription key come from environment
    variables.

    Args:
        file_name: Name sent as the multipart filename of the document.
        file_content: Raw bytes of the document to translate.
        timeout: ``(connect, read)`` timeout in seconds forwarded to
            ``requests.post`` so a stalled service cannot hang the page
            indefinitely.

    Returns:
        A ``(ok, payload)`` tuple. ``ok`` is True only for an HTTP 200
        response, in which case ``payload`` is the response body bytes.
        For any other status ``payload`` is still the response body.
        If the request itself fails (connection error, timeout, ...),
        ``ok`` is False and ``payload`` is the UTF-8 encoded error text.
    """
    headers = {
        "Ocp-Apim-Subscription-Key": os.environ["AZURE_AI_TRANSLATOR_KEY"],
    }

    # NOTE(review): the media type below looks like a documentation
    # placeholder rather than a real MIME type -- confirm the service
    # does not depend on it before changing.
    files = {
        "document": (file_name, file_content, "ContentType/file-extension"),
    }

    # Tolerate an endpoint configured with a trailing slash, and let
    # requests build/encode the query string.
    endpoint = os.environ["AZURE_AI_ENDPOINT_URL"].rstrip("/")
    url = f"{endpoint}/translator/document:translate"
    params = {
        "targetLanguage": lang_id,
        "api-version": os.environ["AZURE_AI_API_VERSION"],
    }

    try:
        response = requests.post(
            url, headers=headers, params=params, files=files, timeout=timeout
        )
    except requests.RequestException as exc:
        # Report transport failures through the normal (ok, payload) channel
        # so one unreachable request does not abort the caller's batch.
        return False, str(exc).encode("utf-8")

    return response.status_code == 200, response.content

def process_async(file_name, file_content):
    """Translate one document through the blob-container translation job.

    The document is uploaded to the "source" container, a translation job
    is started from ``sourceUri`` to ``targetUri`` for the module-level
    ``lang_id``, and the blob with the same name is then downloaded from
    the "target" container. Temporary blobs are removed afterwards.

    Args:
        file_name: Blob name used in both containers.
        file_content: Raw bytes of the document to translate.

    Returns:
        A ``(ok, payload)`` tuple. On success ``ok`` is True and
        ``payload`` is the downloaded translated content. Otherwise ``ok``
        is False and ``payload`` is a short UTF-8 encoded status message.
        Exceptions raised by the Azure helpers propagate to the caller, but
        the uploaded source blob is still removed first.
    """
    upload_to_azure(blob_service_client, "source", file_content, file_name)
    try:
        # NOTE(review): container-level URLs are passed here, so any other
        # blobs present in "source" are presumably part of the same job --
        # confirm whether a per-file filter is needed for concurrent users.
        poller = client.begin_translation(sourceUri, targetUri, lang_id)

        # As before, the first reported document decides the outcome; an
        # empty result is now treated as a failure instead of returning None.
        first_document = next(iter(poller.result()), None)
        if first_document is None:
            return False, b"No documents were reported by the translation job"
        if first_document.status != 'Succeeded':
            return False, f"Translation status: {first_document.status}".encode("utf-8")

        # Only fetch the output once the job reported success, and always
        # remove the target blob afterwards so a later run with the same
        # file name starts from a clean container.
        try:
            translated_content = download_from_azure(blob_service_client, "target", file_name)
        finally:
            delete_from_azure(blob_service_client, "target", file_name)
        return True, translated_content
    finally:
        # Runs on success, failure and exceptions alike.
        delete_from_azure(blob_service_client, "source", file_name)

# File extensions (lower-case, without the dot) handled by each path.
_SYNC_FILE_TYPES = frozenset({
    'txt', 'tsv', 'tab', 'csv', 'html', 'htm', 'mhtml', 'mht',
    'pptx', 'xlsx', 'docx', 'msg', 'xlf', 'xliff',
})
_ASYNC_FILE_TYPES = frozenset({'pdf', 'odt', 'odp', 'ods', 'rtf'})
# Extensions for which a side-by-side preview is rendered.
_PREVIEW_FILE_TYPES = frozenset({'pdf', 'docx', 'txt'})


def _as_text(payload):
    """Return *payload* as displayable text.

    Bytes are decoded as UTF-8 with undecodable bytes replaced; anything
    else goes through ``str()``.
    """
    if isinstance(payload, (bytes, bytearray)):
        return bytes(payload).decode("utf-8", errors="replace")
    return str(payload)


def _show_preview(file_type, original_name, translated_name, original, translated):
    """Render the original and translated document in two columns.

    Does nothing for file types without a preview.
    """
    if file_type not in _PREVIEW_FILE_TYPES:
        return

    col1, col2 = st.columns(2)
    panels = (
        (col1, f"Original File: {original_name}", original),
        (col2, f"Translated File: {translated_name}", translated),
    )
    for column, caption, payload in panels:
        with column:
            st.write(caption)
            st.divider()
            if file_type == 'pdf':
                pdf_viewer(payload)
            elif file_type == 'docx':
                st.write("On development")
            else:
                # Plain text: show decoded content rather than a bytes repr.
                st.text(_as_text(payload))


# The button is only offered once at least one file has been uploaded.
submit = False
if uploaded_files:
    submit = st.button("Get Result", key='submit')

if uploaded_files and submit:
    zip_buffer = io.BytesIO()
    translated_count = 0
    total_files = len(uploaded_files)

    with zipfile.ZipFile(zip_buffer, 'w') as zip_file:
        progress_bar = st.progress(0)

        for idx, uploaded_file in enumerate(uploaded_files):
            start_time = time.time()

            file_name = uploaded_file.name
            file_content = uploaded_file.read()
            # Lower-cased so "REPORT.PDF" is routed like "report.pdf".
            file_type = file_name.rsplit('.', 1)[-1].lower()

            if file_type in _SYNC_FILE_TYPES:
                translate = process_sync
            elif file_type in _ASYNC_FILE_TYPES:
                translate = process_async
            else:
                # Previously this fell through with `result`/`response`
                # undefined (or left over from the previous file).
                st.error(f"Unsupported file type, skipped: {file_name}")
                progress_bar.progress((idx + 1) / total_files)
                continue

            try:
                result, response = translate(file_name, file_content)
            except Exception as exc:
                # Per-file boundary: report the failure and keep going so
                # files already translated in this batch are not lost.
                result, response = False, str(exc).encode("utf-8")

            duration = time.time() - start_time
            translated_name = f"{lang_name}-translated-{file_name}"

            if result:
                zip_file.writestr(translated_name, response)
                translated_count += 1
                st.success(f"Successfully translated: {file_name} (Time taken: {duration:.2f} seconds)")
                # Preview only when there is a real translation to show.
                _show_preview(file_type, file_name, translated_name, file_content, response)
            else:
                # `response` is a bytes payload here, not an HTTP response
                # object, so it is decoded instead of reading .status_code.
                st.error(f"Failed to translate {file_name}: {_as_text(response)} (Time taken: {duration:.2f} seconds)")

            progress_bar.progress((idx + 1) / total_files)

    # The archive is complete only after the ZipFile context has closed.
    if translated_count:
        st.download_button(
            label="Download All Translated Files",
            data=zip_buffer.getvalue(),
            file_name=f"{lang_name}-translated-files.zip",
            mime="application/zip",
        )