# Spaces: Sleeping
import os
import shutil
from pathlib import Path
from urllib.parse import unquote, urlparse
from urllib.request import urlopen

import gradio as gr
import tqdm

from pdf_translate import extract_text, pdf_preview
# Language mapping: display name shown in the language dropdowns -> language
# code handed to the translation backend as "lang_in" / "lang_out".
lang_map = {
    "Chinese": "zh",
    "English": "en",
    "French": "fr",
    "German": "de",
    "Japanese": "ja",
    "Korean": "ko",
    "Russian": "ru",
    "Spanish": "es",
    "Italian": "it",
}
# Page-range mapping: display name shown in the page-range selector -> value
# passed to the translation backend as "pages". Explicit ranges are lists of
# zero-based page indices; the whole document is requested with the string
# "all".
page_map = {
    "All Pages": "all",
    "First Page": [0],
    "First 5 Pages": list(range(5)),
}
# Service mapping: display name -> (service id, API-key environment variable,
# third field).
#   [0] service id placed in front of the model in the "service" option
#   [1] name of the environment variable that receives the API key, or None
#       when the service needs no key
#   [2] presumably the default model id for the service (None when there is
#       none) -- TODO confirm against the pdf_translate backend
# Disabled services are kept as comments so they can be re-enabled easily.
service_map = {
    # "Google": ("google", None, None),
    # "DeepL": ("deepl", "DEEPL_AUTH_KEY", None),
    # "DeepLX": ("deeplx", "DEEPLX_AUTH_KEY", None),
    # "Ollama": ("ollama", None, "gemma2"),
    "OpenAI": ("openai", "OPENAI_API_KEY", "gpt-4o"),
    # "Azure": ("azure", "AZURE_APIKEY", None),
    # "Tencent": ("tencent", "TENCENT_SECRET_KEY", None),
}
def download_with_limit(url, output_path, size_limit=None):
    """Download *url* into the directory *output_path*, enforcing a size cap.

    The file is streamed in chunks so the limit is enforced while downloading
    rather than after the whole body has been buffered.

    Args:
        url: Address of the file. Only ``http`` and ``https`` are accepted so
            that user-supplied links cannot be used to read local files.
        output_path: Existing directory that receives the downloaded file.
        size_limit: Maximum number of bytes to accept. ``None`` (or 0)
            disables the limit.

    Returns:
        ``pathlib.Path`` of the downloaded file inside *output_path*.

    Raises:
        ValueError: If the URL scheme is not http/https, or the body is
            larger than *size_limit*.
        OSError: On network or filesystem failures (``urllib.error.URLError``
            is a subclass of ``OSError``).
    """
    parts = urlparse(url)
    if parts.scheme not in ("http", "https"):
        raise ValueError(f"Unsupported URL scheme: {parts.scheme!r}")

    # Keep only the last path component so the URL cannot pick the directory.
    filename = os.path.basename(unquote(parts.path)) or "download.pdf"
    target = Path(output_path) / filename

    chunk_size = 64 * 1024
    received = 0
    try:
        with urlopen(url, timeout=10) as response, open(target, "wb") as out_file:
            for chunk in iter(lambda: response.read(chunk_size), b""):
                received += len(chunk)
                if size_limit and received > size_limit:
                    raise ValueError("Exceeds file size limit")
                out_file.write(chunk)
    except BaseException:
        # Never leave a truncated file behind for later steps to pick up.
        if target.exists():
            target.unlink()
        raise
    return target
# Core translation callback
def translate(
    file_type,
    file_input,
    link_input,
    service,
    apikey,
    model_id,
    lang_from,
    lang_to,
    page_range,
    progress=gr.Progress(),
):
    """Translate a PDF and return the values for the Gradio output components.

    Args:
        file_type: "File" to use *file_input*; any other value uses
            *link_input*.
        file_input: Uploaded file as delivered by the ``gr.File`` component.
        link_input: URL of the PDF to download.
        service: Key of ``service_map`` selecting the translation service.
        apikey: API key for the service; ignored when empty.
        model_id: Model to use; when empty the service's default model from
            ``service_map`` is used.
        lang_from: Key of ``lang_map`` for the source language.
        lang_to: Key of ``lang_map`` for the target language.
        page_range: Key of ``page_map`` selecting the pages to translate.
        progress: Gradio progress tracker.

    Returns:
        A 6-tuple: path of the translated PDF, its preview, path of the
        dual-language PDF, followed by three ``gr.update(visible=True)``
        values that reveal the output components.

    Raises:
        gr.Error: On missing input, an unknown service/language/page range,
            a failed download, missing output files or a failed preview.
    """
    progress(0, desc="Starting translation...")

    # Validate the selections up front so a bad choice is reported as a
    # readable UI error before any file is copied or downloaded.
    try:
        selected_service, key_env_var, default_model = service_map[service]
    except KeyError:
        raise gr.Error(f"Unsupported translation service: {service}") from None
    try:
        selected_page = page_map[page_range]
    except KeyError:
        raise gr.Error(f"Unsupported page range: {page_range}") from None
    try:
        lang_from = lang_map[lang_from]
        lang_to = lang_map[lang_to]
    except KeyError as err:
        raise gr.Error(f"Unsupported language: {err.args[0]}") from None

    output = Path("pdf2zh_files")
    output.mkdir(parents=True, exist_ok=True)

    if file_type == "File":
        if not file_input:
            raise gr.Error("No input")
        file_path = shutil.copy(file_input, output)
    else:
        if not link_input:
            raise gr.Error("No input")
        try:
            file_path = download_with_limit(
                link_input,
                output,
                5 * 1024 * 1024,  # limit downloads to 5 MB
            )
        except (NotImplementedError, ValueError, OSError) as err:
            reason = str(err) or type(err).__name__
            raise gr.Error(f"Could not download file: {reason}") from err

    filename = os.path.splitext(os.path.basename(file_path))[0]
    file_en = output / f"{filename}.pdf"
    file_zh = output / f"{filename}-zh.pdf"
    file_dual = output / f"{filename}-dual.pdf"

    # Assign instead of setdefault(): setdefault() keeps whichever key was
    # submitted first and silently ignores every later one. An empty field is
    # skipped so it cannot replace a key configured in the environment.
    # NOTE(review): os.environ is process-wide, so the key is shared by all
    # concurrent requests.
    if key_env_var and apikey:
        os.environ[key_env_var] = apikey

    if selected_service == "google":
        lang_from = "zh-CN" if lang_from == "zh" else lang_from
        lang_to = "zh-CN" if lang_to == "zh" else lang_to

    # Fall back to the service's default model when none was entered.
    model = model_id or default_model or ""

    print(f"Files before translation: {os.listdir(output)}")

    def progress_bar(t: tqdm.tqdm):
        # tqdm leaves total as None when the length is unknown; skip the
        # update rather than divide by None or zero.
        if t.total:
            progress(t.n / t.total, desc="Translating...")

    param = {
        "files": [file_en],
        "pages": selected_page,
        "lang_in": lang_from,
        "lang_out": lang_to,
        "service": f"{selected_service}:{model}",
        "output": output,
        "thread": 4,
        "callback": progress_bar,
    }
    print(param)
    extract_text(**param)
    print(f"Files after translation: {os.listdir(output)}")

    if not file_zh.exists() or not file_dual.exists():
        raise gr.Error("No output")

    try:
        translated_preview = pdf_preview(str(file_zh))
    except Exception as err:
        # Keep the original failure attached for the server log.
        raise gr.Error("No preview") from err

    progress(1.0, desc="Translation complete!")
    return (
        str(file_zh),
        translated_preview,
        str(file_dual),
        gr.update(visible=True),
        gr.update(visible=True),
        gr.update(visible=True),
    )
# Gradio app layout and event wiring
with gr.Blocks(
    title="PDFBestTranslate - PDF Translation with Preserved Formats",
    theme=gr.themes.Default(),
) as demo:
    with gr.Row():
        # Left column: inputs and options.
        with gr.Column(scale=1):
            file_type = gr.Radio(
                choices=["File", "Link"],
                value="File",
                label="Input Type",
            )
            file_input = gr.File(label="Upload PDF File")
            link_input = gr.Textbox(label="Enter File URL", visible=False)
            # Show only the input widget that matches the selected input type.
            file_type.change(
                lambda x: (gr.update(visible=x == "File"), gr.update(visible=x == "Link")),
                file_type,
                [file_input, link_input],
            )
            # Offer exactly the services translate() can look up in
            # service_map; any other label would fail the lookup there.
            service_names = list(service_map)
            service = gr.Radio(
                choices=service_names,
                value=service_names[0] if service_names else None,
                label="Translation Service",
            )
            # Mask the key so it is not displayed in clear text.
            apikey = gr.Textbox(label="API Key (Optional)", type="password")
            model_id = gr.Textbox(label="Model ID (Optional)", visible=False)
            # The default must be one of the lang_map keys, because
            # translate() resolves the selection through lang_map.
            lang_from = gr.Dropdown(
                choices=list(lang_map.keys()),
                value="English",
                label="From Language",
            )
            lang_to = gr.Dropdown(
                choices=list(lang_map.keys()),
                value="Chinese",
                label="To Language",
            )
            page_range = gr.Radio(
                choices=list(page_map.keys()),
                value="All Pages",
                label="Page Range",
            )
            translate_btn = gr.Button("Translate")
        # Right column: results, hidden until a translation has finished.
        with gr.Column(scale=2):
            output_file = gr.File(label="Translated PDF File", visible=False)
            preview = gr.Textbox(label="Translated Preview", visible=False)
            output_file_dual = gr.File(label="Dual-Language PDF File", visible=False)
    # translate() returns six values: the first three fill the components,
    # the last three are visibility updates, which is why each output
    # component is listed twice.
    translate_btn.click(
        translate,
        inputs=[
            file_type,
            file_input,
            link_input,
            service,
            apikey,
            model_id,
            lang_from,
            lang_to,
            page_range,
        ],
        outputs=[
            output_file,
            preview,
            output_file_dual,
            output_file,
            output_file_dual,
            preview,
        ],
    )
# Launch the Gradio app when this file is run as a script
if __name__ == "__main__":
    demo.launch()