"""Gradio front end that translates a PDF while preserving its layout.

The module builds the ``demo`` Blocks app at import time; running the file as
a script launches it.
"""

import os
import shutil
import urllib.parse
import urllib.request
from pathlib import Path

import gradio as gr
import tqdm

from pdf_translate import extract_text, pdf_preview

# Language names shown in the UI mapped to the codes passed to extract_text.
lang_map = {
    "Chinese": "zh",
    "English": "en",
    "French": "fr",
    "German": "de",
    "Japanese": "ja",
    "Korean": "ko",
    "Russian": "ru",
    "Spanish": "es",
    "Italian": "it",
}

# Page-range labels mapped to the "pages" argument of extract_text.
page_map = {
    "All Pages": "all",
    "First Page": [0],
    "First 5 Pages": list(range(5)),
}

# Service names mapped to (service id, API-key environment variable or None,
# default model or None). Only the uncommented entries are offered in the UI.
service_map = {
    # "Google": ("google", None, None),
    # "DeepL": ("deepl", "DEEPL_AUTH_KEY", None),
    # "DeepLX": ("deeplx", "DEEPLX_AUTH_KEY", None),
    # "Ollama": ("ollama", None, "gemma2"),
    "OpenAI": ("openai", "OPENAI_API_KEY", "gpt-4o"),
    # "Azure": ("azure", "AZURE_APIKEY", None),
    # "Tencent": ("tencent", "TENCENT_SECRET_KEY", None),
}

_DOWNLOAD_CHUNK_SIZE = 64 * 1024
_DOWNLOAD_TIMEOUT = 10  # seconds


def _read_limited(stream, size_limit):
    """Read *stream* to the end, raising gr.Error once *size_limit* is exceeded."""
    chunks = []
    received = 0
    for chunk in iter(lambda: stream.read(_DOWNLOAD_CHUNK_SIZE), b""):
        received += len(chunk)
        if size_limit is not None and received > size_limit:
            raise gr.Error("Exceeds file size limit")
        chunks.append(chunk)
    return b"".join(chunks)


def download_with_limit(url, output_path, size_limit=None):
    """Download *url* into the directory *output_path* with an optional size cap.

    Args:
        url: HTTP or HTTPS address of the file to fetch.
        output_path: Existing directory that receives the downloaded file.
        size_limit: Maximum number of bytes to accept, or None for no limit.

    Returns:
        Path of the file written inside *output_path*.

    Raises:
        gr.Error: If the URL is not http/https, the download fails, or the
            payload is larger than *size_limit*.
    """
    parts = urllib.parse.urlsplit(url)
    # Reject file://, ftp:// and friends: the URL comes straight from the UI.
    if parts.scheme not in ("http", "https"):
        raise gr.Error("Unsupported URL")

    # basename() also strips any directory components, so the target cannot
    # escape output_path.
    filename = os.path.basename(urllib.parse.unquote(parts.path)) or "download.pdf"
    target = Path(output_path) / filename

    # The payload is buffered in memory and written only once it is complete,
    # so a rejected or failed download never leaves a partial file behind.
    try:
        with urllib.request.urlopen(url, timeout=_DOWNLOAD_TIMEOUT) as response:
            payload = _read_limited(response, size_limit)
    except (OSError, ValueError) as err:  # URLError/HTTPError are OSErrors
        raise gr.Error("Download failed") from err

    target.write_bytes(payload)
    return target


# Core translation entry point used by the "Translate" button.
def translate(
    file_type,
    file_input,
    link_input,
    service,
    apikey,
    model_id,
    lang_from,
    lang_to,
    page_range,
    progress=gr.Progress(),
):
    """Translate the selected PDF and return the values for the output widgets.

    Args:
        file_type: "File" to use *file_input*; any other value uses *link_input*.
        file_input: Uploaded file handed over by the gr.File component.
        link_input: URL of the PDF to download when *file_type* is not "File".
        service: Key of ``service_map`` selecting the translation backend.
        apikey: API key for the backend; ignored when empty.
        model_id: Model name; the service's default model is used when empty.
        lang_from: Key of ``lang_map`` for the source language.
        lang_to: Key of ``lang_map`` for the target language.
        page_range: Key of ``page_map`` selecting the pages to translate.
        progress: Gradio progress tracker (injected by Gradio).

    Returns:
        A 6-tuple: translated PDF path, preview of the translated PDF,
        dual-language PDF path, and three ``gr.update(visible=True)`` objects.

    Raises:
        gr.Error: On missing input, unknown service/language/page range,
            missing output files, or a failing preview.
    """
    progress(0, desc="Starting translation...")

    # Validate the selections up front so a bad value surfaces as a UI error
    # rather than a KeyError traceback.
    if service not in service_map:
        raise gr.Error("Unsupported service")
    if lang_from not in lang_map or lang_to not in lang_map:
        raise gr.Error("Unsupported language")
    if page_range not in page_map:
        raise gr.Error("Unsupported page range")

    output = Path("pdf2zh_files")
    output.mkdir(parents=True, exist_ok=True)

    if file_type == "File":
        if not file_input:
            raise gr.Error("No input")
        file_path = shutil.copy(file_input, output)
    else:
        if not link_input:
            raise gr.Error("No input")
        file_path = download_with_limit(
            link_input,
            output,
            5 * 1024 * 1024,  # limit downloads to 5MB
        )

    filename = os.path.splitext(os.path.basename(file_path))[0]
    file_en = output / f"{filename}.pdf"
    file_zh = output / f"{filename}-zh.pdf"
    file_dual = output / f"{filename}-dual.pdf"

    selected_service, env_key, default_model = service_map[service]
    # Assign instead of setdefault so a key typed into the UI replaces an
    # earlier one; skip empty input so a pre-configured key is not clobbered.
    # NOTE(review): os.environ is process-wide, so concurrent users share it.
    if env_key and apikey:
        os.environ[env_key] = apikey

    selected_page = page_map[page_range]
    lang_from = lang_map[lang_from]
    lang_to = lang_map[lang_to]
    if selected_service == "google":
        lang_from = "zh-CN" if lang_from == "zh" else lang_from
        lang_to = "zh-CN" if lang_to == "zh" else lang_to

    # Fall back to the service's default model when the UI supplies none.
    model = model_id or default_model
    service_spec = f"{selected_service}:{model}" if model else selected_service

    print(f"Files before translation: {os.listdir(output)}")

    def progress_bar(t: tqdm.tqdm):
        # tqdm permits total to be None or 0; skip the update in that case.
        if t.total:
            progress(t.n / t.total, desc="Translating...")

    param = {
        "files": [file_en],
        "pages": selected_page,
        "lang_in": lang_from,
        "lang_out": lang_to,
        "service": service_spec,
        "output": output,
        "thread": 4,
        "callback": progress_bar,
    }
    print(param)
    extract_text(**param)
    print(f"Files after translation: {os.listdir(output)}")

    if not file_zh.exists() or not file_dual.exists():
        raise gr.Error("No output")

    try:
        translated_preview = pdf_preview(str(file_zh))
    except Exception as err:
        raise gr.Error("No preview") from err

    progress(1.0, desc="Translation complete!")

    return (
        str(file_zh),
        translated_preview,
        str(file_dual),
        gr.update(visible=True),
        gr.update(visible=True),
        gr.update(visible=True),
    )


# Gradio app definition.
with gr.Blocks(
    title="PDFBestTranslate - PDF Translation with Preserved Formats",
    theme=gr.themes.Default(),
) as demo:
    with gr.Row():
        with gr.Column(scale=1):
            file_type = gr.Radio(
                choices=["File", "Link"],
                value="File",
                label="Input Type",
            )
            file_input = gr.File(label="Upload PDF File")
            link_input = gr.Textbox(label="Enter File URL", visible=False)
            # Show only the input widget that matches the selected type.
            file_type.change(
                lambda x: (gr.update(visible=x == "File"), gr.update(visible=x == "Link")),
                file_type,
                [file_input, link_input],
            )
            # Choices come from service_map so every option can be resolved
            # by translate().
            service = gr.Radio(
                choices=list(service_map),
                value=next(iter(service_map)),
                label="Translation Service",
            )
            apikey = gr.Textbox(label="API Key (Optional)")
            model_id = gr.Textbox(label="Model ID (Optional)", visible=False)
            # The default must be a key of lang_map; translate() looks it up.
            lang_from = gr.Dropdown(
                choices=list(lang_map.keys()),
                value="English",
                label="From Language",
            )
            lang_to = gr.Dropdown(
                choices=list(lang_map.keys()),
                value="Chinese",
                label="To Language",
            )
            page_range = gr.Radio(
                choices=list(page_map.keys()),
                value="All Pages",
                label="Page Range",
            )
            translate_btn = gr.Button("Translate")

        with gr.Column(scale=2):
            output_file = gr.File(label="Translated PDF File", visible=False)
            preview = gr.Textbox(label="Translated Preview", visible=False)
            output_file_dual = gr.File(label="Dual-Language PDF File", visible=False)

    # The first three outputs receive values; the last three entries repeat
    # components to receive the visible=True updates returned by translate().
    translate_btn.click(
        translate,
        inputs=[
            file_type,
            file_input,
            link_input,
            service,
            apikey,
            model_id,
            lang_from,
            lang_to,
            page_range,
        ],
        outputs=[
            output_file,
            preview,
            output_file_dual,
            output_file,
            output_file_dual,
            preview,
        ],
    )

# Launch the Gradio app.
if __name__ == "__main__":
    demo.launch()