Marathon23 committed on
Commit e4531cf · verified · 1 Parent(s): a477784

Update app.py

Files changed (1)
  1. app.py +67 -92
app.py CHANGED
@@ -1,12 +1,11 @@
  import torch
  import gradio as gr
  from transformers import pipeline
- import openai  # Import OpenAI for GPT-4 API integration
- import os  # make sure os is imported
- import tempfile

- # Use the Whisper Large model for speech transcription
- MODEL_NAME = "openai/whisper-large-v3-turbo"
  BATCH_SIZE = 8
  device = 0 if torch.cuda.is_available() else "cpu"

@@ -17,107 +16,83 @@ pipe = pipeline(
      device=device,
  )

- openai_api_key = os.getenv('OPENAI_API_KEY')  # Load OpenAI API key

- # Speech-to-text function
- def transcribe(inputs, task):
-     if inputs is None:
-         raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
-
-     text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
      return text

- # Translation feature that calls the GPT-4 API
  def translate_text(text, target_language):
-     prompt = f"Translate the following text to {target_language}:\n\n{text}"
-
      try:
          response = openai.ChatCompletion.create(
-             model="gpt-4o",  # use the GPT-4o model
              messages=[{"role": "user", "content": prompt}],
-             max_tokens=500
          )
-         translation = response.choices[0].message["content"]
          return translation
      except Exception as e:
-         return f"Error during translation: {str(e)}"
-
- # Gradio interface
- demo = gr.Blocks()

- mf_transcribe = gr.Interface(
-     fn=transcribe,
-     inputs=[
-         gr.Audio(source="microphone", type="filepath"),
-         gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
-     ],
-     outputs="text",
-     title="Whisper Large V3 Turbo: Transcribe Audio",
-     description=(
-         "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
-         f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
-         " of arbitrary length."
-     ),
-     allow_flagging="never",
- )

- file_transcribe = gr.Interface(
-     fn=transcribe,
-     inputs=[
-         gr.Audio(source="upload", type="filepath", label="Audio file"),
-         gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
-     ],
-     outputs="text",
-     title="Whisper Large V3: Transcribe Audio",
-     description=(
-         "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
-         f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
-         " of arbitrary length."
-     ),
-     allow_flagging="never",
- )

- # Translation option that lets the user choose the target language
- def transcribe_and_translate(inputs, task, target_language):
-     text = transcribe(inputs, task)
-     if target_language != "None":
-         translated_text = translate_text(text, target_language)
-         return text, translated_text
-     return text, None

- # Interfaces that combine transcription and translation
- mf_transcribe_and_translate = gr.Interface(
-     fn=transcribe_and_translate,
-     inputs=[
-         gr.Audio(source="microphone", type="filepath"),
-         gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
-         gr.Dropdown(choices=["French", "German", "Spanish", "Chinese", "None"], label="Translate to Language", value="None")
-     ],
-     outputs=["text", "text"],  # two outputs: the original text and the translated text
-     title="Whisper Large V3 Turbo: Transcribe and Translate",
-     description=(
-         "Transcribe audio from microphone inputs and optionally translate it to a selected language using OpenAI GPT-4."
-     ),
-     allow_flagging="never",
- )

- file_transcribe_and_translate = gr.Interface(
-     fn=transcribe_and_translate,
-     inputs=[
-         gr.Audio(source="upload", type="filepath", label="Audio file"),
-         gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
-         gr.Dropdown(choices=["French", "German", "Spanish", "Chinese", "None"], label="Translate to Language", value="None")
-     ],
-     outputs=["text", "text"],  # two outputs: the original text and the translated text
-     title="Whisper Large V3: Transcribe and Translate",
-     description=(
-         "Transcribe audio from uploaded files and optionally translate it to a selected language using OpenAI GPT-4."
-     ),
-     allow_flagging="never",
- )

- # Combine the Gradio interfaces
- with demo:
-     gr.TabbedInterface([mf_transcribe_and_translate, file_transcribe_and_translate], ["Microphone Transcription", "File Transcription"])

- demo.queue().launch()

  import torch
  import gradio as gr
  from transformers import pipeline
+ import openai
+ import os

+ # Use the Whisper model for speech transcription
+ MODEL_NAME = "openai/whisper-large-v2"  # use a supported model name
  BATCH_SIZE = 8
  device = 0 if torch.cuda.is_available() else "cpu"

      device=device,
  )

+ # Set the OpenAI API key
+ openai.api_key = os.getenv('OPENAI_API_KEY')
+ if openai.api_key is None:
+     raise ValueError("Please set the OpenAI API key as the environment variable 'OPENAI_API_KEY'.")

+ # Define the speech-to-text function
+ def transcribe(audio):
+     if audio is None:
+         raise gr.Error("Please upload or record an audio file.")
+     text = pipe(audio)["text"]
      return text

+ # Define the translation function
  def translate_text(text, target_language):
+     if target_language == "None" or not target_language:
+         return "No translation language selected."
+
+     prompt = f"Please translate the following text into {target_language}:\n\n{text}"
      try:
          response = openai.ChatCompletion.create(
+             model="gpt-4",  # use the GPT-4 model
              messages=[{"role": "user", "content": prompt}],
+             max_tokens=1000,
+             n=1,
+             temperature=0.5,
          )
+         translation = response.choices[0].message["content"].strip()
          return translation
      except Exception as e:
+         return f"Translation error: {str(e)}"

+ # Define the full processing flow
+ def transcribe_and_translate(audio, target_language):
+     text = transcribe(audio)
+     if target_language != "None":
+         translation = translate_text(text, target_language)
+         return text, translation
+     else:
+         return text, "No translation language selected."

+ # Build the Gradio interface
+ with gr.Blocks() as demo:
+     gr.Markdown("# Speech-to-Text and Translation App")

+     with gr.Tabs():
+         with gr.TabItem("Microphone input"):
+             audio_input = gr.Audio(source="microphone", type="filepath", label="Record audio")
+             language_dropdown = gr.Dropdown(
+                 choices=["None", "English", "French", "German", "Spanish", "Chinese"],
+                 value="None",
+                 label="Translate into",
+             )
+             transcribe_button = gr.Button("Start transcription and translation")
+             original_text_output = gr.Textbox(label="Transcribed text")
+             translated_text_output = gr.Textbox(label="Translated text")

+             transcribe_button.click(
+                 fn=transcribe_and_translate,
+                 inputs=[audio_input, language_dropdown],
+                 outputs=[original_text_output, translated_text_output],
+             )

+         with gr.TabItem("Upload audio file"):
+             file_input = gr.Audio(type="filepath", label="Upload an audio file")
+             language_dropdown_file = gr.Dropdown(
+                 choices=["None", "English", "French", "German", "Spanish", "Chinese"],
+                 value="None",
+                 label="Translate into",
+             )
+             transcribe_button_file = gr.Button("Start transcription and translation")
+             original_text_output_file = gr.Textbox(label="Transcribed text")
+             translated_text_output_file = gr.Textbox(label="Translated text")

+             transcribe_button_file.click(
+                 fn=transcribe_and_translate,
+                 inputs=[file_input, language_dropdown_file],
+                 outputs=[original_text_output_file, translated_text_output_file],
+             )

+ demo.launch()
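
Note on the OpenAI call: openai.ChatCompletion.create is the pre-1.0 interface of the openai Python package and raises an error under openai>=1.0. Below is a minimal sketch of the equivalent translation request with the 1.x client, in case the Space pins a newer SDK; the gpt-4 model name, prompt, and sampling parameters are taken from the diff, while the client usage itself is an assumption and not part of this commit.

# Sketch only (not part of this commit): the same translation request written
# against the openai>=1.0 client API. The committed code uses the older
# openai.ChatCompletion interface from openai<1.0.
from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

def translate_text_v1(text: str, target_language: str) -> str:
    prompt = f"Please translate the following text into {target_language}:\n\n{text}"
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
        max_tokens=1000,
        temperature=0.5,
    )
    return response.choices[0].message.content.strip()

A similar version caveat applies to gr.Audio(source="microphone", ...): the source keyword exists in Gradio 3.x, while Gradio 4.x renamed it to sources and expects a list such as sources=["microphone"], so the pinned Gradio version determines which spelling runs.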