Update app.py
app.py CHANGED
@@ -1,12 +1,11 @@
 import torch
 import gradio as gr
 from transformers import pipeline
-import openai
-import os
-import tempfile
+import openai
+import os

-# Use Whisper
-MODEL_NAME = "openai/whisper-large-
+# Use the Whisper model for speech transcription
+MODEL_NAME = "openai/whisper-large-v2"  # use a supported model name
 BATCH_SIZE = 8
 device = 0 if torch.cuda.is_available() else "cpu"

@@ -17,107 +16,83 @@ pipe = pipeline(
     device=device,
 )

-
+# Set the OpenAI API key
+openai.api_key = os.getenv('OPENAI_API_KEY')
+if openai.api_key is None:
+    raise ValueError("Please set the OpenAI API key in the environment variable 'OPENAI_API_KEY'.")

-#
-def transcribe(
-    if
-        raise gr.Error("
-
-    text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
+# Define the speech-to-text function
+def transcribe(audio):
+    if audio is None:
+        raise gr.Error("Please upload or record an audio file.")
+    text = pipe(audio)["text"]
     return text

-#
+# Define the translation function
 def translate_text(text, target_language):
-
-
+    if target_language == "None" or not target_language:
+        return "No translation language selected.", None
+
+    prompt = f"Translate the following text into {target_language}:\n\n{text}"
     try:
         response = openai.ChatCompletion.create(
-            model="gpt-
+            model="gpt-4",  # use the GPT-4 model
             messages=[{"role": "user", "content": prompt}],
-            max_tokens=
+            max_tokens=1000,
+            n=1,
+            temperature=0.5,
         )
-        translation = response.choices[0].message["content"]
+        translation = response.choices[0].message["content"].strip()
         return translation
     except Exception as e:
-        return f"
-
-# Gradio interface
-demo = gr.Blocks()
+        return f"Translation error: {str(e)}"

-
-
-
-
-
-
-
-
-    description=(
-        "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
-        f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
-        " of arbitrary length."
-    ),
-    allow_flagging="never",
-)
+# Define the end-to-end transcription and translation function
+def transcribe_and_translate(audio, target_language):
+    text = transcribe(audio)
+    if target_language != "None":
+        translation = translate_text(text, target_language)
+        return text, translation
+    else:
+        return text, "No translation language selected."

-
-
-
-        gr.Audio(source="upload", type="filepath", label="Audio file"),
-        gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
-    ],
-    outputs="text",
-    title="Whisper Large V3: Transcribe Audio",
-    description=(
-        "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
-        f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
-        " of arbitrary length."
-    ),
-    allow_flagging="never",
-)
+# Build the Gradio interface
+with gr.Blocks() as demo:
+    gr.Markdown("# Speech-to-Text and Translation App")

-
-
-
-
-
-
-
+    with gr.Tabs():
+        with gr.TabItem("Microphone input"):
+            audio_input = gr.Audio(source="microphone", type="filepath", label="Record audio")
+            language_dropdown = gr.Dropdown(
+                choices=["None", "English", "French", "German", "Spanish", "Chinese"],
+                value="None",
+                label="Translate into this language",
+            )
+            transcribe_button = gr.Button("Start transcription and translation")
+            original_text_output = gr.Textbox(label="Transcribed text")
+            translated_text_output = gr.Textbox(label="Translated text")

-
-
-
-
-
-        gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
-        gr.Dropdown(choices=["French", "German", "Spanish", "Chinese", "None"], label="Translate to Language", value="None")
-    ],
-    outputs=["text", "text"],  # two outputs: the original text and the translated text
-    title="Whisper Large V3 Turbo: Transcribe and Translate",
-    description=(
-        "Transcribe audio from microphone inputs and optionally translate it to a selected language using OpenAI GPT-4."
-    ),
-    allow_flagging="never",
-)
+            transcribe_button.click(
+                fn=transcribe_and_translate,
+                inputs=[audio_input, language_dropdown],
+                outputs=[original_text_output, translated_text_output],
+            )

-
-
-
-
-
-
-
-
-
-
-        "Transcribe audio from uploaded files and optionally translate it to a selected language using OpenAI GPT-4."
-    ),
-    allow_flagging="never",
-)
+        with gr.TabItem("Upload audio file"):
+            file_input = gr.Audio(type="filepath", label="Upload audio file")
+            language_dropdown_file = gr.Dropdown(
+                choices=["None", "English", "French", "German", "Spanish", "Chinese"],
+                value="None",
+                label="Translate into this language",
+            )
+            transcribe_button_file = gr.Button("Start transcription and translation")
+            original_text_output_file = gr.Textbox(label="Transcribed text")
+            translated_text_output_file = gr.Textbox(label="Translated text")

-
-
-
+            transcribe_button_file.click(
+                fn=transcribe_and_translate,
+                inputs=[file_input, language_dropdown_file],
+                outputs=[original_text_output_file, translated_text_output_file],
+            )

-demo.
+demo.launch()
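
For reference, a minimal standalone sketch of the same transcribe-then-translate flow outside Gradio. It is an assumption-laden example, not part of the commit: it assumes a local audio file named sample.wav, an OPENAI_API_KEY in the environment, the pre-1.0 openai.ChatCompletion interface used in app.py, and an "automatic-speech-recognition" pipeline task (the pipeline arguments are not visible in this diff's context lines).

import os
import torch
import openai
from transformers import pipeline

# Assumptions: sample.wav exists locally and OPENAI_API_KEY is exported;
# neither is part of the committed app.py.
openai.api_key = os.environ["OPENAI_API_KEY"]

# Same model name and device selection as the updated app.py;
# the task string is an assumption.
asr = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-large-v2",
    device=0 if torch.cuda.is_available() else "cpu",
)

# Transcribe the audio file, then ask GPT-4 to translate the transcript.
text = asr("sample.wav")["text"]
response = openai.ChatCompletion.create(
    model="gpt-4",
    messages=[{"role": "user", "content": f"Translate the following text into French:\n\n{text}"}],
    max_tokens=1000,
    temperature=0.5,
)
print(text)
print(response.choices[0].message["content"].strip())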