Marathon23 committed on
Commit e4531cf · verified · 1 Parent(s): a477784

Update app.py

Files changed (1)
  1. app.py +67 -92
app.py CHANGED
@@ -1,12 +1,11 @@
  import torch
  import gradio as gr
  from transformers import pipeline
- import openai  # Import OpenAI for GPT-4 API integration
- import os  # make sure os is imported
- import tempfile

- # Use the Whisper Large model for speech transcription
- MODEL_NAME = "openai/whisper-large-v3-turbo"
  BATCH_SIZE = 8
  device = 0 if torch.cuda.is_available() else "cpu"

@@ -17,107 +16,83 @@ pipe = pipeline(
      device=device,
  )

- openai_api_key = os.getenv('OPENAI_API_KEY')  # Load OpenAI API key

- # Speech-to-text function
- def transcribe(inputs, task):
-     if inputs is None:
-         raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
-
-     text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
      return text

- # Translation feature that calls the GPT-4 API
  def translate_text(text, target_language):
-     prompt = f"Translate the following text to {target_language}:\n\n{text}"
-
      try:
          response = openai.ChatCompletion.create(
-             model="gpt-4o",  # use the GPT-4o model
              messages=[{"role": "user", "content": prompt}],
-             max_tokens=500
          )
-         translation = response.choices[0].message["content"]
          return translation
      except Exception as e:
-         return f"Error during translation: {str(e)}"
-
- # Gradio interface
- demo = gr.Blocks()

- mf_transcribe = gr.Interface(
-     fn=transcribe,
-     inputs=[
-         gr.Audio(source="microphone", type="filepath"),
-         gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
-     ],
-     outputs="text",
-     title="Whisper Large V3 Turbo: Transcribe Audio",
-     description=(
-         "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
-         f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
-         " of arbitrary length."
-     ),
-     allow_flagging="never",
- )

- file_transcribe = gr.Interface(
-     fn=transcribe,
-     inputs=[
-         gr.Audio(source="upload", type="filepath", label="Audio file"),
-         gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
-     ],
-     outputs="text",
-     title="Whisper Large V3: Transcribe Audio",
-     description=(
-         "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
-         f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
-         " of arbitrary length."
-     ),
-     allow_flagging="never",
- )

- # Translation option that lets the user choose the target language
- def transcribe_and_translate(inputs, task, target_language):
-     text = transcribe(inputs, task)
-     if target_language != "None":
-         translated_text = translate_text(text, target_language)
-         return text, translated_text
-     return text, None

- # Interfaces that combine transcription and translation
- mf_transcribe_and_translate = gr.Interface(
-     fn=transcribe_and_translate,
-     inputs=[
-         gr.Audio(source="microphone", type="filepath"),
-         gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
-         gr.Dropdown(choices=["French", "German", "Spanish", "Chinese", "None"], label="Translate to Language", value="None")
-     ],
-     outputs=["text", "text"],  # two outputs: the original text and the translated text
-     title="Whisper Large V3 Turbo: Transcribe and Translate",
-     description=(
-         "Transcribe audio from microphone inputs and optionally translate it to a selected language using OpenAI GPT-4."
-     ),
-     allow_flagging="never",
- )

- file_transcribe_and_translate = gr.Interface(
-     fn=transcribe_and_translate,
-     inputs=[
-         gr.Audio(source="upload", type="filepath", label="Audio file"),
-         gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
-         gr.Dropdown(choices=["French", "German", "Spanish", "Chinese", "None"], label="Translate to Language", value="None")
-     ],
-     outputs=["text", "text"],  # two outputs: the original text and the translated text
-     title="Whisper Large V3: Transcribe and Translate",
-     description=(
-         "Transcribe audio from uploaded files and optionally translate it to a selected language using OpenAI GPT-4."
-     ),
-     allow_flagging="never",
- )

- # Combine the Gradio interfaces
- with demo:
-     gr.TabbedInterface([mf_transcribe_and_translate, file_transcribe_and_translate], ["Microphone Transcription", "File Transcription"])

- demo.queue().launch()

  import torch
  import gradio as gr
  from transformers import pipeline
+ import openai
+ import os

+ # Use the Whisper model for speech transcription
+ MODEL_NAME = "openai/whisper-large-v2"  # use a supported model name
  BATCH_SIZE = 8
  device = 0 if torch.cuda.is_available() else "cpu"

      device=device,
  )

+ # Set the OpenAI API key
+ openai.api_key = os.getenv('OPENAI_API_KEY')
+ if openai.api_key is None:
+     raise ValueError("Please set the OpenAI API key as the environment variable 'OPENAI_API_KEY'.")

+ # Define the speech-to-text function
+ def transcribe(audio):
+     if audio is None:
+         raise gr.Error("Please upload or record an audio file.")
+     text = pipe(audio)["text"]
      return text

+ # Define the translation function
  def translate_text(text, target_language):
+     if target_language == "None" or not target_language:
+         return "No translation language selected."
+
+     prompt = f"Please translate the following text into {target_language}:\n\n{text}"
      try:
          response = openai.ChatCompletion.create(
+             model="gpt-4",  # use the GPT-4 model
              messages=[{"role": "user", "content": prompt}],
+             max_tokens=1000,
+             n=1,
+             temperature=0.5,
          )
+         translation = response.choices[0].message["content"].strip()
          return translation
      except Exception as e:
+         return f"Translation error: {str(e)}"

+ # Define the full processing flow
+ def transcribe_and_translate(audio, target_language):
+     text = transcribe(audio)
+     if target_language != "None":
+         translation = translate_text(text, target_language)
+         return text, translation
+     else:
+         return text, "No translation language selected."

+ # Build the Gradio interface
+ with gr.Blocks() as demo:
+     gr.Markdown("# Speech-to-Text and Translation App")

+     with gr.Tabs():
+         with gr.TabItem("Microphone input"):
+             audio_input = gr.Audio(source="microphone", type="filepath", label="Record audio")
+             language_dropdown = gr.Dropdown(
+                 choices=["None", "English", "French", "German", "Spanish", "Chinese"],
+                 value="None",
+                 label="Translate into",
+             )
+             transcribe_button = gr.Button("Start transcription and translation")
+             original_text_output = gr.Textbox(label="Transcribed text")
+             translated_text_output = gr.Textbox(label="Translated text")

+             transcribe_button.click(
+                 fn=transcribe_and_translate,
+                 inputs=[audio_input, language_dropdown],
+                 outputs=[original_text_output, translated_text_output],
+             )

+         with gr.TabItem("Upload audio file"):
+             file_input = gr.Audio(type="filepath", label="Upload an audio file")
+             language_dropdown_file = gr.Dropdown(
+                 choices=["None", "English", "French", "German", "Spanish", "Chinese"],
+                 value="None",
+                 label="Translate into",
+             )
+             transcribe_button_file = gr.Button("Start transcription and translation")
+             original_text_output_file = gr.Textbox(label="Transcribed text")
+             translated_text_output_file = gr.Textbox(label="Translated text")

+             transcribe_button_file.click(
+                 fn=transcribe_and_translate,
+                 inputs=[file_input, language_dropdown_file],
+                 outputs=[original_text_output_file, translated_text_output_file],
+             )

+ demo.launch()
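
Note on the OpenAI call: openai.ChatCompletion.create is the pre-1.0 interface of the openai Python package and raises an error under openai>=1.0. Below is a minimal sketch of the equivalent translation request with the 1.x client, in case the Space pins a newer SDK; the gpt-4 model name, prompt, and sampling parameters are taken from the diff, while the client usage itself is an assumption and not part of this commit.

# Sketch only (not part of this commit): the same translation request written
# against the openai>=1.0 client API. The committed code uses the older
# openai.ChatCompletion interface from openai<1.0.
from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

def translate_text_v1(text: str, target_language: str) -> str:
    prompt = f"Please translate the following text into {target_language}:\n\n{text}"
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
        max_tokens=1000,
        temperature=0.5,
    )
    return response.choices[0].message.content.strip()

A similar version caveat applies to gr.Audio(source="microphone", ...): the source keyword exists in Gradio 3.x, while Gradio 4.x renamed it to sources and expects a list such as sources=["microphone"], so the pinned Gradio version determines which spelling runs.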