Marathon23 committed on
Commit
7b70f57
·
verified ·
1 Parent(s): e4531cf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +152 -78
app.py CHANGED
@@ -1,12 +1,23 @@
 
1
  import torch
 
2
  import gradio as gr
 
3
  from transformers import pipeline
4
- import openai
 
 
5
  import os
 
 
 
 
6
 
7
- # 使用 Whisper 模型进行语音转录
8
- MODEL_NAME = "openai/whisper-large-v2" # 使用支持的模型名称
9
  BATCH_SIZE = 8
 
 
 
10
  device = 0 if torch.cuda.is_available() else "cpu"
11
 
12
  pipe = pipeline(
@@ -16,83 +27,146 @@ pipe = pipeline(
16
  device=device,
17
  )
18
 
19
- # 设置 OpenAI API 密钥
20
- openai.api_key = os.getenv('OPENAI_API_KEY')
21
- if openai.api_key is None:
22
- raise ValueError("请设置 OpenAI API 密钥为环境变量 'OPENAI_API_KEY'。")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
- # 定义语音转文字函数
25
- def transcribe(audio):
26
- if audio is None:
27
- raise gr.Error("请上传或录制音频文件。")
28
- text = pipe(audio)["text"]
29
  return text
30
 
31
- # 定义翻译函数
32
- def translate_text(text, target_language):
33
- if target_language == "None" or not target_language:
34
- return "未选择翻译语言。", None
 
 
 
35
 
36
- prompt = f"请将以下文本翻译成 {target_language}:\n\n{text}"
 
 
37
  try:
38
- response = openai.ChatCompletion.create(
39
- model="gpt-4", # 使用 GPT-4 模型
40
- messages=[{"role": "user", "content": prompt}],
41
- max_tokens=1000,
42
- n=1,
43
- temperature=0.5,
44
- )
45
- translation = response.choices[0].message["content"].strip()
46
- return translation
47
- except Exception as e:
48
- return f"翻译出错:{str(e)}"
49
-
50
- # 定义完整的处理流程函数
51
- def transcribe_and_translate(audio, target_language):
52
- text = transcribe(audio)
53
- if target_language != "None":
54
- translation = translate_text(text, target_language)
55
- return text, translation
56
- else:
57
- return text, "未选择翻译语言。"
58
-
59
- # 构建 Gradio 界面
60
- with gr.Blocks() as demo:
61
- gr.Markdown("# 语音转文字并翻译应用")
62
-
63
- with gr.Tabs():
64
- with gr.TabItem("麦克风输入"):
65
- audio_input = gr.Audio(source="microphone", type="filepath", label="录制音频")
66
- language_dropdown = gr.Dropdown(
67
- choices=["None", "English", "French", "German", "Spanish", "Chinese"],
68
- value="None",
69
- label="翻译成以下语言",
70
- )
71
- transcribe_button = gr.Button("开始转录和翻译")
72
- original_text_output = gr.Textbox(label="转录文本")
73
- translated_text_output = gr.Textbox(label="翻译文本")
74
-
75
- transcribe_button.click(
76
- fn=transcribe_and_translate,
77
- inputs=[audio_input, language_dropdown],
78
- outputs=[original_text_output, translated_text_output],
79
- )
80
-
81
- with gr.TabItem("上传音频文件"):
82
- file_input = gr.Audio(type="filepath", label="上传音频文件")
83
- language_dropdown_file = gr.Dropdown(
84
- choices=["None", "English", "French", "German", "Spanish", "Chinese"],
85
- value="None",
86
- label="翻译成以下语言",
87
- )
88
- transcribe_button_file = gr.Button("开始转录和翻译")
89
- original_text_output_file = gr.Textbox(label="转录文本")
90
- translated_text_output_file = gr.Textbox(label="翻译文本")
91
-
92
- transcribe_button_file.click(
93
- fn=transcribe_and_translate,
94
- inputs=[file_input, language_dropdown_file],
95
- outputs=[original_text_output_file, translated_text_output_file],
96
- )
97
-
98
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import spaces
2
  import torch
3
+
4
  import gradio as gr
5
+ import yt_dlp as youtube_dl
6
  from transformers import pipeline
7
+ from transformers.pipelines.audio_utils import ffmpeg_read
8
+
9
+ import tempfile
10
  import os
11
+ import openai
12
+
13
# Read the OpenAI API key from the OPENAI_API_KEY environment variable
# instead of committing a literal value to the repository. When the variable
# is unset, os.getenv returns None and the key is left as None.
openai.api_key = os.getenv("OPENAI_API_KEY")
15
 
16
# Checkpoint name interpolated into the UI descriptions below.
# NOTE(review): presumably also the model handed to `pipeline(...)`; the
# arguments of that call are not visible in this diff hunk - confirm.
MODEL_NAME = "openai/whisper-large-v3-turbo"

# Batch size passed to `pipe` on every transcription call.
BATCH_SIZE = 8
FILE_LIMIT_MB = 1000
# Longest YouTube video accepted, in seconds (one hour).
YT_LENGTH_LIMIT_S = 60 * 60

# Use CUDA device 0 when available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 0
else:
    device = "cpu"
22
 
23
  pipe = pipeline(
 
27
  device=device,
28
  )
29
 
30
def translate_text(input_text, target_language):
    """Translate ``input_text`` into ``target_language`` via the OpenAI chat API.

    Args:
        input_text: Text to translate (the transcription result).
        target_language: Name of the target language; it is interpolated
            verbatim into the prompt.

    Returns:
        The model's reply with surrounding whitespace stripped, or an empty
        string when ``input_text`` is ``None``, empty or whitespace-only.
    """
    # Nothing to translate (e.g. silent audio): skip the API round trip
    # instead of sending a prompt with an empty payload.
    if not input_text or not input_text.strip():
        return ""

    prompt = f"請將以下文字翻譯成{target_language}:\n\n{input_text}"

    # NOTE(review): `openai.ChatCompletion` looks like the pre-1.0 client
    # interface - confirm the openai version pinned for this Space.
    response = openai.ChatCompletion.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": prompt}],
    )

    return response["choices"][0]["message"]["content"].strip()
42
+
43
@spaces.GPU
def transcribe(inputs, task, translate_option, target_language):
    """Run the Whisper pipeline on an audio input and optionally translate it.

    Args:
        inputs: Audio value supplied by the Gradio audio component; ``None``
            when nothing was recorded or uploaded.
        task: Forwarded to the pipeline as ``generate_kwargs["task"]``.
        translate_option: "是" requests a follow-up call to ``translate_text``.
        target_language: Target language passed to ``translate_text``.

    Returns:
        The transcribed text, or its translation when requested.

    Raises:
        gr.Error: If ``inputs`` is ``None``.
    """
    if inputs is None:
        raise gr.Error("未提交音訊檔案!請在提交請求前上傳或錄製音訊檔案。")

    whisper_output = pipe(
        inputs,
        batch_size=BATCH_SIZE,
        generate_kwargs={"task": task},
        return_timestamps=True,
    )
    transcript = whisper_output["text"]

    wants_translation = translate_option == "是"
    if not wants_translation:
        return transcript
    return translate_text(transcript, target_language)
55
 
56
+ def _return_yt_html_embed(yt_url):
57
+ video_id = yt_url.split("?v=")[-1]
58
+ HTML_str = (
59
+ f'<center> <iframe width="500" height="320" src="https://www.youtube.com/embed/{video_id}"> </iframe>'
60
+ " </center>"
61
+ )
62
+ return HTML_str
63
 
64
def download_yt_audio(yt_url, filename):
    """Download a YouTube video to ``filename`` after checking its length.

    Args:
        yt_url: URL of the video to fetch.
        filename: Output path handed to yt-dlp as the ``outtmpl`` option.

    Raises:
        gr.Error: If the metadata lookup or the download fails, or if the
            video is longer than ``YT_LENGTH_LIMIT_S`` seconds.
    """
    # Imported locally: the visible module-level imports do not include
    # ``time``, so the over-length branch below used to fail with NameError.
    import time

    info_loader = youtube_dl.YoutubeDL()

    try:
        info = info_loader.extract_info(yt_url, download=False)
    except youtube_dl.utils.DownloadError as err:
        raise gr.Error(str(err)) from err

    # ``duration_string`` is colon-separated ("SS", "MM:SS" or "HH:MM:SS");
    # fold it left-to-right in base 60 to get the length in seconds.
    file_length_s = 0
    for part in info["duration_string"].split(":"):
        file_length_s = file_length_s * 60 + int(part)

    if file_length_s > YT_LENGTH_LIMIT_S:
        yt_length_limit_hms = time.strftime("%HH:%MM:%SS", time.gmtime(YT_LENGTH_LIMIT_S))
        file_length_hms = time.strftime("%HH:%MM:%SS", time.gmtime(file_length_s))
        raise gr.Error(f"最大 YouTube 長度為 {yt_length_limit_hms},但獲得了長度為 {file_length_hms} 的影片。")

    ydl_opts = {"outtmpl": filename, "format": "worstvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best"}

    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        try:
            ydl.download([yt_url])
        except (youtube_dl.utils.DownloadError, youtube_dl.utils.ExtractorError) as err:
            # Surface download failures in the UI the same way as the
            # metadata lookup above.
            raise gr.Error(str(err)) from err
94
+
95
@spaces.GPU
def yt_transcribe(yt_url, task, translate_option, target_language, max_filesize=75.0):
    """Download a YouTube video, transcribe it and optionally translate it.

    Args:
        yt_url: URL of the video.
        task: Forwarded to the pipeline as ``generate_kwargs["task"]``.
        translate_option: "是" requests a follow-up call to ``translate_text``.
        target_language: Target language passed to ``translate_text``.
        max_filesize: Not used by this function; kept so the signature is
            unchanged.

    Returns:
        A ``(html_embed, text)`` tuple: the embed snippet for the video and
        the transcription (or its translation when requested).
    """
    html_embed_str = _return_yt_html_embed(yt_url)

    # The download lives in a temporary directory; the bytes are read into
    # memory before the directory is removed.
    with tempfile.TemporaryDirectory() as tmpdirname:
        filepath = os.path.join(tmpdirname, "video.mp4")
        download_yt_audio(yt_url, filepath)
        with open(filepath, "rb") as f:
            inputs = f.read()

    # Decode at the sampling rate of the pipeline's feature extractor.
    inputs = ffmpeg_read(inputs, pipe.feature_extractor.sampling_rate)
    inputs = {"array": inputs, "sampling_rate": pipe.feature_extractor.sampling_rate}

    result = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)
    text = result["text"]

    # The radio offers "是" / "否". The previous comparison against "" could
    # never match, so translation was silently skipped for YouTube input.
    if translate_option == "是":
        text = translate_text(text, target_language)

    return html_embed_str, text
115
+
116
demo = gr.Blocks(theme=gr.themes.Ocean())


def _task_and_translation_inputs():
    """Return fresh task / translate-toggle / target-language components.

    Each interface gets its own component instances, so this is called once
    per interface rather than sharing one list.
    """
    return [
        gr.Radio(["transcribe", "translate"], label="任務", value="transcribe"),
        gr.Radio(["是", "否"], label="是否翻譯轉錄結果", value="否"),
        gr.Dropdown(["英文", "日文", "法文", "德文", "西班牙文"], label="目標語言", value="英文"),
    ]


# Description shared by the microphone and file-upload tabs.
_audio_description = (
    "只需點擊一下按鈕,即可轉錄長篇的麥克風或音訊輸入!演示使用了"
    f"檢查點 [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) 和 🤗 Transformers 來轉錄任意長度的音訊文件。"
)

# Tab 1: record from the microphone.
mf_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(sources="microphone", type="filepath"),
        *_task_and_translation_inputs(),
    ],
    outputs="text",
    title="清華大學多模態課程&廖老師嫡傳弟子-第二組 「語音轉文字」model",
    description=_audio_description,
    allow_flagging="never",
)

# Tab 2: upload an audio file.
file_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(sources="upload", type="filepath", label="音訊檔案"),
        *_task_and_translation_inputs(),
    ],
    outputs="text",
    title="Whisper Large V3: 轉錄音訊",
    description=_audio_description,
    allow_flagging="never",
)

# Tab 3: transcribe a YouTube video.
# NOTE: this assignment rebinds the module-level name `yt_transcribe` from
# the function to the Interface; `fn=` is evaluated first, so the Interface
# still wraps the function.
yt_transcribe = gr.Interface(
    fn=yt_transcribe,
    inputs=[
        gr.Textbox(lines=1, placeholder="在此處貼上 YouTube 視頻的 URL", label="YouTube URL"),
        *_task_and_translation_inputs(),
    ],
    outputs=["html", "text"],
    title="Whisper Large V3: 轉錄 YouTube",
    description=(
        "只需點擊一下按鈕,即可轉錄長篇的 YouTube 視頻!演示使用了"
        f"檢查點 [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) 和 🤗 Transformers 來轉錄任意長度的視頻文件。"
    ),
    allow_flagging="never",
)

# Render the three interfaces as tabs inside the themed Blocks container.
with demo:
    tab_interfaces = [mf_transcribe, file_transcribe, yt_transcribe]
    tab_titles = ["麥克風", "音訊檔案", "YouTube"]
    gr.TabbedInterface(tab_interfaces, tab_titles)

demo.queue().launch(ssr_mode=False)