Marathon23 committed on
Commit
ee7903b
·
verified ·
1 Parent(s): 080c996

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -71
app.py CHANGED
@@ -8,15 +8,11 @@ from transformers.pipelines.audio_utils import ffmpeg_read
8
 
9
  import tempfile
10
  import os
11
- import openai
12
-
13
- # 設定 OpenAI API 金鑰(請替換為您自己的 API 金鑰)
14
- openai.api_key = "YOUR_OPENAI_API_KEY"
15
 
16
  MODEL_NAME = "openai/whisper-large-v3-turbo"
17
  BATCH_SIZE = 8
18
  FILE_LIMIT_MB = 1000
19
- YT_LENGTH_LIMIT_S = 3600 # 限制 YouTube 檔案為 1 小時
20
 
21
  device = 0 if torch.cuda.is_available() else "cpu"
22
 
@@ -27,34 +23,16 @@ pipe = pipeline(
27
  device=device,
28
  )
29
 
30
- def translate_text(input_text, target_language):
31
- prompt = f"請將以下文字翻譯成{target_language}:\n\n{input_text}"
32
-
33
- try:
34
- response = openai.ChatCompletion.create(
35
- model="gpt-4o",
36
- messages=[
37
- {"role": "user", "content": prompt}
38
- ]
39
- )
40
- translated_text = response['choices'][0]['message']['content'].strip()
41
- return translated_text
42
- except Exception as e:
43
- raise gr.Error(f"翻譯過程中出現錯誤:{str(e)}")
44
 
45
  @spaces.GPU
46
- def transcribe(inputs, task, translate_option, target_language):
47
  if inputs is None:
48
- raise gr.Error("未提交音訊檔案!請在提交請求前上傳或錄製音訊檔案。")
49
-
50
- result = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)
51
- text = result["text"]
52
-
53
- if translate_option == "是":
54
- text = translate_text(text, target_language)
55
 
 
56
  return text
57
 
 
58
  def _return_yt_html_embed(yt_url):
59
  video_id = yt_url.split("?v=")[-1]
60
  HTML_str = (
@@ -65,29 +43,29 @@ def _return_yt_html_embed(yt_url):
65
 
66
  def download_yt_audio(yt_url, filename):
67
  info_loader = youtube_dl.YoutubeDL()
68
-
69
  try:
70
  info = info_loader.extract_info(yt_url, download=False)
71
  except youtube_dl.utils.DownloadError as err:
72
  raise gr.Error(str(err))
73
-
74
  file_length = info["duration_string"]
75
  file_h_m_s = file_length.split(":")
76
  file_h_m_s = [int(sub_length) for sub_length in file_h_m_s]
77
-
78
  if len(file_h_m_s) == 1:
79
  file_h_m_s.insert(0, 0)
80
  if len(file_h_m_s) == 2:
81
  file_h_m_s.insert(0, 0)
82
  file_length_s = file_h_m_s[0] * 3600 + file_h_m_s[1] * 60 + file_h_m_s[2]
83
-
84
  if file_length_s > YT_LENGTH_LIMIT_S:
85
  yt_length_limit_hms = time.strftime("%HH:%MM:%SS", time.gmtime(YT_LENGTH_LIMIT_S))
86
  file_length_hms = time.strftime("%HH:%MM:%SS", time.gmtime(file_length_s))
87
- raise gr.Error(f"最大 YouTube 長度為 {yt_length_limit_hms},但獲得了長度為 {file_length_hms} 的影片。")
88
-
89
  ydl_opts = {"outtmpl": filename, "format": "worstvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best"}
90
-
91
  with youtube_dl.YoutubeDL(ydl_opts) as ydl:
92
  try:
93
  ydl.download([yt_url])
@@ -95,7 +73,7 @@ def download_yt_audio(yt_url, filename):
95
  raise gr.Error(str(err))
96
 
97
  @spaces.GPU
98
- def yt_transcribe(yt_url, task, translate_option, target_language, max_filesize=75.0):
99
  html_embed_str = _return_yt_html_embed(yt_url)
100
 
101
  with tempfile.TemporaryDirectory() as tmpdirname:
@@ -107,70 +85,50 @@ def yt_transcribe(yt_url, task, translate_option, target_language, max_filesize=
107
  inputs = ffmpeg_read(inputs, pipe.feature_extractor.sampling_rate)
108
  inputs = {"array": inputs, "sampling_rate": pipe.feature_extractor.sampling_rate}
109
 
110
- result = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)
111
- text = result["text"]
112
-
113
- if translate_option == "是":
114
- text = translate_text(text, target_language)
115
 
116
  return html_embed_str, text
117
 
118
- demo = gr.Blocks(theme=gr.themes.Ocean())
119
 
120
- language_options = ["英文", "日文", "法文", "德文", "西班牙文", "繁體中文", "簡體中文", "越南文", "泰文"]
121
 
122
  mf_transcribe = gr.Interface(
123
  fn=transcribe,
124
- inputs=[
125
- gr.Audio(sources="microphone", type="filepath"),
126
- gr.Radio(["transcribe", "translate"], label="任務", value="transcribe"),
127
- gr.Radio(["是", "否"], label="是否翻譯轉錄結果", value="否"),
128
- gr.Dropdown(language_options, label="目標語言", value="英文")
129
- ],
130
  outputs="text",
131
- title="清華大學多模態課程&廖老師嫡傳弟子-第二組 「語音轉文字」model",
132
  description=(
133
- "只需點擊一下按鈕,即可轉錄長篇的麥克風或音訊輸入!演示使用了"
134
- f"檢查點 [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) 和 🤗 Transformers 來轉錄任意長度的音訊文件。"
135
  ),
136
  allow_flagging="never",
137
  )
138
 
139
  file_transcribe = gr.Interface(
140
  fn=transcribe,
141
- inputs=[
142
- gr.Audio(sources="upload", type="filepath", label="音訊檔案"),
143
- gr.Radio(["transcribe", "translate"], label="任務", value="transcribe"),
144
- gr.Radio(["是", "否"], label="是否翻譯轉錄結果", value="否"),
145
- gr.Dropdown(language_options, label="目標語言", value="英文")
146
- ],
147
  outputs="text",
148
- title="清華大學多模態課程&廖老師嫡傳弟子-第二組 「語音轉文字」model:上傳音檔",
149
  description=(
150
- "只需點擊一下按鈕,即可轉錄長篇的麥克風或音訊輸入!演示使用了"
151
- f"檢查點 [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) 和 🤗 Transformers 來轉錄任意長度的音訊文件。"
152
  ),
153
  allow_flagging="never",
154
  )
155
 
156
- yt_transcribe = gr.Interface(
157
  fn=yt_transcribe,
158
- inputs=[
159
- gr.Textbox(lines=1, placeholder="在此處貼上 YouTube 視頻的 URL", label="YouTube URL"),
160
- gr.Radio(["transcribe", "translate"], label="任務", value="transcribe"),
161
- gr.Radio(["是", "否"], label="是否翻譯轉錄結果", value="否"),
162
- gr.Dropdown(language_options, label="目標語言", value="英文")
163
- ],
164
  outputs=["html", "text"],
165
- title="清華大學多模態課程&廖老師嫡傳弟子-第二組 「語音轉文字」model: 轉錄 YouTube",
166
  description=(
167
- "只需點擊一下按鈕,即可轉錄長篇的 YouTube 視頻!演示使用了"
168
- f"檢查點 [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) 和 🤗 Transformers 來轉錄任意長度的視頻文件。"
169
  ),
170
  allow_flagging="never",
171
  )
172
 
173
  with demo:
174
- gr.TabbedInterface([mf_transcribe, file_transcribe, yt_transcribe], ["麥克風", "音訊檔案", "YouTube"])
175
 
176
  demo.queue().launch(ssr_mode=False)
 
8
 
9
  import tempfile
10
  import os
 
 
 
 
11
 
12
  MODEL_NAME = "openai/whisper-large-v3-turbo"
13
  BATCH_SIZE = 8
14
  FILE_LIMIT_MB = 1000
15
+ YT_LENGTH_LIMIT_S = 3600 # limit to 1 hour YouTube files
16
 
17
  device = 0 if torch.cuda.is_available() else "cpu"
18
 
 
23
  device=device,
24
  )
25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
  @spaces.GPU
28
+ def transcribe(inputs):
29
  if inputs is None:
30
+ raise gr.Error("未提供音訊檔案!請在提交請求前上傳或錄製一個音訊檔案。")
 
 
 
 
 
 
31
 
32
+ text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": "transcribe"}, return_timestamps=True)["text"]
33
  return text
34
 
35
+
36
  def _return_yt_html_embed(yt_url):
37
  video_id = yt_url.split("?v=")[-1]
38
  HTML_str = (
 
43
 
44
  def download_yt_audio(yt_url, filename):
45
  info_loader = youtube_dl.YoutubeDL()
46
+
47
  try:
48
  info = info_loader.extract_info(yt_url, download=False)
49
  except youtube_dl.utils.DownloadError as err:
50
  raise gr.Error(str(err))
51
+
52
  file_length = info["duration_string"]
53
  file_h_m_s = file_length.split(":")
54
  file_h_m_s = [int(sub_length) for sub_length in file_h_m_s]
55
+
56
  if len(file_h_m_s) == 1:
57
  file_h_m_s.insert(0, 0)
58
  if len(file_h_m_s) == 2:
59
  file_h_m_s.insert(0, 0)
60
  file_length_s = file_h_m_s[0] * 3600 + file_h_m_s[1] * 60 + file_h_m_s[2]
61
+
62
  if file_length_s > YT_LENGTH_LIMIT_S:
63
  yt_length_limit_hms = time.strftime("%HH:%MM:%SS", time.gmtime(YT_LENGTH_LIMIT_S))
64
  file_length_hms = time.strftime("%HH:%MM:%SS", time.gmtime(file_length_s))
65
+ raise gr.Error(f"最大YouTube影片長度為 {yt_length_limit_hms},但提供的影片長度為 {file_length_hms}")
66
+
67
  ydl_opts = {"outtmpl": filename, "format": "worstvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best"}
68
+
69
  with youtube_dl.YoutubeDL(ydl_opts) as ydl:
70
  try:
71
  ydl.download([yt_url])
 
73
  raise gr.Error(str(err))
74
 
75
  @spaces.GPU
76
+ def yt_transcribe(yt_url, max_filesize=75.0):
77
  html_embed_str = _return_yt_html_embed(yt_url)
78
 
79
  with tempfile.TemporaryDirectory() as tmpdirname:
 
85
  inputs = ffmpeg_read(inputs, pipe.feature_extractor.sampling_rate)
86
  inputs = {"array": inputs, "sampling_rate": pipe.feature_extractor.sampling_rate}
87
 
88
+ text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": "transcribe"}, return_timestamps=True)["text"]
 
 
 
 
89
 
90
  return html_embed_str, text
91
 
 
92
 
93
+ demo = gr.Blocks(theme=gr.themes.Ocean())
94
 
95
  mf_transcribe = gr.Interface(
96
  fn=transcribe,
97
+ inputs=gr.Audio(sources="microphone", type="filepath"),
 
 
 
 
 
98
  outputs="text",
99
+ title="清華大學多模態課程&廖老師嫡傳弟子-第二組 「語音轉文字」模型",
100
  description=(
101
+ "只需點擊一下按鈕,即可轉錄長篇的麥克風或音訊輸入!此示範使用"
102
+ f"檢查點 [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) 和 🤗 Transformers 來轉錄任意長度的音訊檔案。"
103
  ),
104
  allow_flagging="never",
105
  )
106
 
107
  file_transcribe = gr.Interface(
108
  fn=transcribe,
109
+ inputs=gr.Audio(sources="upload", type="filepath", label="音訊檔案"),
 
 
 
 
 
110
  outputs="text",
111
+ title="Whisper Large V3: 音訊轉錄",
112
  description=(
113
+ "只需點擊一下按鈕,即可轉錄長篇的麥克風或音訊輸入!此示範使用"
114
+ f"檢查點 [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) 和 🤗 Transformers 來轉錄任意長度的音訊檔案。"
115
  ),
116
  allow_flagging="never",
117
  )
118
 
119
+ yt_transcribe_interface = gr.Interface(
120
  fn=yt_transcribe,
121
+ inputs=gr.Textbox(lines=1, placeholder="在此貼上YouTube影片的URL", label="YouTube URL"),
 
 
 
 
 
122
  outputs=["html", "text"],
123
+ title="Whisper Large V3: YouTube轉錄",
124
  description=(
125
+ "只需點擊一下按鈕,即可轉錄長篇的YouTube影片!此示範使用"
126
+ f"檢查點 [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) 和 🤗 Transformers 來轉錄任意長度的影片檔案。"
127
  ),
128
  allow_flagging="never",
129
  )
130
 
131
  with demo:
132
+ gr.TabbedInterface([mf_transcribe, file_transcribe, yt_transcribe_interface], ["麥克風", "音訊檔案", "YouTube"])
133
 
134
  demo.queue().launch(ssr_mode=False)