Update app.py
app.py
CHANGED
@@ -8,9 +8,13 @@ import whisper
 from llm.openai_api import openai_call
 from llm.yi_moe_api import yi_moe
 from utils.trans_utils import extract_timestamps
+import os
+from pytubefix import YouTube
+import ffmpeg
+from yidong import YiDong
 
 API_URL_TEMPLATE = "https://api-yidong.lingyiwanwu.com/v1/ops/api_key?user_email={user_email}&user_source=huggingface"
-model = whisper.load_model("tiny")
+model = whisper.load_model("tiny.en")
 audio_clipper = VideoClipper(model)
 
 def get_user_email(oauth_token: gr.OAuthToken | None) -> str | None:
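For context, "tiny" is Whisper's multilingual checkpoint and "tiny.en" its English-only variant, and the new imports above add three third-party dependencies. Judging by the import names and the pip hint printed later in this file, the packages would presumably be pytubefix, ffmpeg-python and yidong (the exact PyPI name of the YiDong SDK is an assumption here), plus an ffmpeg binary available on the PATH.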
@@ -27,6 +31,66 @@ def get_user_email(oauth_token: gr.OAuthToken | None) -> str | None:
     email = user_info.get("email")
     return call_api(email)
 
+
+
+def get_video_size(stream):
+
+    return stream.filesize / (1024 * 1024)
+
+def download_youtube_video(url):
+    try:
+        yt = YouTube(url, use_po_token=True)
+
+        video_streams = yt.streams.filter(type="video").order_by('resolution').desc()
+        audio_stream = yt.streams.filter(only_audio=True).first()
+
+        print("Available video streams:")
+        for i, stream in enumerate(video_streams):
+            size = get_video_size(stream)
+            stream_type = "Progressive" if stream.is_progressive else "Adaptive"
+            print(f"{i}. Resolution: {stream.resolution}, Size: {size:.2f} MB, Type: {stream_type}")
+
+        choice = int(input("Enter the number of the video stream to download: "))
+        selected_stream = video_streams[choice]
+
+        if not os.path.exists('videos'):
+            os.makedirs('videos')
+
+        print(f"Downloading video: {yt.title}")
+        video_file = selected_stream.download(output_path='videos', filename_prefix="video_")
+
+        if not selected_stream.is_progressive:
+            print("Downloading audio...")
+            audio_file = audio_stream.download(output_path='videos', filename_prefix="audio_")
+
+            print("Merging video and audio...")
+            output_file = os.path.join('videos', f"{yt.title}.mp4")
+            stream = ffmpeg.input(video_file)
+            audio = ffmpeg.input(audio_file)
+            stream = ffmpeg.output(stream, audio, output_file, vcodec='libx264', acodec='aac', strict='experimental')
+            ffmpeg.run(stream, overwrite_output=True)
+
+            os.remove(video_file)
+            os.remove(audio_file)
+        else:
+            output_file = video_file
+
+
+        print(f"Downloaded: {yt.title} to 'videos' folder")
+        print(f"File path: {output_file}")
+        return output_file
+
+    except Exception as e:
+        print(f"An error occurred: {str(e)}")
+        print("Please make sure you have the latest version of pytube and ffmpeg-python installed.")
+        print("You can update them by running:")
+        print("pip install --upgrade pytube ffmpeg-python")
+        print("Also, ensure that ffmpeg is installed on your system and available in your PATH.")
+
+def updata_video(url):
+    video_path = download_youtube_video(url)
+    return video_path
+
 def audio_recog(audio_input, output_dir):
     return audio_clipper.recog(audio_input, None, output_dir=output_dir)
 
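One thing worth spelling out about download_youtube_video(): the choice = int(input(...)) step needs an interactive stdin, which the Download button wired further down (download_button.click(updata_video, inputs=youtube_url, outputs=video_input)) does not provide. A minimal non-interactive sketch of the same selection step, composed only of pytubefix calls already used above and meant as an illustrative variant rather than part of this commit:

from pytubefix import YouTube

def pick_best_video_stream(url):
    # Same filter/ordering as above, but take the top (highest-resolution)
    # stream instead of prompting on stdin.
    yt = YouTube(url, use_po_token=True)
    return yt.streams.filter(type="video").order_by('resolution').desc().first()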
@@ -62,31 +126,47 @@ def mix_recog(video_input, audio_input,output_dir,ASR="whisper"):
 
 def llm_inference(system_content, user_content, srt_text, model, apikey):
     SUPPORT_LLM_PREFIX = ['qwen', 'gpt', 'g4f', 'moonshot',"gpt-4o","22A"]
+    if model.startswith('qwen'):
+        return call_qwen_model(apikey, model, user_content+'\n'+srt_text, system_content)
     if model.startswith('gpt') or model.startswith('moonshot'):
         return openai_call(apikey, model, system_content = system_content, user_content = user_content+'\n'+srt_text)
-
+    if model.startswith('22A'):
         return yi_moe(apikey, model, user_content+'\n'+srt_text, system_content)
+    elif model.startswith('g4f'):
+        model = "-".join(model.split('-')[1:])
+        return g4f_openai_call(model, system_content, user_content+'\n'+srt_text)
     else:
         logging.error("LLM name error, only {} are supported as LLM name prefix."
                       .format(SUPPORT_LLM_PREFIX))
 
-
-
-
-
-    output_dir =
-
-
-
-
-
-
-
-
-
-
-
-
+
+def clip_and_summary(LLM_res, dest_text, video_spk_input, start_ost, end_ost, video_state, audio_state, output_dir, apikey):
+    def AI_clip(LLM_res, dest_text, video_spk_input, start_ost, end_ost, video_state, audio_state, output_dir):
+        timestamp_list = extract_timestamps(LLM_res)
+        output_dir = output_dir.strip()
+        if not len(output_dir):
+            output_dir = None
+        else:
+            output_dir = os.path.abspath(output_dir)
+        if video_state is not None:
+            clip_video_file, message, clip_srt = audio_clipper.video_clip(
+                dest_text, start_ost, end_ost, video_state,
+                dest_spk=video_spk_input, output_dir=output_dir, timestamp_list=timestamp_list, add_sub=False)
+            return clip_video_file, None, message, clip_srt
+        if audio_state is not None:
+            (sr, res_audio), message, clip_srt = audio_clipper.clip(
+                dest_text, start_ost, end_ost, audio_state,
+                dest_spk=video_spk_input, output_dir=output_dir, timestamp_list=timestamp_list, add_sub=False)
+            return None, (sr, res_audio), message, clip_srt
+    def get_summarizes(api_key,input_file):
+        yd = YiDong(api_key = api_key)
+        rid = yd.add_resource(input_file)
+        t = yd.video_summary(rid)
+        data = t()
+        return data.video_summary.summary
+    clip_video_file, nouse, message, clip_srt = AI_clip(LLM_res, dest_text, video_spk_input, start_ost, end_ost, video_state, audio_state, output_dir)
+    summary = get_summarizes(apikey, clip_video_file)
+    return clip_video_file, nouse, message, clip_srt, summary
 
 with gr.Blocks() as clip_service:
     video_state, audio_state = gr.State(), gr.State()
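For reference, get_summarizes() above is the entire YiDong round trip: upload the clipped file as a resource, start a video_summary task, call the returned handle to get the finished result, and read its summary field. The same path in isolation, mirroring the calls above (API key and file name are placeholders):

from yidong import YiDong

yd = YiDong(api_key="YOUR_YIDONG_API_KEY")  # placeholder; the app passes the value bound to user_email_display
rid = yd.add_resource("clip.mp4")           # upload the clipped video
task = yd.video_summary(rid)                # start a summary task
result = task()                             # call the handle to get the finished result, as in get_summarizes()
print(result.video_summary.summary)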
@@ -98,6 +178,8 @@ with gr.Blocks() as clip_service:
     )
     clip_service.load(get_user_email, inputs=None, outputs=user_email_display)
     logging.info(f"The value of the current variable is: {user_email_display}")
+            youtube_url = gr.Textbox(label="🔗 Youtube视频链接|Youtube Video URL")
+            download_button = gr.Button("📥 下载 | Download", variant="primary")
             video_input = gr.Video(label="视频输入 | Video Input")
             audio_input = gr.Audio(label="音频输入 | Audio Input")
             with gr.Column():
@@ -147,8 +229,10 @@ with gr.Blocks() as clip_service:
                 video_end_ost = gr.Slider(minimum=-500, maximum=1000, value=100, step=50, label="⏩ 结束位置偏移 | End Offset (ms)",visible=False)
             video_output = gr.Video(label="裁剪结果 | Video Clipped")
             audio_output = gr.Audio(label="裁剪结果 | Audio Clipped")
-            clip_message = gr.Textbox(label="⚠️ 裁剪信息 | Clipping Log")
-            srt_clipped = gr.Textbox(label="📖 裁剪部分SRT字幕内容 | Clipped RST Subtitles")
+            clip_message = gr.Textbox(label="⚠️ 裁剪信息 | Clipping Log",visible=False)
+            srt_clipped = gr.Textbox(label="📖 裁剪部分SRT字幕内容 | Clipped RST Subtitles",visible=False)
+            summary = gr.Textbox(label="📖 视频摘要 | Video Summary")
+            download_button.click(updata_video, inputs=youtube_url, outputs=video_input)
     recog_button.click(mix_recog,
                        inputs=[video_input,
                                audio_input,
@@ -160,7 +244,7 @@ with gr.Blocks() as clip_service:
     llm_button.click(llm_inference,
                      inputs=[prompt_head, prompt_head2, video_srt_output, llm_model, apikey_input],
                      outputs=[llm_result])
-    llm_clip_button.click(
+    llm_clip_button.click(clip_and_summary,
                           inputs=[llm_result,
                                   video_text_input,
                                   video_spk_input,
@@ -169,8 +253,9 @@ with gr.Blocks() as clip_service:
                                   video_state,
                                   audio_state,
                                   output_dir,
+                                  user_email_display,
                                   ],
-                          outputs=[video_output, audio_output, clip_message, srt_clipped])
+                          outputs=[video_output, audio_output, clip_message, srt_clipped,summary])
 
 
 
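Taken together, the last three hunks feed nine inputs positionally into clip_and_summary(LLM_res, dest_text, video_spk_input, start_ost, end_ost, video_state, audio_state, output_dir, apikey). The two inputs between video_spk_input and video_state sit outside these hunks (presumably the start/end offset sliders), and user_email_display, which is populated with get_user_email()'s return value, arrives as apikey and is what get_summarizes() hands to YiDong. The five values returned by clip_and_summary() land on video_output, audio_output, the now-hidden clip_message and srt_clipped textboxes, and the new summary textbox.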