Spaces:
Running
Running
lines=40,
Browse files
app.py
CHANGED
@@ -256,8 +256,9 @@ def process_transcript_and_screenshots(video_id):
|
|
256 |
service = init_drive_service()
|
257 |
parent_folder_id = '1GgI4YVs0KckwStVQkLa1NZ8IpaEMurkL'
|
258 |
folder_id = create_folder_if_not_exists(service, video_id, parent_folder_id)
|
|
|
|
|
259 |
file_name = f'{video_id}_transcript.json'
|
260 |
-
|
261 |
# 检查逐字稿是否存在
|
262 |
exists, file_id = check_file_exists(service, folder_id, file_name)
|
263 |
if not exists:
|
@@ -298,8 +299,6 @@ def process_youtube_link(link):
|
|
298 |
video_id = extract_youtube_id(link)
|
299 |
global VIDEO_ID
|
300 |
VIDEO_ID = video_id
|
301 |
-
|
302 |
-
|
303 |
download_youtube_video(video_id, output_path=OUTPUT_PATH)
|
304 |
|
305 |
try:
|
@@ -345,6 +344,7 @@ def process_youtube_link(link):
|
|
345 |
summary_json = get_video_id_summary(video_id, formatted_simple_transcript)
|
346 |
summary = summary_json["summary"]
|
347 |
html_content = format_transcript_to_html(formatted_transcript)
|
|
|
348 |
first_image = formatted_transcript[0]['screenshot_path']
|
349 |
first_text = formatted_transcript[0]['text']
|
350 |
mind_map_json = get_mind_map(video_id, formatted_simple_transcript)
|
@@ -360,6 +360,7 @@ def process_youtube_link(link):
|
|
360 |
mind_map, \
|
361 |
mind_map_html, \
|
362 |
html_content, \
|
|
|
363 |
first_image, \
|
364 |
first_text,
|
365 |
|
@@ -372,7 +373,12 @@ def format_transcript_to_html(formatted_transcript):
|
|
372 |
html_content += f"<img src='{entry['screenshot_path']}' width='500px' />"
|
373 |
return html_content
|
374 |
|
375 |
-
|
|
|
|
|
|
|
|
|
|
|
376 |
|
377 |
def get_embedded_youtube_link(video_id, start_time):
|
378 |
int_start_time = int(start_time)
|
@@ -863,6 +869,8 @@ with gr.Blocks() as demo:
|
|
863 |
next_button = gr.Button("Next")
|
864 |
prev_button.click(fn=prev_slide, inputs=[], outputs=[slide_image, slide_text])
|
865 |
next_button.click(fn=next_slide, inputs=[], outputs=[slide_image, slide_text])
|
|
|
|
|
866 |
with gr.Tab("本文"):
|
867 |
df_string_output = gr.Textbox(lines=40, label="Data Text")
|
868 |
with gr.Tab("重點"):
|
@@ -907,7 +915,8 @@ with gr.Blocks() as demo:
|
|
907 |
df_summarise,
|
908 |
mind_map,
|
909 |
mind_map_html,
|
910 |
-
transcript_html,
|
|
|
911 |
slide_image,
|
912 |
slide_text
|
913 |
]
|
@@ -925,6 +934,7 @@ with gr.Blocks() as demo:
|
|
925 |
mind_map,
|
926 |
mind_map_html,
|
927 |
transcript_html,
|
|
|
928 |
slide_image,
|
929 |
slide_text
|
930 |
]
|
|
|
256 |
service = init_drive_service()
|
257 |
parent_folder_id = '1GgI4YVs0KckwStVQkLa1NZ8IpaEMurkL'
|
258 |
folder_id = create_folder_if_not_exists(service, video_id, parent_folder_id)
|
259 |
+
|
260 |
+
# 逐字稿文件名
|
261 |
file_name = f'{video_id}_transcript.json'
|
|
|
262 |
# 检查逐字稿是否存在
|
263 |
exists, file_id = check_file_exists(service, folder_id, file_name)
|
264 |
if not exists:
|
|
|
299 |
video_id = extract_youtube_id(link)
|
300 |
global VIDEO_ID
|
301 |
VIDEO_ID = video_id
|
|
|
|
|
302 |
download_youtube_video(video_id, output_path=OUTPUT_PATH)
|
303 |
|
304 |
try:
|
|
|
344 |
summary_json = get_video_id_summary(video_id, formatted_simple_transcript)
|
345 |
summary = summary_json["summary"]
|
346 |
html_content = format_transcript_to_html(formatted_transcript)
|
347 |
+
simple_html_content = format_simple_transcript_to_html(formatted_simple_transcript)
|
348 |
first_image = formatted_transcript[0]['screenshot_path']
|
349 |
first_text = formatted_transcript[0]['text']
|
350 |
mind_map_json = get_mind_map(video_id, formatted_simple_transcript)
|
|
|
360 |
mind_map, \
|
361 |
mind_map_html, \
|
362 |
html_content, \
|
363 |
+
simple_html_content, \
|
364 |
first_image, \
|
365 |
first_text,
|
366 |
|
|
|
373 |
html_content += f"<img src='{entry['screenshot_path']}' width='500px' />"
|
374 |
return html_content
|
375 |
|
376 |
+
def format_simple_transcript_to_html(formatted_transcript):
    """Render transcript entries as a screenshot-free HTML fragment.

    Args:
        formatted_transcript: Iterable of mappings, each providing the
            'start_time', 'end_time' and 'text' keys.

    Returns:
        A string holding one "<h3>start - end</h3><p>text</p>" pair per
        entry, concatenated in input order. Empty input yields "".

    Raises:
        KeyError: If an entry lacks one of the three keys above.
    """
    # Function-scope import: the module's top-level import block is not
    # visible from this block, so the dependency is kept local.
    import html

    # The text is escaped so that characters such as '<', '>' and '&' are
    # shown literally instead of being parsed as markup by the HTML widget.
    # A single join also avoids rebuilding the string on every iteration.
    return "".join(
        f"<h3>{entry['start_time']} - {entry['end_time']}</h3>"
        f"<p>{html.escape(str(entry['text']))}</p>"
        for entry in formatted_transcript
    )
|
382 |
|
383 |
def get_embedded_youtube_link(video_id, start_time):
|
384 |
int_start_time = int(start_time)
|
|
|
869 |
next_button = gr.Button("Next")
|
870 |
prev_button.click(fn=prev_slide, inputs=[], outputs=[slide_image, slide_text])
|
871 |
next_button.click(fn=next_slide, inputs=[], outputs=[slide_image, slide_text])
|
872 |
+
with gr.Tab("逐字稿"):
|
873 |
+
simple_html_content = gr.HTML(label="Simple Transcript", lines=40)
|
874 |
with gr.Tab("本文"):
|
875 |
df_string_output = gr.Textbox(lines=40, label="Data Text")
|
876 |
with gr.Tab("重點"):
|
|
|
915 |
df_summarise,
|
916 |
mind_map,
|
917 |
mind_map_html,
|
918 |
+
transcript_html,
|
919 |
+
simple_html_content,
|
920 |
slide_image,
|
921 |
slide_text
|
922 |
]
|
|
|
934 |
mind_map,
|
935 |
mind_map_html,
|
936 |
transcript_html,
|
937 |
+
simple_html_content,
|
938 |
slide_image,
|
939 |
slide_text
|
940 |
]
|