zman1x1's picture
add setup script
e5ec50a unverified
raw
history blame
2.72 kB
# from utils.subtitles import getSubs
# subs = getSubs("G8gEos8F9R0")
# print(subs)
# import json
# with open("subs.json", "w") as f:
# json.dump(subs, f)
import datetime
import gradio as gr
def greet(name):
    """Placeholder stub: ignores ``name`` and returns ``None``."""
    return None
def run_model(video_id, outname, chunk_size):
    """Build a slide deck that summarizes a YouTube video from its subtitles.

    The video is downloaded and its subtitles are split into chunks. Each
    chunk becomes one slide holding a generated title and a summary; when a
    frame image for the chunk's timestamp exists on disk it is added as the
    slide background. Slides are written to the intermediate Markdown file
    ``summary.md``, which is then handed to ``generate_ppt`` together with
    ``outname``.

    Args:
        video_id: YouTube video id. Used to build the ``https://youtu.be/``
            URL and the ``out/vid-<id>`` file names.
        outname: Path of the presentation file to generate.
        chunk_size: Forwarded as ``size`` to the subtitle chunker.

    Returns:
        The absolute path of ``outname``.
    """
    # Project and third-party imports stay function-local, as in the original,
    # so they are only loaded when a run is actually requested.
    import os
    import shutil

    from rich.progress import track
    from utils.subtitles import getSubsText
    from models.distilbart_cnn_12_6 import summarize
    from models.t5_small_medium_title_generation import t5model as generate_title
    from utils.marp_wrapper import marp
    import utils.markdown as md
    # from utils.chunk import LangChainChunker as chunker
    from utils.subtitles import subs as chunker
    from utils.ppt import generate_ppt
    from utils.video import video

    # Check for the marp command. shutil.which searches PATH directly, so the
    # check does not depend on a POSIX shell being available.
    if shutil.which("marp") is None:
        print("Marp not found. Please install marp-cli.")
        # Best effort: try the bundled setup script, but do not abort the run
        # if it fails -- just make the failure visible.
        if os.system("sudo bash setup.sh") != 0:
            print("setup.sh exited with a non-zero status; marp may still be missing.")

    # Intermediary Markdown file
    print("Creating Markdown file...")
    ppt = marp("summary.md")
    ppt.add_header(
        theme="uncover",
        background="",
        _class="invert",
    )
    # smaller font size (1.5rem)
    ppt.add_body("<style> section { font-size: 1.5rem; } </style>")

    # Generate video
    vid = video(f"https://youtu.be/{video_id}", f"out/vid-{video_id}")
    vid.download()

    # Get the Subtitles from the YouTube video
    print("Getting subtitles...")
    # NOTE(review): the Gradio slider may deliver chunk_size as a float --
    # confirm getSubsList accepts a non-int size.
    chunks = chunker(video_id).getSubsList(size=chunk_size)
    chunk_len = len(chunks)
    print(f"subtitles divided to {chunk_len} chunks")

    progress = track(chunks, description="Processing chunks")
    for chunk_num, chunk in enumerate(progress, start=1):
        print(f"processing Chunk: {chunk_num}/{chunk_len}")
        # chunk[0] is the text to summarize, chunk[1] an offset in seconds.
        timestamp = str(datetime.timedelta(seconds=chunk[1]))
        # TODO: better file path
        img_path = f"out/vid-{video_id}_{timestamp}.png"

        summary = summarize(chunk[0])
        # presumably writes the frame to img_path -- verify against utils.video
        vid.getframe(timestamp)
        title = generate_title(summary)

        ppt.add_page(md.h2(title), summary)

        # Only attach the frame if it actually exists on disk.
        if os.path.exists(img_path):
            ppt.add_body(
                md.image(
                    img_path,
                    align="left",
                    setAsBackground=True,
                    size="contain",
                )
            )

        ppt.marp_end()

    print(f"Generating {outname}..")
    ppt.close_file()
    generate_ppt("summary.md", outname)

    # return full path to the ppt file
    return os.path.abspath(outname)
# Gradio UI for run_model: the two text boxes and the slider (200-1000) are
# passed positionally as video_id, outname and chunk_size; the returned path
# is served as a file.
demo = gr.Interface(
    fn=run_model,
    inputs=[
        "text",
        "text",
        gr.Slider(minimum=200, maximum=1000),
    ],
    outputs="file",
)
demo.launch()