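"""Gradio app: extract a YouTube transcript via the whispersound/YT_Ts_R Space,
segment it by TF-IDF sentence similarity, and summarize each section with OpenAI."""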
import gradio as gr
from gradio_client import Client
import json
import logging
import ast
import openai
import os
import random
import re
import nltk
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import urllib.parse
# Download NLTK data (runs only on first launch)
nltk.download('punkt')
# Logging configuration
logging.basicConfig(
    filename='youtube_script_extractor.log',
    level=logging.DEBUG,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
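# Normalize the raw API response into a dict; the Space may return a stringified Python literal.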
def parse_api_response(response):
    try:
        if isinstance(response, str):
            response = ast.literal_eval(response)
        if not isinstance(response, dict):
            raise ValueError(f"Unexpected response format. Received data type: {type(response)}")
        return response
    except Exception as e:
        raise ValueError(f"Failed to parse API response: {str(e)}")
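# Fetch the title and transcript data for the given URL from the whispersound/YT_Ts_R Space.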
def get_youtube_script(url):
    logging.info(f"Script extraction started: URL = {url}")
    client = Client("whispersound/YT_Ts_R")
    try:
        logging.debug("API call started")
        result = client.predict(youtube_url=url, api_name="/predict")
        logging.debug("API call finished")
        parsed_result = parse_api_response(result)

        # Adjusted to match the response data structure
        data_list = parsed_result.get("data", [])
        if not data_list:
            raise ValueError("Could not retrieve any data.")

        # Use the first data entry
        data = data_list[0]
        title = data.get("title", "")
        transcription = data.get("transcription", [])
        transcription_as_text = data.get("transcriptionAsText", "")

        logging.info("Script extraction finished")
        script_json = json.dumps({
            "title": title,
            "transcription": transcription,
            "transcriptionAsText": transcription_as_text
        })
        return title, script_json
    except Exception as e:
        error_msg = f"Error during script extraction: {str(e)}"
        logging.exception(error_msg)
        return "", ""
# OpenAI API key setup
openai.api_key = os.getenv("OPENAI_API_KEY")
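# Send a single-turn prompt to the OpenAI chat completion endpoint and return the reply text.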
def call_api(prompt, max_tokens, temperature, top_p):
    try:
        response = openai.ChatCompletion.create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": prompt}],
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p
        )
        return response['choices'][0]['message']['content']
    except Exception as e:
        logging.exception("Error during LLM API call")
        return "An error occurred while generating the summary. Please try again later."
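# Pull the video ID out of a standard watch URL or a youtu.be short link.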
def extract_video_id(url):
    parsed_url = urllib.parse.urlparse(url)
    if parsed_url.hostname in ('www.youtube.com', 'youtube.com'):
        query_params = urllib.parse.parse_qs(parsed_url.query)
        return query_params.get('v', [None])[0]
    elif parsed_url.hostname == 'youtu.be':
        return parsed_url.path[1:]
    else:
        return None
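# Ask the LLM for a concise Korean summary of a single transcript section.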
def summarize_section(section_text):
    prompt = f"""Summarize the key points of the following content:
{section_text}
Write the summary concisely in Korean.
"""
    return call_api(prompt, max_tokens=500, temperature=0.3, top_p=0.9)
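# Split subtitle entries into sentences, then group consecutive sentences into sections
# wherever the TF-IDF cosine similarity between neighbouring sentences drops below the threshold.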
def segment_transcript(transcript):
    sentences = []
    start_times = []
    for entry in transcript:
        subtitle = entry.get('subtitle', '')
        start_time = entry.get('start', 0)
        if not subtitle:
            continue
        split_sentences = nltk.tokenize.sent_tokenize(subtitle)
        sentences.extend(split_sentences)
        start_times.extend([start_time] * len(split_sentences))

    if not sentences:
        return []

    vectorizer = TfidfVectorizer().fit_transform(sentences)
    vectors = vectorizer.toarray()

    boundaries = [0]
    threshold = 0.3
    for i in range(1, len(sentences)):
        similarity = cosine_similarity([vectors[i - 1]], [vectors[i]])[0][0]
        if similarity < threshold:
            boundaries.append(i)
    boundaries.append(len(sentences))

    sections = []
    for i in range(len(boundaries) - 1):
        start_idx = boundaries[i]
        end_idx = boundaries[i + 1]
        section_sentences = sentences[start_idx:end_idx]
        section_text = ' '.join(section_sentences)
        section_start_time = start_times[start_idx]
        sections.append({
            'text': section_text,
            'start_time': section_start_time
        })
    return sections
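# Build the summary HTML: a timestamped YouTube link plus an LLM summary for each section.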
def generate_summary(sections, url):
    video_id = extract_video_id(url)
    summary_html = "<h3>Summary:</h3>"
    for section in sections:
        start_time = section['start_time']
        hours = int(start_time // 3600)
        minutes = int((start_time % 3600) // 60)
        seconds = int(start_time % 60)
        timestamp_str = f"{hours:02d}:{minutes:02d}:{seconds:02d}"
        timestamp_link = f"https://www.youtube.com/watch?v={video_id}&t={int(start_time)}s"
        summary = summarize_section(section['text'])
        summary_html += f"""
        <h4><a href="{timestamp_link}" target="_blank">{timestamp_str}</a></h4>
        <div style="white-space: pre-wrap; margin-bottom: 20px;">{summary}</div>
        """
    return summary_html
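# Gradio UI: URL input, an analyze button, and cached state so repeated clicks on the same URL skip re-extraction.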
with gr.Blocks() as demo:
    gr.Markdown("## YouTube Script Extraction and Summarization Tool")
    youtube_url_input = gr.Textbox(label="Enter YouTube URL")
    analyze_button = gr.Button("Analyze")
    script_output = gr.HTML(label="Script")
    summary_output = gr.HTML(label="Summary")
    cached_data = gr.State({"url": "", "title": "", "script": ""})

    def extract_and_cache(url, cache):
        if url == cache.get("url"):
            return cache["title"], cache
        title, script = get_youtube_script(url)
        new_cache = {"url": url, "title": title, "script": script}
        return title, new_cache

    def display_script(title):
        script_html = f"""<h2 style='font-size:24px;'>{title}</h2>"""
        return script_html

    def update_summary(cache):
        if not cache.get("script"):
            return "No script available. Please enter a YouTube URL and run the analysis first."
        try:
            parsed_result = json.loads(cache["script"])
            transcript = parsed_result.get("transcription", [])
            if not transcript:
                return "Could not retrieve the transcript."
            sections = segment_transcript(transcript)
            if not sections:
                return "Could not create any sections."
            return generate_summary(sections, cache["url"])
        except Exception as e:
            logging.exception("Error while generating the summary")
            return "An error occurred while generating the summary. Please try again later."

    analyze_button.click(
        extract_and_cache,
        inputs=[youtube_url_input, cached_data],
        outputs=[script_output, cached_data]
    ).then(
        update_summary,
        inputs=cached_data,
        outputs=summary_output
    )
demo.launch(share=True)