import gradio as gr
from gradio_client import Client
import json
import logging
import ast
import openai
import os
import nltk
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import urllib.parse
# Download NLTK tokenizer data (runs once; cached afterwards)
nltk.download('punkt')
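# NOTE (assumption): on newer NLTK releases (3.8.2+), sent_tokenize also looks
# for the 'punkt_tab' resource; fetching it as well avoids a LookupError there.
try:
    nltk.download('punkt_tab')
except Exception:
    pass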
# Logging configuration
logging.basicConfig(
    filename='youtube_script_extractor.log',
    level=logging.DEBUG,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
def parse_api_response(response):
    try:
        if isinstance(response, str):
            response = ast.literal_eval(response)
        if not isinstance(response, dict):
            raise ValueError(f"Unexpected response format. Received type: {type(response)}")
        return response
    except Exception as e:
        raise ValueError(f"Failed to parse API response: {str(e)}")
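# Example (illustrative): a string payload is parsed into a dict, anything else raises.
#   parse_api_response("{'data': []}")  -> {'data': []}
#   parse_api_response([1, 2, 3])       -> ValueError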
def get_youtube_script(url):
    logging.info(f"Script extraction started: URL = {url}")
    client = Client("whispersound/YT_Ts_R")
    try:
        logging.debug("API call started")
        result = client.predict(youtube_url=url, api_name="/predict")
        logging.debug("API call completed")
        parsed_result = parse_api_response(result)
        # Adjusted to match the API's data structure
        data_list = parsed_result.get("data", [])
        if not data_list:
            raise ValueError("Could not retrieve data.")
        # Use the first data entry
        data = data_list[0]
        title = data.get("title", "")
        transcription = data.get("transcription", [])
        transcription_as_text = data.get("transcriptionAsText", "")
        logging.info("Script extraction completed")
        script_json = json.dumps({
            "title": title,
            "transcription": transcription,
            "transcriptionAsText": transcription_as_text
        })
        return title, script_json
    except Exception as e:
        error_msg = f"Error during script extraction: {str(e)}"
        logging.exception(error_msg)
        return "", ""
# Set the OpenAI API key
openai.api_key = os.getenv("OPENAI_API_KEY")
def call_api(prompt, max_tokens, temperature, top_p):
    try:
        response = openai.ChatCompletion.create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": prompt}],
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p
        )
        return response['choices'][0]['message']['content']
    except Exception:
        logging.exception("Error during LLM API call")
        return "An error occurred while generating the summary. Please try again later."
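# NOTE: openai.ChatCompletion is the pre-1.0 SDK interface, so this file assumes
# openai<1.0. A sketch of the equivalent call on openai>=1.0 (not used here):
#   from openai import OpenAI
#   client = OpenAI()
#   response = client.chat.completions.create(model="gpt-4o-mini", messages=[...],
#                                             max_tokens=..., temperature=..., top_p=...)
#   return response.choices[0].message.content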
def extract_video_id(url):
    parsed_url = urllib.parse.urlparse(url)
    if parsed_url.hostname in ('www.youtube.com', 'youtube.com'):
        query_params = urllib.parse.parse_qs(parsed_url.query)
        return query_params.get('v', [None])[0]
    elif parsed_url.hostname == 'youtu.be':
        return parsed_url.path[1:]
    else:
        return None
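# Examples (illustrative URLs):
#   extract_video_id("https://www.youtube.com/watch?v=dQw4w9WgXcQ")  -> "dQw4w9WgXcQ"
#   extract_video_id("https://youtu.be/dQw4w9WgXcQ")                 -> "dQw4w9WgXcQ"
#   extract_video_id("https://example.com/video")                    -> None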
def summarize_section(section_text):
    prompt = f"""Please summarize the key points of the following content:
{section_text}
Write the summary concisely in Korean.
"""
    return call_api(prompt, max_tokens=500, temperature=0.3, top_p=0.9)
def segment_transcript(transcript):
    sentences = []
    start_times = []
    for entry in transcript:
        subtitle = entry.get('subtitle', '')
        start_time = entry.get('start', 0)
        if not subtitle:
            continue
        split_sentences = nltk.tokenize.sent_tokenize(subtitle)
        sentences.extend(split_sentences)
        start_times.extend([start_time] * len(split_sentences))
    if not sentences:
        return []
    # TF-IDF vector per sentence; a new section starts wherever consecutive
    # sentences drop below the cosine-similarity threshold.
    tfidf_matrix = TfidfVectorizer().fit_transform(sentences)
    vectors = tfidf_matrix.toarray()
    boundaries = [0]
    threshold = 0.3
    for i in range(1, len(sentences)):
        similarity = cosine_similarity([vectors[i - 1]], [vectors[i]])[0][0]
        if similarity < threshold:
            boundaries.append(i)
    boundaries.append(len(sentences))
    sections = []
    for i in range(len(boundaries) - 1):
        start_idx = boundaries[i]
        end_idx = boundaries[i + 1]
        section_sentences = sentences[start_idx:end_idx]
        section_text = ' '.join(section_sentences)
        section_start_time = start_times[start_idx]
        sections.append({
            'text': section_text,
            'start_time': section_start_time
        })
    return sections
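# Shape check (hypothetical entries in the {'subtitle': ..., 'start': ...} form
# the transcription API is assumed to return):
#   segment_transcript([
#       {'subtitle': 'Intro sentence. Still the intro.', 'start': 0},
#       {'subtitle': 'An unrelated new topic begins.', 'start': 42},
#   ])
#   -> [{'text': '...', 'start_time': ...}, ...]  # one dict per detected section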
def generate_summary(sections, url):
    video_id = extract_video_id(url)
    summary_html = "<h3>Summary:</h3>"
    for section in sections:
        start_time = section['start_time']
        hours = int(start_time // 3600)
        minutes = int((start_time % 3600) // 60)
        seconds = int(start_time % 60)
        timestamp_str = f"{hours:02d}:{minutes:02d}:{seconds:02d}"
        timestamp_link = f"https://www.youtube.com/watch?v={video_id}&t={int(start_time)}s"
        summary = summarize_section(section['text'])
        summary_html += f"""
        <h4><a href="{timestamp_link}" target="_blank">{timestamp_str}</a></h4>
        <div style="white-space: pre-wrap; margin-bottom: 20px;">{summary}</div>
        """
    return summary_html
with gr.Blocks() as demo:
    gr.Markdown("## YouTube Script Extraction and Summarization Tool")
    youtube_url_input = gr.Textbox(label="Enter YouTube URL")
    analyze_button = gr.Button("Analyze")
    script_output = gr.HTML(label="Script")
    summary_output = gr.HTML(label="Summary")
    cached_data = gr.State({"url": "", "title": "", "script": ""})

    def display_script(title):
        return f"""<h2 style='font-size:24px;'>{title}</h2>"""

    def extract_and_cache(url, cache):
        # Reuse the cached result when the same URL is analyzed again
        if url == cache.get("url"):
            return display_script(cache["title"]), cache
        title, script = get_youtube_script(url)
        new_cache = {"url": url, "title": title, "script": script}
        return display_script(title), new_cache
    def update_summary(cache):
        if not cache.get("script"):
            return "No script available. Please enter a YouTube URL and run the analysis first."
        try:
            parsed_result = json.loads(cache["script"])
            transcript = parsed_result.get("transcription", [])
            if not transcript:
                return "Could not retrieve the transcript."
            sections = segment_transcript(transcript)
            if not sections:
                return "Could not create sections."
            return generate_summary(sections, cache["url"])
        except Exception:
            logging.exception("Error during summary generation")
            return "An error occurred while generating the summary. Please try again later."
    analyze_button.click(
        extract_and_cache,
        inputs=[youtube_url_input, cached_data],
        outputs=[script_output, cached_data]
    ).then(
        update_summary,
        inputs=cached_data,
        outputs=summary_output
    )

demo.launch(share=True)