# AIRider's picture
# Update app.py
# 506884e verified
# raw
# history blame
# 5.29 kB
import html
import json
import logging
import os

import gradio as gr
import openai
from gradio_client import Client
# Logging configuration: write DEBUG-and-above records to a local log file,
# each prefixed with a timestamp and the level name.
logging.basicConfig(
    filename='youtube_script_extractor.log',
    level=logging.DEBUG,
    format='%(asctime)s - %(levelname)s - %(message)s',
)

# The OpenAI key is read from the environment; it is None when the variable
# is not set.
openai.api_key = os.environ.get("OPENAI_API_KEY")
def parse_api_response(response):
    """Normalize a raw API response into a dictionary.

    A string is decoded as JSON first. A non-empty list is then reduced to
    its first element. Whatever remains must be a ``dict``.

    Args:
        response: A JSON string, a list, or a dict returned by the API.

    Returns:
        The response as a ``dict``.

    Raises:
        ValueError: If the JSON cannot be decoded or the resulting value is
            not a dict. The underlying error is attached as ``__cause__``.
    """
    try:
        if isinstance(response, str):
            response = json.loads(response)
        # An empty list falls through and is rejected by the dict check below.
        if isinstance(response, list) and response:
            response = response[0]
        if not isinstance(response, dict):
            raise ValueError(f"μ˜ˆμƒμΉ˜ λͺ»ν•œ 응닡 ν˜•μ‹μž…λ‹ˆλ‹€. 받은 데이터 νƒ€μž…: {type(response)}")
        return response
    except Exception as e:
        # Build the message once so the log entry and the raised error agree.
        message = f"API 응닡 νŒŒμ‹± μ‹€νŒ¨: {str(e)}"
        logging.error(message)
        # Chain explicitly so the original failure stays visible in tracebacks.
        raise ValueError(message) from e
def get_youtube_script(url):
    """Fetch the title and transcript of a YouTube video.

    Sends ``url`` to the ``/predict`` endpoint of the ``whispersound/YT_Ts_R``
    Gradio app, parses the reply with ``parse_api_response`` and reads the
    first entry of its ``data`` list.

    Args:
        url: The YouTube URL forwarded to the remote app.

    Returns:
        A ``(title, transcription_text)`` tuple. A placeholder title is used
        when the entry has no ``title`` key.

    Raises:
        ValueError: If the reply has no usable ``data`` entry or the
            transcript is empty.
        Exception: Any other failure is logged and re-raised unchanged.
    """
    logging.info(f"슀크립트 μΆ”μΆœ μ‹œμž‘: URL = {url}")
    try:
        # Build the client inside the try block so that failures while
        # creating it are logged like every other extraction error.
        client = Client("whispersound/YT_Ts_R")
        result = client.predict(youtube_url=url, api_name="/predict")
        parsed_result = parse_api_response(result)
        if 'data' not in parsed_result or not parsed_result['data']:
            raise ValueError("API 응닡에 μœ νš¨ν•œ 데이터가 μ—†μŠ΅λ‹ˆλ‹€.")
        data = parsed_result["data"][0]
        # Reject a non-dict entry explicitly instead of failing on `.get`.
        if not isinstance(data, dict):
            raise ValueError("API 응닡에 μœ νš¨ν•œ 데이터가 μ—†μŠ΅λ‹ˆλ‹€.")
        title = data.get("title", "제λͺ© μ—†μŒ")
        transcription_text = data.get("transcriptionAsText", "")
        if not transcription_text:
            raise ValueError("μΆ”μΆœλœ μŠ€ν¬λ¦½νŠΈκ°€ μ—†μŠ΅λ‹ˆλ‹€.")
        logging.info("슀크립트 μΆ”μΆœ μ™„λ£Œ")
        return title, transcription_text
    except Exception:
        logging.exception("슀크립트 μΆ”μΆœ 쀑 였λ₯˜ λ°œμƒ")
        raise
def call_api(prompt, max_tokens, temperature, top_p):
    """Send a single-turn prompt to the chat model and return its reply text.

    Args:
        prompt: Text sent as the only user message.
        max_tokens: Passed through as the completion's ``max_tokens``.
        temperature: Passed through as the sampling temperature.
        top_p: Passed through as the nucleus-sampling parameter.

    Returns:
        The ``content`` of the first choice's message.

    Raises:
        Exception: Any failure is logged with its traceback and re-raised.
    """
    # NOTE(review): `openai.ChatCompletion` looks like the pre-1.0 SDK
    # interface — confirm the installed openai version still provides it.
    chat_messages = [{"role": "user", "content": prompt}]
    try:
        completion = openai.ChatCompletion.create(
            model="gpt-4o-mini",
            messages=chat_messages,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
        )
        first_choice = completion['choices'][0]
        return first_choice['message']['content']
    except Exception:
        logging.exception("LLM API 호좜 쀑 였λ₯˜ λ°œμƒ")
        raise
def summarize_text(text):
    """Ask the LLM for a detailed summary of a transcript.

    The transcript is appended to a fixed, numbered instruction list and the
    combined prompt is sent through ``call_api`` with fixed sampling settings
    (``max_tokens=2000``, ``temperature=0.3``, ``top_p=0.9``).

    Args:
        text: The transcript to summarize.

    Returns:
        Whatever ``call_api`` returns for the assembled prompt.
    """
    summary_prompt = f"""
1. λ‹€μŒ μ£Όμ–΄μ§€λŠ” 유튜브 λŒ€λ³Έμ˜ 핡심 μ£Όμ œμ™€ λͺ¨λ“  μ£Όμš” λ‚΄μš©μ„ μƒμ„Έν•˜κ²Œ μš”μ•½ν•˜λΌ
2. λ°˜λ“œμ‹œ ν•œκΈ€λ‘œ μž‘μ„±ν•˜λΌ
3. μš”μ•½λ¬Έλ§ŒμœΌλ‘œλ„ μ˜μƒμ„ 직접 μ‹œμ²­ν•œ 것과 λ™μΌν•œ μˆ˜μ€€μœΌλ‘œ λ‚΄μš©μ„ 이해할 수 μžˆλ„λ‘ μƒμ„Ένžˆ μž‘μ„±
4. 글을 λ„ˆλ¬΄ μ••μΆ•ν•˜κ±°λ‚˜ ν•¨μΆ•ν•˜μ§€ 말고, μ€‘μš”ν•œ λ‚΄μš©κ³Ό 세뢀사항을 λͺ¨λ‘ 포함
5. λ°˜λ“œμ‹œ λŒ€λ³Έμ˜ 흐름과 논리 ꡬ쑰λ₯Ό μœ μ§€
6. λ°˜λ“œμ‹œ μ‹œκ°„ μˆœμ„œλ‚˜ μ‚¬κ±΄μ˜ μ „κ°œ 과정을 λͺ…ν™•ν•˜κ²Œ 반영
7. λ“±μž₯인물, μž₯μ†Œ, 사건 λ“± μ€‘μš”ν•œ μš”μ†Œλ₯Ό μ •ν™•ν•˜κ²Œ μž‘μ„±
8. λŒ€λ³Έμ—μ„œ μ „λ‹¬ν•˜λŠ” κ°μ •μ΄λ‚˜ λΆ„μœ„κΈ°λ„ 포함
9. λ°˜λ“œμ‹œ 기술적 μš©μ–΄λ‚˜ μ „λ¬Έ μš©μ–΄κ°€ μžˆμ„ 경우, 이λ₯Ό μ •ν™•ν•˜κ²Œ μ‚¬μš©
10. λŒ€λ³Έμ˜ λͺ©μ μ΄λ‚˜ μ˜λ„λ₯Ό νŒŒμ•…ν•˜κ³ , 이λ₯Ό μš”μ•½μ— λ°˜λ“œμ‹œ 반영
11. 각 λ¬Έμž₯을 λͺ…ν™•ν•˜κ²Œ κ΅¬λΆ„ν•˜κ³ , μ μ ˆν•œ 단락 ꡬ뢄을 μ‚¬μš©ν•˜μ—¬ 가독성을 λ†’μ΄μ‹œμ˜€
λŒ€λ³Έ:
{text}
"""
    # Fixed sampling settings used for every summary request.
    sampling = {"max_tokens": 2000, "temperature": 0.3, "top_p": 0.9}
    return call_api(summary_prompt, **sampling)
def create_collapsible_section(section_title, video_title, content):
    """Render a collapsible ``<details>`` HTML section.

    All three values are HTML-escaped before interpolation: the video title,
    transcript and summary come from external sources, and unescaped markup
    in them would be injected into the page.

    Args:
        section_title: Label shown in the ``<summary>`` line.
        video_title: Heading shown inside the expanded section.
        content: Body text; line breaks are kept by the ``pre-wrap`` style.

    Returns:
        The HTML fragment as a string.
    """
    # str() keeps the original tolerance for non-string values, which the
    # f-string used to stringify implicitly.
    safe_section_title = html.escape(str(section_title))
    safe_video_title = html.escape(str(video_title))
    safe_content = html.escape(str(content))
    return f"""
<details>
<summary style="cursor: pointer; font-weight: bold;">{safe_section_title}</summary>
<div style="margin-top: 10px;">
<h3 style="font-size: 18px; margin-bottom: 10px;">{safe_video_title}</h3>
<div style="white-space: pre-wrap; background-color: #f0f0f0; padding: 15px; border-radius: 5px;">{safe_content}</div>
</div>
</details>
"""
def analyze(url, cache):
    """Extract and summarize a video's script, yielding UI updates.

    This is a generator. It first yields the raw-script section, then yields
    again with the summary section appended. On any failure it yields a
    single error message instead.

    A blank URL is rejected up front. Without that check an empty input
    equals the ``""`` url of the initial cache, is treated as a cache hit,
    and an empty script is sent for summarization.

    Args:
        url: The YouTube URL entered by the user; surrounding whitespace is
            ignored.
        cache: Dict with ``url``, ``title`` and ``script`` keys holding the
            most recently extracted video.

    Yields:
        ``(html, cache)`` tuples, where ``cache`` is the possibly refreshed
        cache dict.
    """
    try:
        url = (url or "").strip()
        if not url:
            raise ValueError("YouTube URL을 입력해 주세요.")
        if url == cache["url"]:
            logging.info(f"μΊμ‹œλœ 데이터 μ‚¬μš©: URL = {url}")
            title, script = cache["title"], cache["script"]
        else:
            logging.info(f"μƒˆλ‘œμš΄ 데이터 μΆ”μΆœ μ‹œμž‘: URL = {url}")
            title, script = get_youtube_script(url)
            cache = {"url": url, "title": title, "script": script}
        # Build the original-script section and show it right away.
        script_section = create_collapsible_section("원문 슀크립트", title, script)
        yield script_section, cache
        # Generate the summary and append its section.
        summary = summarize_text(script)
        summary_section = create_collapsible_section("μš”μ•½", title, summary)
        yield script_section + summary_section, cache
    except Exception as e:
        error_msg = f"처리 쀑 였λ₯˜ λ°œμƒ: {str(e)}"
        logging.exception(error_msg)
        yield error_msg, cache
# Gradio interface: a URL box, a button, an HTML output area, and a
# per-session state object holding the last extracted video.
with gr.Blocks() as demo:
    gr.Markdown("## YouTube 슀크립트 μΆ”μΆœ 및 μš”μ•½ 도ꡬ")
    youtube_url_input = gr.Textbox(label="YouTube URL μž…λ ₯")
    analyze_button = gr.Button("λΆ„μ„ν•˜κΈ°")
    content_output = gr.HTML(label="λ‚΄μš©")
    cached_data = gr.State({"url": "", "title": "", "script": ""})

    # `analyze` receives the URL and the cached data, and writes back the
    # rendered HTML plus the (possibly refreshed) cache.
    analyze_button.click(
        fn=analyze,
        inputs=[youtube_url_input, cached_data],
        outputs=[content_output, cached_data],
    )

# Start the app with a public share link when run as a script.
if __name__ == "__main__":
    demo.launch(share=True)