YT_Script_Shorts

Running

App Files Files Community

AIRider committed on Sep 29, 2024

Commit

073d3e8

verified ·

1 Parent(s): 54085b8

Update app.py

Browse files

Files changed (1) hide show

app.py +169 -0

app.py CHANGED Viewed

	@@ -0,0 +1,169 @@

+import gradio as gr
+from gradio_client import Client
+import json
+import logging
+import ast
+import openai  # OpenAI 라이브러리 추가
+import os
+import random
+import re
# Logging configuration: send DEBUG-and-above records to a local log file,
# each line formatted as "<timestamp> - <level> - <message>".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.DEBUG,
    filename='youtube_script_extractor.log',
)
def parse_api_response(response):
    """Normalize a raw API response into a dict.

    Args:
        response: A dict, a list whose first element is used, or a string
            holding either JSON or a Python-literal representation of one
            of those.

    Returns:
        The response dict (the first element when a non-empty list is given).

    Raises:
        ValueError: If the string cannot be parsed, or the resulting value
            is not a dict. The underlying error is chained as ``__cause__``.
    """
    try:
        if isinstance(response, str):
            try:
                # Prefer JSON: literal_eval alone rejects true/false/null.
                response = json.loads(response)
            except ValueError:
                # Not JSON; fall back to a Python literal (single quotes,
                # True/False/None). literal_eval does not execute code.
                response = ast.literal_eval(response)
        if isinstance(response, list) and len(response) > 0:
            response = response[0]
        if not isinstance(response, dict):
            raise ValueError(f"예상치 못한 응답 형식입니다. 받은 데이터 타입: {type(response)}")
        return response
    except Exception as e:
        # Keep the single ValueError contract but preserve the root cause.
        raise ValueError(f"API 응답 파싱 실패: {str(e)}") from e
# Sentence splitting (Korean).
# Matches a sentence-final ending that is not followed by a word character,
# plus any '.', '?' or '!' directly after it, so closing punctuation stays
# attached to the sentence it ends instead of leaking into the next one.
_SENTENCE_END_RE = re.compile(
    r"((?:니다|에요|구나|해요|군요|겠어요|시오|해라|예요|아요|데요|대요|세요|어요|게요|구요|고요|나요|하죠)(?![\w])[.?!]*)"
)


def split_sentences(text, max_length=100):
    """Split Korean text into display lines at sentence endings.

    Sentences are accumulated into one line until adding the next sentence
    would exceed ``max_length`` characters; a sentence that ends with
    '.', '?' or '!' closes the current line immediately.

    Args:
        text: The text to split. Empty or ``None`` yields an empty list.
        max_length: Soft upper bound on the length of an accumulated line.
            A single sentence longer than this is kept whole.

    Returns:
        A list of stripped, non-empty lines.
    """
    if not text:
        return []

    # With one capturing group, re.split alternates:
    # [body, ending, body, ending, ..., tail]
    pieces = _SENTENCE_END_RE.split(text)

    lines = []
    current = ""

    def flush(chunk):
        # Never emit empty or whitespace-only lines.
        chunk = chunk.strip()
        if chunk:
            lines.append(chunk)

    for i in range(0, len(pieces), 2):
        ending = pieces[i + 1] if i + 1 < len(pieces) else ""
        sentence = pieces[i] + ending

        if len(current) + len(sentence) > max_length:
            flush(current)
            current = sentence.strip()
        else:
            current += sentence

        if sentence.endswith(('.', '?', '!')):
            flush(current)
            current = ""

    flush(current)
    return lines
def get_youtube_script(url):
    """Fetch the title and transcript text of a YouTube video.

    Calls the ``/predict`` endpoint of the "whispersound/YT_Ts_R" Gradio app
    and reads ``data[0]["title"]`` and ``data[0]["transcriptionAsText"]``
    from the parsed response.

    Args:
        url: The YouTube video URL.

    Returns:
        A ``(title, transcription_text)`` tuple of strings. Any failure
        (empty URL, connection error, unexpected response shape) is logged
        and reported as ``("", "")``; this function does not raise.
    """
    if not isinstance(url, str) or not url.strip():
        # Nothing to look up; skip the remote call entirely.
        logging.warning("URL이 비어 있어 스크립트 추출을 건너뜁니다.")
        return "", ""

    logging.info(f"스크립트 추출 시작: URL = {url}")
    try:
        # Client construction performs network I/O, so it must sit inside
        # the try block to honour the ("", "") error contract.
        client = Client("whispersound/YT_Ts_R")
        logging.debug("API 호출 시작")
        result = client.predict(youtube_url=url.strip(), api_name="/predict")
        logging.debug("API 호출 완료")
        # Parse the response and pull out the first data entry.
        parsed_result = parse_api_response(result)
        entry = parsed_result["data"][0]
        title = entry["title"]
        transcription_text = entry["transcriptionAsText"]
    except Exception as e:
        error_msg = f"스크립트 추출 중 오류 발생: {str(e)}"
        logging.exception(error_msg)
        return "", ""

    logging.info("스크립트 추출 완료")
    # Callers treat both values as strings; map None to "".
    return title or "", transcription_text or ""
# Configure the OpenAI API key from the OPENAI_API_KEY environment variable
# (None when the variable is not set).
openai.api_key = os.environ.get("OPENAI_API_KEY")
# LLM API call helper
def call_api(prompt, max_tokens, temperature, top_p):
    """Send a single-turn chat completion request and return the reply text.

    Works with both openai>=1.0 (client API) and the legacy 0.x module-level
    ``openai.ChatCompletion`` API, which was removed in 1.0.

    Args:
        prompt: The user message content.
        max_tokens: Maximum number of tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus sampling parameter.

    Returns:
        The content of the first choice's message, or a fixed Korean error
        message if the call fails for any reason (the error is logged).
    """
    request = {
        "model": "gpt-4o-mini",
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": max_tokens,
        "temperature": temperature,
        "top_p": top_p,
    }
    try:
        if hasattr(openai, "OpenAI"):
            # openai>=1.0: ChatCompletion is gone; use a client instance.
            # A None api_key makes the client fall back to OPENAI_API_KEY.
            client = openai.OpenAI(api_key=openai.api_key)
            response = client.chat.completions.create(**request)
            return response.choices[0].message.content
        # Legacy openai<1.0 interface.
        response = openai.ChatCompletion.create(**request)
        return response['choices'][0]['message']['content']
    except Exception:
        logging.exception("LLM API 호출 중 오류 발생")
        return "요약을 생성하는 동안 오류가 발생했습니다. 나중에 다시 시도해 주세요."
# Text summarization
def summarize_text(text):
    """Ask the LLM for a structured Korean summary of a video transcript.

    Builds a fixed Korean instruction prompt around ``text`` and sends it
    through ``call_api`` with max_tokens=2000, temperature=0.3, top_p=0.9.

    Returns:
        Whatever ``call_api`` returns, or a fixed Korean error message if
        the call raises (the error is logged).
    """
    prompt_lines = (
        "당신은 YouTube 비디오 스크립트를 요약하는 AI 어시스턴트입니다.",
        "아래 제공된 스크립트를 간결하면서도 포괄적으로 요약해주세요.",
        "비디오의 주요 주제, 핵심 포인트, 전반적인 메시지에 초점을 맞추세요.",
        "요약은 잘 구조화되고 이해하기 쉬워야 하며, 내용의 본질을 포착해야 합니다.",
        "반드시 한국어로 요약을 제공하세요.",
        "다음 구조로 요약을 작성해주세요:",
        "1. 비디오의 주요 주제 또는 테마",
        "2. 제시된 주요 포인트 또는 논점",
        "3. 중요한 결론 또는 시사점",
        "요약할 스크립트:",
        f"{text}",
        "위 스크립트에 대한 요약을 제공해주세요. 요약은 간결하면서도 정보가 풍부해야 하며, 비디오 내용의 핵심을 포착해야 합니다.",
    )
    request_text = "\n".join(prompt_lines)

    try:
        summary = call_api(request_text, max_tokens=2000, temperature=0.3, top_p=0.9)
    except Exception:
        logging.exception("요약 생성 중 오류 발생")
        summary = "요약을 생성하는 동안 오류가 발생했습니다. 나중에 다시 시도해 주세요."
    return summary
# Gradio interface definition
with gr.Blocks() as demo:
    gr.Markdown("## YouTube Script Extractor with Summary")
    youtube_url_input = gr.Textbox(label="YouTube URL 입력")
    analyze_button = gr.Button("분석하기")
    script_output = gr.HTML(label="스크립트")
    summary_output = gr.HTML(label="요약")
    # Per-session state caching the most recent extraction result.
    cached_data = gr.State({"url": "", "title": "", "script": ""})

    def extract_and_cache(url, cache):
        """Return (title, script, cache), reusing the cache for a repeated URL."""
        # Reuse the cache only when it holds a successful extraction;
        # otherwise a transient failure would be replayed forever for that URL.
        if url == cache["url"] and cache["script"]:
            return cache["title"], cache["script"], cache
        if not url:
            # Nothing to fetch: reset the cache so no stale script is summarized.
            return "", "", {"url": "", "title": "", "script": ""}
        title, script = get_youtube_script(url)
        new_cache = {"url": url, "title": title, "script": script}
        return title, script, new_cache

    def display_script(title, script):
        """Render the title and the sentence-split script as collapsible HTML."""
        import html  # local import: only needed for escaping here

        # Title and transcript come from an external service; escape them so
        # characters such as '<' or '&' cannot break or inject markup.
        safe_title = html.escape(title)
        formatted_script = html.escape("\n".join(split_sentences(script)))
        script_html = f"""<h2 style='font-size:24px;'>{safe_title}</h2>
        <details>
            <summary><h3>원문 스크립트 (클릭하여 펼치기)</h3></summary>
            <pre>{formatted_script}</pre>
        </details>"""
        return script_html

    def generate_summary(script):
        """Summarize the script and wrap the result in HTML."""
        import html  # local import: only needed for escaping here

        # The summary is model output; escape it before embedding in HTML.
        summary = html.escape(summarize_text(script))
        summary_html = f"<h3>요약:</h3>\n<pre>{summary}</pre>"
        return summary_html

    def analyze(url, cache):
        """Button handler: extract the script and return its HTML plus the new cache."""
        title, script, new_cache = extract_and_cache(url, cache)
        script_html = display_script(title, script)
        return script_html, new_cache

    def update_summary(cache):
        """Follow-up handler: summarize the cached script, if there is one."""
        if not cache["script"]:
            return "스크립트가 없습니다. 먼저 YouTube URL을 입력하고 분석을 실행해주세요."
        return generate_summary(cache["script"])

    # On click: extract and show the script first, then generate the summary
    # from the freshly updated cache.
    analyze_button.click(
        analyze,
        inputs=[youtube_url_input, cached_data],
        outputs=[script_output, cached_data]
    ).then(
        update_summary,
        inputs=[cached_data],
        outputs=summary_output
    )
# Launch the interface
demo.launch(share=True)