AIRider committed on
Commit
073d3e8
·
verified ·
1 Parent(s): 54085b8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +169 -0
app.py CHANGED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from gradio_client import Client
3
+ import json
4
+ import logging
5
+ import ast
6
+ import openai # OpenAI 라이브러리 μΆ”κ°€
7
+ import os
8
+ import random
9
+ import re
10
+
11
# Logging configuration: records at DEBUG level and above go to a local file.
_LOG_SETTINGS = {
    "filename": "youtube_script_extractor.log",
    "level": logging.DEBUG,
    "format": "%(asctime)s - %(levelname)s - %(message)s",
}
logging.basicConfig(**_LOG_SETTINGS)
14
+
15
def parse_api_response(response):
    """Normalize a raw API response into a dictionary.

    Accepts a dict, a non-empty list (its first element is used), or a string
    holding either a Python literal or a JSON document.

    Args:
        response: Raw value returned by the API client.

    Returns:
        dict: The response payload.

    Raises:
        ValueError: If the value cannot be parsed or the result is not a
            dictionary. The underlying error is attached as ``__cause__``.
    """
    try:
        if isinstance(response, str):
            try:
                response = ast.literal_eval(response)
            except (ValueError, SyntaxError):
                # JSON-only tokens (true / false / null) are not Python
                # literals, so fall back to the JSON parser.
                response = json.loads(response)
        if isinstance(response, list) and response:
            response = response[0]
        if not isinstance(response, dict):
            raise ValueError(f"예상치 못한 응답 형식입니다. 받은 데이터 타입: {type(response)}")
        return response
    except Exception as e:
        # Keep the single ValueError contract but preserve the root cause.
        raise ValueError(f"API 응답 파싱 실패: {str(e)}") from e
26
+
27
# Korean sentence-final endings used as split points. The negative lookahead
# rejects an ending that is immediately followed by another word character.
_SENTENCE_ENDING_RE = re.compile(
    r"(니다|에요|구나|해요|군요|겠어요|시오|해라|예요|아요|데요|대요|세요|어요|게요|구요|고요|나요|하죠)(?![\w])"
)


def split_sentences(text, max_length=100):
    """Split Korean text into display chunks of roughly ``max_length`` characters.

    The text is cut after common sentence-final endings. Consecutive sentences
    are merged into one chunk until adding the next sentence would exceed
    ``max_length``; a sentence that ends in ``.``, ``?`` or ``!`` closes the
    current chunk immediately. A single sentence longer than ``max_length`` is
    kept whole rather than being cut mid-sentence.

    Args:
        text: Text to split. ``None`` or an empty string yields ``[]``.
        max_length: Soft upper bound on chunk length, in characters.

    Returns:
        list[str]: Non-empty, whitespace-stripped chunks in original order.
    """
    if not text:
        return []

    # re.split with one capture group alternates [body, ending, body, ...];
    # each body is re-joined with the ending that followed it.
    pieces = _SENTENCE_ENDING_RE.split(text)
    chunks = []
    current = ""
    for i in range(0, len(pieces), 2):
        sentence = "".join(pieces[i:i + 2])
        if len(current) + len(sentence) > max_length:
            # Flush the accumulated chunk, but never emit an empty one (this
            # happens when the very first sentence already exceeds the limit).
            if current.strip():
                chunks.append(current.strip())
            current = sentence.strip()
        else:
            current += sentence
            if sentence.endswith((".", "?", "!")):
                chunks.append(current.strip())
                current = ""
    if current.strip():
        chunks.append(current.strip())
    return chunks
48
+
49
def get_youtube_script(url):
    """Fetch the title and transcript text for a YouTube URL.

    Calls the ``/predict`` endpoint of the ``whispersound/YT_Ts_R`` Space and
    reads ``title`` and ``transcriptionAsText`` from the first entry of the
    response's ``data`` list.

    Args:
        url: YouTube video URL passed to the remote endpoint.

    Returns:
        tuple[str, str]: ``(title, transcript)``. On any failure the error is
        logged with its traceback and ``("", "")`` is returned.
    """
    logging.info(f"스크립트 추출 시작: URL = {url}")

    try:
        # Building the client is inside the try block so that a failure to
        # reach the Space is reported the same way as a failed prediction.
        client = Client("whispersound/YT_Ts_R")

        logging.debug("API 호출 시작")
        result = client.predict(youtube_url=url, api_name="/predict")
        logging.debug("API 호출 완료")

        parsed_result = parse_api_response(result)
        entry = parsed_result["data"][0]
        title = entry["title"]
        transcription_text = entry["transcriptionAsText"]

        logging.info("스크립트 추출 완료")
        return title, transcription_text

    except Exception as e:
        error_msg = f"스크립트 추출 중 오류 발생: {str(e)}"
        logging.exception(error_msg)
        return "", ""
73
+
74
# The OpenAI API key is read from the environment.
openai.api_key = os.getenv("OPENAI_API_KEY")


def call_api(prompt, max_tokens, temperature, top_p):
    """Send a single-turn chat prompt to the LLM and return its reply text.

    Args:
        prompt: User message content.
        max_tokens: Upper bound on generated tokens.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Returns:
        str: The content of the first choice, or a user-facing error message
        if the request fails for any reason (the failure is logged).
    """
    try:
        # NOTE(review): openai.ChatCompletion is the pre-1.0 SDK interface;
        # confirm the deployed openai package version still provides it.
        response = openai.ChatCompletion.create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": prompt}],
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
        )
        return response['choices'][0]['message']['content']
    except Exception:
        # Best-effort boundary: the UI shows a message instead of crashing.
        logging.exception("LLM API 호출 중 오류 발생")
        return "요약을 생성하는 동안 오류가 발생했습니다. 나중에 다시 시도해 주세요."
91
+
92
def summarize_text(text):
    """Ask the LLM for a structured Korean summary of a video transcript.

    Args:
        text: Transcript text embedded verbatim in the prompt.

    Returns:
        str: The summary returned by ``call_api``, or a user-facing error
        message if summarization raises.
    """
    prompt = f"""당신은 YouTube 비디오 스크립트를 요약하는 AI 어시스턴트입니다.
아래 제공된 스크립트를 간결하면서도 포괄적으로 요약해주세요.
비디오의 주요 주제, 핵심 포인트, 전반적인 메시지에 초점을 맞추세요.
요약은 잘 구조화되고 이해하기 쉬워야 하며, 내용의 본질을 포착해야 합니다.
반드시 한국어로 요약을 제공하세요.
다음 구조로 요약을 작성해주세요:
1. 비디오의 주요 주제 또는 테마
2. 제시된 주요 포인트 또는 논점
3. 중요한 결론 또는 시사점
요약할 스크립트:
{text}
위 스크립트에 대한 요약을 제공해주세요. 요약은 간결하면서도 정보가 풍부해야 하며, 비디오 내용의 핵심을 포착해야 합니다."""

    try:
        return call_api(prompt, max_tokens=2000, temperature=0.3, top_p=0.9)
    except Exception:
        logging.exception("요약 생성 중 오류 발생")
        return "요약을 생성하는 동안 오류가 발생했습니다. 나중에 다시 시도해 주세요."
112
+
113
# Gradio interface: one URL box, one button, and two HTML panes
# (transcript and summary).
with gr.Blocks() as demo:
    gr.Markdown("## YouTube Script Extractor with Summary")

    youtube_url_input = gr.Textbox(label="YouTube URL 입력")
    analyze_button = gr.Button("분석하기")
    script_output = gr.HTML(label="스크립트")
    summary_output = gr.HTML(label="요약")

    # State holding the most recently extracted URL, title and transcript.
    cached_data = gr.State({"url": "", "title": "", "script": ""})

    def extract_and_cache(url, cache):
        """Return (title, script, cache), reusing the cache for a repeated URL."""
        url = (url or "").strip()
        if not url:
            # Nothing to fetch; skip the remote call and clear the cache.
            return "", "", {"url": "", "title": "", "script": ""}
        # Reuse only a successful extraction. A failed one leaves an empty
        # script, and must be retried when the user clicks again.
        if url == cache["url"] and cache["script"]:
            return cache["title"], cache["script"], cache

        title, script = get_youtube_script(url)
        new_cache = {"url": url, "title": title, "script": script}
        return title, script, new_cache

    def display_script(title, script):
        """Render the title and the sentence-split transcript as HTML."""
        from html import escape

        formatted_script = "\n".join(split_sentences(script))
        # Title and transcript come from a remote service; escape them so
        # markup inside them is shown as text instead of being rendered.
        return (
            f"<h2 style='font-size:24px;'>{escape(title)}</h2>\n"
            "<details>\n"
            "<summary><h3>원문 스크립트 (클릭하여 펼치기)</h3></summary>\n"
            f"<pre>{escape(formatted_script)}</pre>\n"
            "</details>"
        )

    def generate_summary(script):
        """Summarize the transcript and wrap the result in HTML."""
        from html import escape

        summary = summarize_text(script)
        # LLM output is untrusted text as well; escape before embedding.
        return f"<h3>요약:</h3>\n<pre>{escape(summary)}</pre>"

    def analyze(url, cache):
        """Button handler: extract the transcript and update the cache state."""
        title, script, new_cache = extract_and_cache(url, cache)
        script_html = display_script(title, script)
        return script_html, new_cache

    def update_summary(cache):
        """Follow-up handler: summarize the cached transcript, if there is one."""
        if not cache["script"]:
            return "스크립트가 없습니다. 먼저 YouTube URL을 입력하고 분석을 실행해주세요."
        return generate_summary(cache["script"])

    # On click, extract the transcript first; the summary step is chained
    # with .then() and reads the cache state written by the first step.
    analyze_button.click(
        analyze,
        inputs=[youtube_url_input, cached_data],
        outputs=[script_output, cached_data]
    ).then(
        update_summary,
        inputs=[cached_data],
        outputs=summary_output
    )

# Start the interface.
demo.launch(share=True)