AIRider committed on
Commit
876af8e
·
verified ·
1 Parent(s): 506884e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -8
app.py CHANGED
@@ -4,6 +4,7 @@ import json
4
  import logging
5
  import openai
6
  import os
 
7
 
8
  # 로깅 설정
9
  logging.basicConfig(filename='youtube_script_extractor.log', level=logging.DEBUG,
@@ -11,6 +12,28 @@ logging.basicConfig(filename='youtube_script_extractor.log', level=logging.DEBUG
11
 
12
  openai.api_key = os.getenv("OPENAI_API_KEY")
13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  def parse_api_response(response):
15
  try:
16
  if isinstance(response, str):
@@ -36,13 +59,14 @@ def get_youtube_script(url):
36
 
37
  data = parsed_result["data"][0]
38
  title = data.get("title", "제λͺ© μ—†μŒ")
 
39
  transcription_text = data.get("transcriptionAsText", "")
40
 
41
  if not transcription_text:
42
  raise ValueError("μΆ”μΆœλœ μŠ€ν¬λ¦½νŠΈκ°€ μ—†μŠ΅λ‹ˆλ‹€.")
43
 
44
  logging.info("슀크립트 μΆ”μΆœ μ™„λ£Œ")
45
- return title, transcription_text
46
  except Exception as e:
47
  logging.exception("슀크립트 μΆ”μΆœ 쀑 였λ₯˜ λ°œμƒ")
48
  raise
@@ -61,9 +85,14 @@ def call_api(prompt, max_tokens, temperature, top_p):
61
  logging.exception("LLM API 호좜 쀑 였λ₯˜ λ°œμƒ")
62
  raise
63
 
64
- def summarize_text(text):
65
  prompt = f"""
66
- 1. λ‹€μŒ μ£Όμ–΄μ§€λŠ” 유튜브 λŒ€λ³Έμ˜ 핡심 μ£Όμ œμ™€ λͺ¨λ“  μ£Όμš” λ‚΄μš©μ„ μƒμ„Έν•˜κ²Œ μš”μ•½ν•˜λΌ
 
 
 
 
 
67
  2. λ°˜λ“œμ‹œ ν•œκΈ€λ‘œ μž‘μ„±ν•˜λΌ
68
  3. μš”μ•½λ¬Έλ§ŒμœΌλ‘œλ„ μ˜μƒμ„ 직접 μ‹œμ²­ν•œ 것과 λ™μΌν•œ μˆ˜μ€€μœΌλ‘œ λ‚΄μš©μ„ 이해할 수 μžˆλ„λ‘ μƒμ„Ένžˆ μž‘μ„±
69
  4. 글을 λ„ˆλ¬΄ μ••μΆ•ν•˜κ±°λ‚˜ ν•¨μΆ•ν•˜μ§€ 말고, μ€‘μš”ν•œ λ‚΄μš©κ³Ό 세뢀사항을 λͺ¨λ‘ 포함
@@ -81,6 +110,10 @@ def summarize_text(text):
81
  return call_api(prompt, max_tokens=2000, temperature=0.3, top_p=0.9)
82
 
83
  def create_collapsible_section(section_title, video_title, content):
 
 
 
 
84
  return f"""
85
  <details>
86
  <summary style="cursor: pointer; font-weight: bold;">{section_title}</summary>
@@ -95,18 +128,18 @@ def analyze(url, cache):
95
  try:
96
  if url == cache["url"]:
97
  logging.info(f"μΊμ‹œλœ 데이터 μ‚¬μš©: URL = {url}")
98
- title, script = cache["title"], cache["script"]
99
  else:
100
  logging.info(f"μƒˆλ‘œμš΄ 데이터 μΆ”μΆœ μ‹œμž‘: URL = {url}")
101
- title, script = get_youtube_script(url)
102
- cache = {"url": url, "title": title, "script": script}
103
 
104
  # 원문 슀크립트 μ„Ήμ…˜ 생성
105
  script_section = create_collapsible_section("원문 슀크립트", title, script)
106
  yield script_section, cache
107
 
108
  # μš”μ•½ 생성 및 μ„Ήμ…˜ 생성
109
- summary = summarize_text(script)
110
  summary_section = create_collapsible_section("μš”μ•½", title, summary)
111
  yield script_section + summary_section, cache
112
 
@@ -121,7 +154,7 @@ with gr.Blocks() as demo:
121
  youtube_url_input = gr.Textbox(label="YouTube URL μž…λ ₯")
122
  analyze_button = gr.Button("λΆ„μ„ν•˜κΈ°")
123
  content_output = gr.HTML(label="λ‚΄μš©")
124
- cached_data = gr.State({"url": "", "title": "", "script": ""})
125
 
126
  analyze_button.click(
127
  analyze,
 
4
  import logging
5
  import openai
6
  import os
7
+ import re
8
 
9
  # 로깅 설정
10
  logging.basicConfig(filename='youtube_script_extractor.log', level=logging.DEBUG,
 
12
 
13
  openai.api_key = os.getenv("OPENAI_API_KEY")
14
 
15
+ # 문장 구분 함수
16
def split_sentences(text):
    """Split Korean transcript text into short display lines.

    The text is cut after common Korean sentence endings (for example
    "니다" or "세요") that are not followed by another word character.
    Consecutive sentences are packed onto one line until adding the next
    sentence would push the line past 100 characters. A line is also
    closed as soon as a sentence ends in ".", "?" or "!".

    Args:
        text: Raw transcript text. An empty string or None yields an
            empty list.

    Returns:
        A list of non-empty, whitespace-stripped lines.
    """
    if not text:
        return []

    max_line_length = 100
    closing_marks = (".", "?", "!")

    # The capture group makes re.split keep each ending in its result.
    # Punctuation that directly follows an ending is consumed together with
    # it, so the next line never starts with a stray "." / "?" / "!".
    pieces = re.split(
        r"((?:니다|에요|구나|해요|군요|겠어요|시오|해라|예요|아요|데요|대요"
        r"|세요|어요|게요|구요|고요|나요|하죠)(?![\w])[.?!]*)",
        text,
    )

    lines = []
    buffer = ""
    # pieces alternates: body, ending, body, ending, ..., trailing body.
    for start in range(0, len(pieces), 2):
        chunk = "".join(pieces[start:start + 2])

        if len(buffer) + len(chunk) > max_line_length:
            # Close the current line, but never emit an empty one (this
            # happens when a single chunk is longer than the limit).
            if buffer.strip():
                lines.append(buffer.strip())
            buffer = chunk.strip()
        else:
            buffer += chunk

        # Explicit sentence punctuation always ends the current line.
        if chunk.rstrip().endswith(closing_marks):
            if buffer.strip():
                lines.append(buffer.strip())
            buffer = ""

    if buffer.strip():
        lines.append(buffer.strip())
    return lines
36
+
37
  def parse_api_response(response):
38
  try:
39
  if isinstance(response, str):
 
59
 
60
  data = parsed_result["data"][0]
61
  title = data.get("title", "제λͺ© μ—†μŒ")
62
+ description = data.get("description", "μ„€λͺ… μ—†μŒ")
63
  transcription_text = data.get("transcriptionAsText", "")
64
 
65
  if not transcription_text:
66
  raise ValueError("μΆ”μΆœλœ μŠ€ν¬λ¦½νŠΈκ°€ μ—†μŠ΅λ‹ˆλ‹€.")
67
 
68
  logging.info("슀크립트 μΆ”μΆœ μ™„λ£Œ")
69
+ return title, description, transcription_text
70
  except Exception as e:
71
  logging.exception("슀크립트 μΆ”μΆœ 쀑 였λ₯˜ λ°œμƒ")
72
  raise
 
85
  logging.exception("LLM API 호좜 쀑 였λ₯˜ λ°œμƒ")
86
  raise
87
 
88
+ def summarize_text(title, description, text):
89
  prompt = f"""
90
+ 제λͺ©: {title}
91
+ μ„€λͺ…: {description}
92
+
93
+ μœ„μ˜ 제λͺ©κ³Ό μ„€λͺ…은 이 유튜브 μ˜μƒμ˜ 원본 λ©”νƒ€λ°μ΄ν„°μž…λ‹ˆλ‹€. 이λ₯Ό μ°Έκ³ ν•˜μ—¬ μ•„λž˜μ˜ λŒ€λ³Έμ„ μš”μ•½ν•΄μ£Όμ„Έμš”.
94
+
95
+ 1. μœ„μ˜ 제λͺ©κ³Ό μ„€λͺ…을 μ°Έκ³ ν•˜μ—¬ 유튜브 λŒ€λ³Έμ˜ 핡심 μ£Όμ œμ™€ λͺ¨λ“  μ£Όμš” λ‚΄μš©μ„ μƒμ„Έν•˜κ²Œ μš”μ•½ν•˜λΌ
96
  2. λ°˜λ“œμ‹œ ν•œκΈ€λ‘œ μž‘μ„±ν•˜λΌ
97
  3. μš”μ•½λ¬Έλ§ŒμœΌλ‘œλ„ μ˜μƒμ„ 직접 μ‹œμ²­ν•œ 것과 λ™μΌν•œ μˆ˜μ€€μœΌλ‘œ λ‚΄μš©μ„ 이해할 수 μžˆλ„λ‘ μƒμ„Ένžˆ μž‘μ„±
98
  4. 글을 λ„ˆλ¬΄ μ••μΆ•ν•˜κ±°λ‚˜ ν•¨μΆ•ν•˜μ§€ 말고, μ€‘μš”ν•œ λ‚΄μš©κ³Ό 세뢀사항을 λͺ¨λ‘ 포함
 
110
  return call_api(prompt, max_tokens=2000, temperature=0.3, top_p=0.9)
111
 
112
  def create_collapsible_section(section_title, video_title, content):
113
+ if section_title == "원문 슀크립트":
114
+ sentences = split_sentences(content)
115
+ content = "\n".join(sentences)
116
+
117
  return f"""
118
  <details>
119
  <summary style="cursor: pointer; font-weight: bold;">{section_title}</summary>
 
128
  try:
129
  if url == cache["url"]:
130
  logging.info(f"μΊμ‹œλœ 데이터 μ‚¬μš©: URL = {url}")
131
+ title, description, script = cache["title"], cache["description"], cache["script"]
132
  else:
133
  logging.info(f"μƒˆλ‘œμš΄ 데이터 μΆ”μΆœ μ‹œμž‘: URL = {url}")
134
+ title, description, script = get_youtube_script(url)
135
+ cache = {"url": url, "title": title, "description": description, "script": script}
136
 
137
  # 원문 슀크립트 μ„Ήμ…˜ 생성
138
  script_section = create_collapsible_section("원문 슀크립트", title, script)
139
  yield script_section, cache
140
 
141
  # μš”μ•½ 생성 및 μ„Ήμ…˜ 생성
142
+ summary = summarize_text(title, description, script)
143
  summary_section = create_collapsible_section("μš”μ•½", title, summary)
144
  yield script_section + summary_section, cache
145
 
 
154
  youtube_url_input = gr.Textbox(label="YouTube URL μž…λ ₯")
155
  analyze_button = gr.Button("λΆ„μ„ν•˜κΈ°")
156
  content_output = gr.HTML(label="λ‚΄μš©")
157
+ cached_data = gr.State({"url": "", "title": "", "description": "", "script": ""})
158
 
159
  analyze_button.click(
160
  analyze,