seawolf2357 committed on
Commit
99127b9
·
verified ·
1 Parent(s): 34965c3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +108 -29
app.py CHANGED
@@ -3,16 +3,66 @@ import requests
3
  import json
4
  import os
5
  from datetime import datetime, timedelta
6
- from bs4 import BeautifulSoup # ์›น ํŽ˜์ด์ง€์—์„œ ํ…์ŠคํŠธ๋ฅผ ์ถ”์ถœํ•˜๊ธฐ ์œ„ํ•ด ์‚ฌ์šฉ
7
  from huggingface_hub import InferenceClient # LLM ์‚ฌ์šฉ์„ ์œ„ํ•ด ํ•„์š”
8
 
9
- # 필요한 패키지 설치 (필요한 경우 주석을 제거하고 실행)
10
- # !pip install bs4 huggingface_hub
11
-
12
  # 환경 변수에서 API 키 가져오기 (API 키는 안전하게 관리되어야 합니다)
13
  API_KEY = os.getenv("SERPHOUSE_API_KEY") # ๋ณธ์ธ์˜ SerpHouse API ํ‚ค๋ฅผ ํ™˜๊ฒฝ ๋ณ€์ˆ˜๋กœ ์„ค์ •ํ•˜์„ธ์š”.
14
  HF_TOKEN = os.getenv("HF_TOKEN") # Hugging Face API ํ† ํฐ์„ ํ™˜๊ฒฝ ๋ณ€์ˆ˜๋กœ ์„ค์ •ํ•˜์„ธ์š”.
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  MAJOR_COUNTRIES = [
17
  "United States", "United Kingdom", "Canada", "Australia", "Germany",
18
  "France", "Japan", "South Korea", "China", "India",
@@ -37,7 +87,7 @@ def search_serphouse(query, country, page=1, num_result=100):
37
  "data": {
38
  "q": query,
39
  "domain": "google.com",
40
- "loc": country,
41
  "lang": "en",
42
  "device": "desktop",
43
  "serp_type": "news",
@@ -127,19 +177,10 @@ def serphouse_search(query, country):
127
  # LLM 설정
128
  hf_client = InferenceClient("CohereForAI/c4ai-command-r-plus-08-2024", token=HF_TOKEN)
129
 
130
- def summarize_article(url):
131
  try:
132
- # 웹 페이지에서 텍스트 추출
133
- response = requests.get(url)
134
- response.raise_for_status()
135
- soup = BeautifulSoup(response.text, 'html.parser')
136
- # 모든 텍스트를 추출 (간단한 예시)
137
- text = ' '.join([p.get_text() for p in soup.find_all('p')])
138
- if not text.strip():
139
- return "๊ธฐ์‚ฌ ๋‚ด์šฉ์„ ๊ฐ€์ ธ์˜ฌ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
140
-
141
- # 요약 생성
142
- prompt = f"๋‹ค์Œ ์˜์–ด ๊ธฐ์‚ฌ๋ฅผ ํ•œ๊ตญ์–ด๋กœ 3๋ฌธ์žฅ์œผ๋กœ ์š”์•ฝํ•˜์„ธ์š”:\n{text}"
143
  summary = hf_client.text_generation(prompt, max_new_tokens=500)
144
  return summary
145
  except Exception as e:
@@ -149,10 +190,26 @@ css = """
149
  footer {
150
  visibility: hidden;
151
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
  """
153
 
154
  # Gradio 인터페이스 구성
155
- with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css, title="NewsAI ์„œ๋น„์Šค") as iface:
156
  gr.Markdown("๊ฒ€์ƒ‰์–ด๋ฅผ ์ž…๋ ฅํ•˜๊ณ  ์›ํ•˜๋Š” ๊ตญ๊ฐ€๋ฅผ ์„ ํƒํ•˜๋ฉด, ๊ฒ€์ƒ‰์–ด์™€ ์ผ์น˜ํ•˜๋Š” 24์‹œ๊ฐ„ ์ด๋‚ด ๋‰ด์Šค๋ฅผ ์ตœ๋Œ€ 100๊ฐœ ์ถœ๋ ฅํ•ฉ๋‹ˆ๋‹ค.")
157
 
158
  with gr.Column():
@@ -170,10 +227,19 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css, title="NewsAI ์„œ๋น„์Šค") as
170
  return f"<p>{error_message}</p>", gr.update(visible=False)
171
  else:
172
  # 기사 목록을 HTML 테이블로 생성
173
- table_html = "<table border='1' style='width:100%; text-align:left;'><tr><th>๋ฒˆํ˜ธ</th><th>์ œ๋ชฉ</th><th>์ถœ์ฒ˜</th><th>์‹œ๊ฐ„</th><th>๋ถ„์„</th></tr>"
 
 
 
 
 
 
 
 
 
174
  for article in articles:
175
  # ๊ฐ ๊ธฐ์‚ฌ์— ๋Œ€ํ•ด ๋ฒ„ํŠผ์— ํ•ด๋‹นํ•˜๋Š” JavaScript ์ฝ”๋“œ๋ฅผ ์‚ฝ์ž…
176
- analyze_button = f"""<button onclick="analyzeArticle('{article['link']}')">๋ถ„์„</button>"""
177
  row = f"""
178
  <tr>
179
  <td>{article['index']}</td>
@@ -189,23 +255,36 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css, title="NewsAI ์„œ๋น„์Šค") as
189
  # JavaScript 함수 정의
190
  js_code = """
191
  <script>
192
- function analyzeArticle(url) {
193
- // Gradio์˜ handle_function์„ ์‚ฌ์šฉํ•˜์—ฌ Python ํ•จ์ˆ˜ ํ˜ธ์ถœ
194
- gradioApp().querySelector('#article_url_input textarea').value = url;
195
- gradioApp().querySelector('#analyze_button').click();
 
 
 
 
 
196
  }
197
  </script>
198
  """
199
 
200
- full_html = table_html + js_code
 
 
 
 
 
 
 
201
 
202
  return full_html, gr.update(visible=True, value="") # summary_output ์ดˆ๊ธฐํ™”
203
 
204
- def analyze_article(url):
205
- summary = summarize_article(url)
206
  return summary
207
 
208
- article_url_input = gr.Textbox(visible=False, elem_id="article_url_input")
 
209
  analyze_button = gr.Button("๋ถ„์„", visible=False, elem_id="analyze_button")
210
 
211
  search_button.click(
@@ -216,7 +295,7 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css, title="NewsAI ์„œ๋น„์Šค") as
216
 
217
  analyze_button.click(
218
  analyze_article,
219
- inputs=[article_url_input],
220
  outputs=[summary_output]
221
  )
222
 
 
3
  import json
4
  import os
5
  from datetime import datetime, timedelta
 
6
  from huggingface_hub import InferenceClient # LLM ์‚ฌ์šฉ์„ ์œ„ํ•ด ํ•„์š”
7
 
 
 
 
8
  # 환경 변수에서 API 키 가져오기 (API 키는 안전하게 관리되어야 합니다)
9
  API_KEY = os.getenv("SERPHOUSE_API_KEY") # ๋ณธ์ธ์˜ SerpHouse API ํ‚ค๋ฅผ ํ™˜๊ฒฝ ๋ณ€์ˆ˜๋กœ ์„ค์ •ํ•˜์„ธ์š”.
10
  HF_TOKEN = os.getenv("HF_TOKEN") # Hugging Face API ํ† ํฐ์„ ํ™˜๊ฒฝ ๋ณ€์ˆ˜๋กœ ์„ค์ •ํ•˜์„ธ์š”.
11
 
12
+ # 국가 이름과 Google 검색에서 사용하는 국가 코드를 매핑
13
+ COUNTRY_CODE_MAPPING = {
14
+ "United States": "us",
15
+ "United Kingdom": "uk",
16
+ "Canada": "ca",
17
+ "Australia": "au",
18
+ "Germany": "de",
19
+ "France": "fr",
20
+ "Japan": "jp",
21
+ "South Korea": "kr",
22
+ "China": "cn",
23
+ "India": "in",
24
+ "Brazil": "br",
25
+ "Mexico": "mx",
26
+ "Russia": "ru",
27
+ "Italy": "it",
28
+ "Spain": "es",
29
+ "Netherlands": "nl",
30
+ "Sweden": "se",
31
+ "Switzerland": "ch",
32
+ "Norway": "no",
33
+ "Denmark": "dk",
34
+ "Finland": "fi",
35
+ "Belgium": "be",
36
+ "Austria": "at",
37
+ "New Zealand": "nz",
38
+ "Ireland": "ie",
39
+ "Singapore": "sg",
40
+ "Hong Kong": "hk",
41
+ "Israel": "il",
42
+ "United Arab Emirates": "ae",
43
+ "Saudi Arabia": "sa",
44
+ "South Africa": "za",
45
+ "Turkey": "tr",
46
+ "Egypt": "eg",
47
+ "Poland": "pl",
48
+ "Czech Republic": "cz",
49
+ "Hungary": "hu",
50
+ "Greece": "gr",
51
+ "Portugal": "pt",
52
+ "Argentina": "ar",
53
+ "Chile": "cl",
54
+ "Colombia": "co",
55
+ "Peru": "pe",
56
+ "Venezuela": "ve",
57
+ "Thailand": "th",
58
+ "Malaysia": "my",
59
+ "Indonesia": "id",
60
+ "Philippines": "ph",
61
+ "Vietnam": "vn",
62
+ "Pakistan": "pk",
63
+ "Bangladesh": "bd"
64
+ }
65
+
66
  MAJOR_COUNTRIES = [
67
  "United States", "United Kingdom", "Canada", "Australia", "Germany",
68
  "France", "Japan", "South Korea", "China", "India",
 
87
  "data": {
88
  "q": query,
89
  "domain": "google.com",
90
+ "loc": COUNTRY_CODE_MAPPING.get(country, "us"),
91
  "lang": "en",
92
  "device": "desktop",
93
  "serp_type": "news",
 
177
  # LLM 설정
178
  hf_client = InferenceClient("CohereForAI/c4ai-command-r-plus-08-2024", token=HF_TOKEN)
179
 
180
+ def summarize_article(title, snippet):
181
  try:
182
+ # 기사 제목과 스니펫을 기반으로 요약 생성
183
+ prompt = f"๋‹ค์Œ ๋‰ด์Šค ์ œ๋ชฉ๊ณผ ์š”์•ฝ์„ ๋ฐ”ํƒ•์œผ๋กœ ํ•œ๊ตญ์–ด๋กœ 3๋ฌธ์žฅ์œผ๋กœ ์š”์•ฝํ•˜์„ธ์š”:\n์ œ๋ชฉ: {title}\n์š”์•ฝ: {snippet}"
 
 
 
 
 
 
 
 
 
184
  summary = hf_client.text_generation(prompt, max_new_tokens=500)
185
  return summary
186
  except Exception as e:
 
190
  footer {
191
  visibility: hidden;
192
  }
193
+ /* ๋ถ„์„ ๋ฒ„ํŠผ ์Šคํƒ€์ผ ๊ฐœ์„  */
194
+ .analyze-button {
195
+ background-color: #4CAF50; /* Green */
196
+ border: none;
197
+ color: white;
198
+ padding: 6px 12px;
199
+ text-align: center;
200
+ text-decoration: none;
201
+ font-size: 14px;
202
+ margin: 2px;
203
+ cursor: pointer;
204
+ border-radius: 4px;
205
+ }
206
+ .analyze-button:hover {
207
+ background-color: #45a049;
208
+ }
209
  """
210
 
211
  # Gradio 인터페이스 구성
212
+ with gr.Blocks(css=css, title="NewsAI ์„œ๋น„์Šค") as iface:
213
  gr.Markdown("๊ฒ€์ƒ‰์–ด๋ฅผ ์ž…๋ ฅํ•˜๊ณ  ์›ํ•˜๋Š” ๊ตญ๊ฐ€๋ฅผ ์„ ํƒํ•˜๋ฉด, ๊ฒ€์ƒ‰์–ด์™€ ์ผ์น˜ํ•˜๋Š” 24์‹œ๊ฐ„ ์ด๋‚ด ๋‰ด์Šค๋ฅผ ์ตœ๋Œ€ 100๊ฐœ ์ถœ๋ ฅํ•ฉ๋‹ˆ๋‹ค.")
214
 
215
  with gr.Column():
 
227
  return f"<p>{error_message}</p>", gr.update(visible=False)
228
  else:
229
  # 기사 목록을 HTML 테이블로 생성
230
+ table_html = """
231
+ <table border='1' style='width:100%; text-align:left;'>
232
+ <tr>
233
+ <th>๋ฒˆํ˜ธ</th>
234
+ <th>์ œ๋ชฉ</th>
235
+ <th>์ถœ์ฒ˜</th>
236
+ <th>์‹œ๊ฐ„</th>
237
+ <th>๋ถ„์„</th>
238
+ </tr>
239
+ """
240
  for article in articles:
241
  # ๊ฐ ๊ธฐ์‚ฌ์— ๋Œ€ํ•ด ๋ฒ„ํŠผ์— ํ•ด๋‹นํ•˜๋Š” JavaScript ์ฝ”๋“œ๋ฅผ ์‚ฝ์ž…
242
+ analyze_button = f"""<button class="analyze-button" onclick="analyzeArticle('{article['index']}')">๋ถ„์„</button>"""
243
  row = f"""
244
  <tr>
245
  <td>{article['index']}</td>
 
255
  # JavaScript 함수 정의
256
  js_code = """
257
  <script>
258
+ function analyzeArticle(index) {
259
+ // Gradio์˜ handleFunction์„ ์‚ฌ์šฉํ•˜์—ฌ Python ํ•จ์ˆ˜ ํ˜ธ์ถœ
260
+ const articleData = JSON.parse(document.getElementById('articles_data').textContent);
261
+ const selectedArticle = articleData.find(article => article.index == index);
262
+ if (selectedArticle) {
263
+ gradioApp().querySelector('#article_title textarea').value = selectedArticle.title;
264
+ gradioApp().querySelector('#article_snippet textarea').value = selectedArticle.snippet;
265
+ gradioApp().querySelector('#analyze_button').click();
266
+ }
267
  }
268
  </script>
269
  """
270
 
271
+ # ๊ธฐ์‚ฌ ๋ฐ์ดํ„ฐ๋ฅผ JSON์œผ๋กœ ์ €์žฅํ•˜์—ฌ JavaScript์—์„œ ์ ‘๊ทผ ๊ฐ€๋Šฅํ•˜๋„๋ก ํ•จ
272
+ articles_json = json.dumps(articles)
273
+
274
+ full_html = f"""
275
+ <div id="articles_data" style="display:none;">{articles_json}</div>
276
+ {table_html}
277
+ {js_code}
278
+ """
279
 
280
  return full_html, gr.update(visible=True, value="") # summary_output ์ดˆ๊ธฐํ™”
281
 
282
+ def analyze_article(title, snippet):
283
+ summary = summarize_article(title, snippet)
284
  return summary
285
 
286
+ article_title = gr.Textbox(visible=False, elem_id="article_title")
287
+ article_snippet = gr.Textbox(visible=False, elem_id="article_snippet")
288
  analyze_button = gr.Button("๋ถ„์„", visible=False, elem_id="analyze_button")
289
 
290
  search_button.click(
 
295
 
296
  analyze_button.click(
297
  analyze_article,
298
+ inputs=[article_title, article_snippet],
299
  outputs=[summary_output]
300
  )
301