Spaces:
Building
Building
Update app.py
Browse files
app.py
CHANGED
@@ -26,22 +26,15 @@ KOREAN_COMPANIES = [
|
|
26 |
"INTEL",
|
27 |
"SAMSUNG",
|
28 |
"HYNIX",
|
29 |
-
"BITCOIN",
|
30 |
"crypto",
|
31 |
"stock",
|
32 |
"Economics",
|
33 |
"Finance",
|
34 |
-
"investing"
|
35 |
]
|
36 |
|
37 |
-
######################################################################
|
38 |
-
# κ³΅ν΅ ν¨μ: μκ° λ³ν
|
39 |
-
######################################################################
|
40 |
def convert_to_seoul_time(timestamp_str):
|
41 |
-
"""
|
42 |
-
μ£Όμ΄μ§ 'YYYY-MM-DD HH:MM:SS' ννμ μκ°(UTC κΈ°μ€ λ±)μ
|
43 |
-
'YYYY-MM-DD HH:MM:SS KST' ννλ‘ λ³ννμ¬ λ°ν.
|
44 |
-
"""
|
45 |
try:
|
46 |
dt = datetime.strptime(timestamp_str, '%Y-%m-%d %H:%M:%S')
|
47 |
seoul_tz = pytz.timezone('Asia/Seoul')
|
@@ -51,23 +44,18 @@ def convert_to_seoul_time(timestamp_str):
|
|
51 |
print(f"μκ° λ³ν μ€λ₯: {str(e)}")
|
52 |
return timestamp_str
|
53 |
|
54 |
-
######################################################################
|
55 |
-
# κ³΅ν΅ ν¨μ: κ°μ± λΆμ
|
56 |
-
######################################################################
|
57 |
def analyze_sentiment_batch(articles, client):
|
58 |
"""
|
59 |
-
OpenAI APIλ₯Ό
|
60 |
-
- κ²°κ³Όλ₯Ό νκ΅μ΄λ‘ μμ±νλλ‘ ν둬ννΈ λ΄μ λͺ
μ.
|
61 |
"""
|
62 |
try:
|
63 |
-
#
|
64 |
combined_text = "\n\n".join([
|
65 |
f"μ λͺ©: {article.get('title', '')}\nλ΄μ©: {article.get('snippet', '')}"
|
66 |
for article in articles
|
67 |
])
|
68 |
|
69 |
-
|
70 |
-
prompt = f"""λ€μ λ΄μ€ λͺ¨μμ λν΄ μ λ°μ μΈ κ°μ± λΆμμ μννμΈμ. (νκ΅μ΄λ‘ μμ±νμΈμ)
|
71 |
|
72 |
λ΄μ€ λ΄μ©:
|
73 |
{combined_text}
|
@@ -94,14 +82,9 @@ def analyze_sentiment_batch(articles, client):
|
|
94 |
except Exception as e:
|
95 |
return f"κ°μ± λΆμ μ€ν¨: {str(e)}"
|
96 |
|
97 |
-
|
98 |
-
# DB μ΄κΈ°ν
|
99 |
-
######################################################################
|
100 |
def init_db():
|
101 |
-
"""
|
102 |
-
SQLite DB νμΌ(search_results.db)μ΄ μμΌλ©΄ μμ±,
|
103 |
-
'searches' ν
μ΄λΈμ΄ μμΌλ©΄ μμ±
|
104 |
-
"""
|
105 |
db_path = pathlib.Path("search_results.db")
|
106 |
conn = sqlite3.connect(db_path)
|
107 |
c = conn.cursor()
|
@@ -116,17 +99,16 @@ def init_db():
|
|
116 |
|
117 |
def save_to_db(keyword, country, results):
|
118 |
"""
|
119 |
-
(keyword, country)
|
120 |
"""
|
121 |
conn = sqlite3.connect("search_results.db")
|
122 |
c = conn.cursor()
|
123 |
-
|
124 |
seoul_tz = pytz.timezone('Asia/Seoul')
|
125 |
now = datetime.now(seoul_tz)
|
126 |
timestamp = now.strftime('%Y-%m-%d %H:%M:%S')
|
127 |
|
128 |
-
c.execute("""INSERT INTO searches
|
129 |
-
(keyword, country, results, timestamp)
|
130 |
VALUES (?, ?, ?, ?)""",
|
131 |
(keyword, country, json.dumps(results), timestamp))
|
132 |
conn.commit()
|
@@ -134,324 +116,128 @@ def save_to_db(keyword, country, results):
|
|
134 |
|
135 |
def load_from_db(keyword, country):
|
136 |
"""
|
137 |
-
|
138 |
-
- μ±κ³΅μ (json.loads(...)λ results, KST μκ°)
|
139 |
-
- μ€ν¨μ (None, None)
|
140 |
"""
|
141 |
conn = sqlite3.connect("search_results.db")
|
142 |
c = conn.cursor()
|
143 |
-
c.execute("
|
144 |
-
FROM searches
|
145 |
-
WHERE keyword=? AND country=?
|
146 |
-
ORDER BY timestamp DESC
|
147 |
-
LIMIT 1""",
|
148 |
(keyword, country))
|
149 |
-
|
150 |
conn.close()
|
151 |
-
if
|
152 |
-
return json.loads(
|
153 |
return None, None
|
154 |
|
155 |
-
######################################################################
|
156 |
-
# SerpHouse API (κ²μ ν¨μλ€)
|
157 |
-
######################################################################
|
158 |
-
API_KEY = os.getenv("SERPHOUSE_API_KEY")
|
159 |
-
|
160 |
-
def is_english(text):
|
161 |
-
"""
|
162 |
-
ν
μ€νΈκ° μ λΆ ASCII λ²μλ©΄ True, μλλ©΄ False
|
163 |
-
"""
|
164 |
-
return all(ord(char) < 128 for char in text.replace(' ', '').replace('-', '').replace('_', ''))
|
165 |
-
|
166 |
-
@lru_cache(maxsize=100)
|
167 |
-
def translate_query(query, country):
|
168 |
-
"""
|
169 |
-
queryλ₯Ό ν΄λΉ country μΈμ΄λ‘ λ²μ
|
170 |
-
"""
|
171 |
-
try:
|
172 |
-
# μ΄λ―Έ μμ΄λ©΄ κ·Έλ₯ λ°ν
|
173 |
-
if is_english(query):
|
174 |
-
return query
|
175 |
-
|
176 |
-
if country in COUNTRY_LANGUAGES:
|
177 |
-
target_lang = COUNTRY_LANGUAGES[country]
|
178 |
-
|
179 |
-
url = "https://translate.googleapis.com/translate_a/single"
|
180 |
-
params = {
|
181 |
-
"client": "gtx",
|
182 |
-
"sl": "auto",
|
183 |
-
"tl": target_lang,
|
184 |
-
"dt": "t",
|
185 |
-
"q": query
|
186 |
-
}
|
187 |
-
|
188 |
-
session = requests.Session()
|
189 |
-
retries = Retry(total=3, backoff_factor=0.5)
|
190 |
-
session.mount('https://', HTTPAdapter(max_retries=retries))
|
191 |
-
|
192 |
-
resp = session.get(url, params=params, timeout=(5, 10))
|
193 |
-
translated_text = resp.json()[0][0][0]
|
194 |
-
return translated_text
|
195 |
-
|
196 |
-
return query
|
197 |
-
except Exception as e:
|
198 |
-
print(f"λ²μ μ€λ₯: {str(e)}")
|
199 |
-
return query
|
200 |
-
|
201 |
-
@lru_cache(maxsize=200)
|
202 |
-
def translate_to_korean(text):
|
203 |
-
"""
|
204 |
-
snippet λ±μ νκΈλ‘ λ²μνκΈ° μν ν¨μ
|
205 |
-
"""
|
206 |
-
try:
|
207 |
-
url = "https://translate.googleapis.com/translate_a/single"
|
208 |
-
params = {
|
209 |
-
"client": "gtx",
|
210 |
-
"sl": "auto",
|
211 |
-
"tl": "ko",
|
212 |
-
"dt": "t",
|
213 |
-
"q": text
|
214 |
-
}
|
215 |
-
|
216 |
-
session = requests.Session()
|
217 |
-
retries = Retry(total=3, backoff_factor=0.5)
|
218 |
-
session.mount('https://', HTTPAdapter(max_retries=retries))
|
219 |
-
|
220 |
-
response = session.get(url, params=params, timeout=(5, 10))
|
221 |
-
translated_text = response.json()[0][0][0]
|
222 |
-
return translated_text
|
223 |
-
except Exception as e:
|
224 |
-
print(f"νκΈ λ²μ μ€λ₯: {str(e)}")
|
225 |
-
return text
|
226 |
-
|
227 |
-
def search_serphouse(query, country, page=1, num_result=10):
|
228 |
-
"""
|
229 |
-
SerpHouse API μ€μκ° κ²μ -> 'news' (sort_by=date)
|
230 |
-
"""
|
231 |
-
url = "https://api.serphouse.com/serp/live"
|
232 |
-
|
233 |
-
now = datetime.utcnow()
|
234 |
-
yesterday = now - timedelta(days=1)
|
235 |
-
date_range = f"{yesterday.strftime('%Y-%m-%d')},{now.strftime('%Y-%m-%d')}"
|
236 |
-
|
237 |
-
translated_query = translate_query(query, country)
|
238 |
-
|
239 |
-
payload = {
|
240 |
-
"data": {
|
241 |
-
"q": translated_query,
|
242 |
-
"domain": "google.com",
|
243 |
-
"loc": COUNTRY_LOCATIONS.get(country, "United States"),
|
244 |
-
"lang": COUNTRY_LANGUAGES.get(country, "en"),
|
245 |
-
"device": "desktop",
|
246 |
-
"serp_type": "news",
|
247 |
-
"page": str(page),
|
248 |
-
"num": "100",
|
249 |
-
"date_range": date_range,
|
250 |
-
"sort_by": "date"
|
251 |
-
}
|
252 |
-
}
|
253 |
-
|
254 |
-
headers = {
|
255 |
-
"accept": "application/json",
|
256 |
-
"content-type": "application/json",
|
257 |
-
"authorization": f"Bearer {API_KEY}"
|
258 |
-
}
|
259 |
-
|
260 |
-
try:
|
261 |
-
session = requests.Session()
|
262 |
-
retries = Retry(
|
263 |
-
total=5,
|
264 |
-
backoff_factor=1,
|
265 |
-
status_forcelist=[429, 500, 502, 503, 504],
|
266 |
-
allowed_methods=["POST"]
|
267 |
-
)
|
268 |
-
adapter = HTTPAdapter(max_retries=retries)
|
269 |
-
session.mount('http://', adapter)
|
270 |
-
session.mount('https://', adapter)
|
271 |
-
|
272 |
-
resp = session.post(url, json=payload, headers=headers, timeout=(30, 30))
|
273 |
-
resp.raise_for_status()
|
274 |
-
|
275 |
-
# μλ΅ JSON
|
276 |
-
return {
|
277 |
-
"results": resp.json(),
|
278 |
-
"translated_query": translated_query
|
279 |
-
}
|
280 |
-
except requests.exceptions.Timeout:
|
281 |
-
return {
|
282 |
-
"error": "κ²μ μκ°μ΄ μ΄κ³Όλμμ΅λλ€. μ μ ν λ€μ μλν΄μ£ΌμΈμ.",
|
283 |
-
"translated_query": query
|
284 |
-
}
|
285 |
-
except requests.exceptions.RequestException as e:
|
286 |
-
return {
|
287 |
-
"error": f"κ²μ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}",
|
288 |
-
"translated_query": query
|
289 |
-
}
|
290 |
-
except Exception as e:
|
291 |
-
return {
|
292 |
-
"error": f"μκΈ°μΉ μμ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}",
|
293 |
-
"translated_query": query
|
294 |
-
}
|
295 |
-
|
296 |
-
def format_results_from_raw(response_data):
|
297 |
-
"""
|
298 |
-
SerpHouse API μλ΅μ (error_message, articles_list) ννλ‘ κ°κ³΅
|
299 |
-
- νκ΅ λλ©μΈ(kr, korea, etc) μ μΈ
|
300 |
-
- emptyμ "κ²μ κ²°κ³Όκ° μμ΅λλ€."
|
301 |
-
"""
|
302 |
-
if "error" in response_data:
|
303 |
-
return "Error: " + response_data["error"], []
|
304 |
-
|
305 |
-
try:
|
306 |
-
results = response_data["results"]
|
307 |
-
translated_query = response_data["translated_query"]
|
308 |
-
|
309 |
-
# μ€μ λ΄μ€ ν κ²°κ³Ό
|
310 |
-
news_results = results.get('results', {}).get('results', {}).get('news', [])
|
311 |
-
if not news_results:
|
312 |
-
return "κ²μ κ²°κ³Όκ° μμ΅λλ€.", []
|
313 |
-
|
314 |
-
# νκ΅μ΄ μ μΈ
|
315 |
-
korean_domains = [
|
316 |
-
'.kr', 'korea', 'korean', 'yonhap', 'hankyung', 'chosun',
|
317 |
-
'donga', 'joins', 'hani', 'koreatimes', 'koreaherald'
|
318 |
-
]
|
319 |
-
korean_keywords = [
|
320 |
-
'korea', 'korean', 'seoul', 'busan', 'incheon', 'daegu',
|
321 |
-
'gwangju', 'daejeon', 'ulsan', 'sejong'
|
322 |
-
]
|
323 |
-
|
324 |
-
filtered_articles = []
|
325 |
-
for idx, result in enumerate(news_results, 1):
|
326 |
-
url = result.get("url", result.get("link", "")).lower()
|
327 |
-
title = result.get("title", "").lower()
|
328 |
-
channel = result.get("channel", result.get("source", "")).lower()
|
329 |
-
|
330 |
-
is_korean_content = (
|
331 |
-
any(domain in url or domain in channel for domain in korean_domains)
|
332 |
-
or any(keyword in title for keyword in korean_keywords)
|
333 |
-
)
|
334 |
-
if not is_korean_content:
|
335 |
-
filtered_articles.append({
|
336 |
-
"index": idx,
|
337 |
-
"title": result.get("title", "μ λͺ© μμ"),
|
338 |
-
"link": url,
|
339 |
-
"snippet": result.get("snippet", "λ΄μ© μμ"),
|
340 |
-
"channel": result.get("channel", result.get("source", "μ μ μμ")),
|
341 |
-
"time": result.get("time", result.get("date", "μ μ μλ μκ°")),
|
342 |
-
"image_url": result.get("img", result.get("thumbnail", "")),
|
343 |
-
"translated_query": translated_query
|
344 |
-
})
|
345 |
-
|
346 |
-
return "", filtered_articles
|
347 |
-
except Exception as e:
|
348 |
-
return f"κ²°κ³Ό μ²λ¦¬ μ€ μ€λ₯ λ°μ: {str(e)}", []
|
349 |
-
|
350 |
-
def serphouse_search(query, country):
|
351 |
-
"""
|
352 |
-
μ 체 νμ΄νλΌμΈ (search_serphouse -> format_results_from_raw)
|
353 |
-
λ°ν: (error_message, articles_list)
|
354 |
-
"""
|
355 |
-
response_data = search_serphouse(query, country)
|
356 |
-
return format_results_from_raw(response_data)
|
357 |
-
|
358 |
-
######################################################################
|
359 |
-
# λ΄μ€ κΈ°μ¬ λͺ©λ‘ -> Markdown
|
360 |
-
######################################################################
|
361 |
def display_results(articles):
|
362 |
"""
|
363 |
-
κΈ°μ¬ λͺ©λ‘μ Markdown λ¬Έμμ΄λ‘
|
364 |
-
- snippet(μλ¬Έ) + νκΈ λ²μ(snippet)μ ν¨κ» νμ
|
365 |
"""
|
366 |
output = ""
|
367 |
for idx, article in enumerate(articles, 1):
|
368 |
-
# snippetμ νκ΅μ΄λ‘ λ²μ
|
369 |
-
korean_snippet = translate_to_korean(article['snippet'])
|
370 |
-
|
371 |
output += f"### {idx}. {article['title']}\n"
|
372 |
output += f"μΆμ²: {article['channel']}\n"
|
373 |
output += f"μκ°: {article['time']}\n"
|
374 |
output += f"λ§ν¬: {article['link']}\n"
|
375 |
-
output += f"
|
376 |
-
output += f"μμ½(νκ΅μ΄): {korean_snippet}\n\n"
|
377 |
return output
|
378 |
|
379 |
-
######################################################################
|
380 |
-
# νκ΅ κΈ°μ
λͺ©λ‘ (μ΄λ―Έ μ μΈλ¨)
|
381 |
-
######################################################################
|
382 |
|
383 |
-
|
384 |
-
#
|
385 |
-
|
386 |
def search_company(company):
|
387 |
"""
|
388 |
-
|
|
|
|
|
389 |
"""
|
390 |
error_message, articles = serphouse_search(company, "United States")
|
391 |
if not error_message and articles:
|
|
|
392 |
analysis = analyze_sentiment_batch(articles, client)
|
393 |
-
|
|
|
|
|
394 |
"articles": articles,
|
395 |
"analysis": analysis
|
396 |
}
|
397 |
-
save_to_db(company, "United States",
|
398 |
-
|
399 |
-
|
400 |
-
|
401 |
-
|
402 |
-
|
403 |
-
|
404 |
-
|
405 |
-
|
406 |
-
|
|
|
407 |
def load_company(company):
|
408 |
"""
|
409 |
-
DBμμ (
|
|
|
410 |
"""
|
411 |
-
|
412 |
-
if
|
413 |
-
|
414 |
-
|
415 |
-
|
416 |
-
|
417 |
-
|
418 |
-
|
|
|
|
|
419 |
return f"{company}μ λν μ μ₯λ κ²°κ³Όκ° μμ΅λλ€."
|
420 |
|
421 |
-
|
422 |
-
|
423 |
-
|
|
|
424 |
def show_stats():
|
425 |
"""
|
426 |
-
KOREAN_COMPANIES λ΄ λͺ¨λ
|
|
|
|
|
|
|
|
|
|
|
|
|
427 |
"""
|
428 |
conn = sqlite3.connect("search_results.db")
|
429 |
c = conn.cursor()
|
430 |
-
|
431 |
-
output = "## EarnBOT λΆμ 리ν¬νΈ\n\n"
|
432 |
-
|
433 |
-
# DBμμ
|
434 |
data_list = []
|
435 |
-
for
|
436 |
c.execute("""
|
437 |
-
SELECT results, timestamp
|
438 |
-
FROM searches
|
439 |
-
WHERE keyword
|
440 |
-
ORDER BY timestamp DESC
|
441 |
LIMIT 1
|
442 |
-
""", (
|
|
|
443 |
row = c.fetchone()
|
444 |
if row:
|
445 |
-
results_json,
|
446 |
-
data_list.append((
|
|
|
447 |
conn.close()
|
448 |
-
|
|
|
449 |
def analyze_data(item):
|
450 |
-
comp, tstamp,
|
451 |
-
|
452 |
-
articles =
|
453 |
-
analysis =
|
|
|
454 |
count_articles = len(articles)
|
|
|
|
|
|
|
|
|
455 |
return (comp, tstamp, count_articles, analysis)
|
456 |
|
457 |
results_list = []
|
@@ -459,80 +245,87 @@ def show_stats():
|
|
459 |
futures = [executor.submit(analyze_data, dl) for dl in data_list]
|
460 |
for future in as_completed(futures):
|
461 |
results_list.append(future.result())
|
462 |
-
|
463 |
-
|
464 |
-
|
|
|
465 |
output += f"### {comp}\n"
|
466 |
-
output += f"- λ§μ§λ§ μ
λ°μ΄νΈ: {
|
467 |
-
output += f"- μ μ₯λ κΈ°μ¬ μ: {
|
468 |
if analysis:
|
469 |
output += "#### λ΄μ€ κ°μ± λΆμ\n"
|
470 |
output += f"{analysis}\n\n"
|
471 |
output += "---\n\n"
|
472 |
-
|
473 |
return output
|
474 |
|
475 |
-
|
476 |
-
# μ 체 κ²μ+μΆλ ₯+λΆμ μ’
ν©
|
477 |
-
######################################################################
|
478 |
def search_all_companies():
|
479 |
"""
|
480 |
-
|
|
|
481 |
"""
|
482 |
-
|
483 |
-
|
484 |
def do_search(comp):
|
485 |
return comp, search_company(comp)
|
486 |
-
|
487 |
with ThreadPoolExecutor(max_workers=5) as executor:
|
488 |
futures = [executor.submit(do_search, c) for c in KOREAN_COMPANIES]
|
489 |
for future in as_completed(futures):
|
490 |
-
comp,
|
491 |
-
|
492 |
-
|
493 |
-
|
494 |
-
return
|
495 |
|
496 |
def load_all_companies():
|
497 |
"""
|
498 |
-
λͺ¨λ κΈ°μ
DB
|
|
|
499 |
"""
|
500 |
-
|
|
|
501 |
for comp in KOREAN_COMPANIES:
|
502 |
-
|
503 |
-
|
504 |
-
|
505 |
-
return
|
506 |
|
507 |
def full_summary_report():
|
508 |
"""
|
509 |
-
1)
|
510 |
-
|
511 |
-
3) κ°μ± λΆμ ν΅κ³
|
512 |
"""
|
513 |
-
|
514 |
-
|
|
|
|
|
|
|
|
|
|
|
515 |
stats_text = show_stats()
|
516 |
-
|
517 |
-
|
518 |
"# μ 체 λΆμ λ³΄κ³ μμ½\n\n"
|
519 |
"μλ μμλ‘ μ€νλμμ΅λλ€:\n"
|
520 |
"1. λͺ¨λ μ’
λͺ© κ²μ(λ³λ ¬) + λΆμ => 2. λͺ¨λ μ’
λͺ© DB κ²°κ³Ό μΆλ ₯ => 3. μ 체 κ°μ± λΆμ ν΅κ³\n\n"
|
521 |
-
f"{
|
522 |
-
f"{
|
523 |
"## [μ 체 κ°μ± λΆμ ν΅κ³]\n\n"
|
524 |
f"{stats_text}"
|
525 |
)
|
526 |
-
return
|
|
|
527 |
|
528 |
-
|
529 |
-
# μ¬μ©μ μμ κ²μ
|
530 |
-
|
531 |
def search_custom(query, country):
|
532 |
"""
|
533 |
-
|
534 |
-
|
535 |
-
|
536 |
"""
|
537 |
error_message, articles = serphouse_search(query, country)
|
538 |
if error_message:
|
@@ -540,31 +333,39 @@ def search_custom(query, country):
|
|
540 |
if not articles:
|
541 |
return "κ²μ κ²°κ³Όκ° μμ΅λλ€."
|
542 |
|
|
|
543 |
analysis = analyze_sentiment_batch(articles, client)
|
544 |
-
|
|
|
|
|
545 |
"articles": articles,
|
546 |
"analysis": analysis
|
547 |
}
|
548 |
-
save_to_db(query, country,
|
549 |
-
|
550 |
-
|
551 |
-
|
|
|
552 |
return "DBμμ λ‘λ μ€ν¨"
|
553 |
|
554 |
-
|
555 |
-
analy = loaded.get("analysis", "")
|
556 |
-
|
557 |
out = f"## [μ¬μ©μ μμ κ²μ κ²°κ³Ό]\n\n"
|
558 |
out += f"**ν€μλ**: {query}\n\n"
|
559 |
out += f"**κ΅κ°**: {country}\n\n"
|
560 |
-
out += f"**μ μ₯ μκ°**: {
|
|
|
|
|
|
|
|
|
561 |
out += display_results(arts)
|
562 |
out += f"### λ΄μ€ κ°μ± λΆμ\n{analy}\n"
|
|
|
563 |
return out
|
564 |
|
565 |
-
|
566 |
-
|
567 |
-
|
|
|
568 |
ACCESS_TOKEN = os.getenv("HF_TOKEN")
|
569 |
if not ACCESS_TOKEN:
|
570 |
raise ValueError("HF_TOKEN environment variable is not set")
|
@@ -574,14 +375,17 @@ client = OpenAI(
|
|
574 |
api_key=ACCESS_TOKEN,
|
575 |
)
|
576 |
|
577 |
-
|
578 |
-
|
579 |
-
|
|
|
|
|
|
|
580 |
COUNTRY_LANGUAGES = {
|
581 |
"United States": "en",
|
582 |
"KOREA": "ko",
|
583 |
"United Kingdom": "en",
|
584 |
-
"Taiwan": "zh-TW",
|
585 |
"Canada": "en",
|
586 |
"Australia": "en",
|
587 |
"Germany": "de",
|
@@ -719,11 +523,190 @@ COUNTRY_LOCATIONS = {
|
|
719 |
"Iceland": "Iceland"
|
720 |
}
|
721 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
722 |
css = """
|
723 |
/* μ μ μ€νμΌ */
|
724 |
footer {visibility: hidden;}
|
725 |
|
726 |
-
/* λ μ΄μμ
|
727 |
#status_area {
|
728 |
background: rgba(255, 255, 255, 0.9);
|
729 |
padding: 15px;
|
@@ -737,6 +720,7 @@ footer {visibility: hidden;}
|
|
737 |
margin-top: 10px;
|
738 |
}
|
739 |
|
|
|
740 |
.tabs {
|
741 |
border-bottom: 2px solid #ddd !important;
|
742 |
margin-bottom: 20px !important;
|
@@ -764,6 +748,7 @@ footer {visibility: hidden;}
|
|
764 |
padding: 10px 0;
|
765 |
}
|
766 |
|
|
|
767 |
.group {
|
768 |
border: 1px solid #eee;
|
769 |
padding: 15px;
|
@@ -778,13 +763,123 @@ footer {visibility: hidden;}
|
|
778 |
border: none !important;
|
779 |
}
|
780 |
|
781 |
-
/*
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
782 |
"""
|
783 |
|
784 |
import gradio as gr
|
785 |
|
786 |
with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css, title="NewsAI μλΉμ€") as iface:
|
787 |
-
# DB μ΄κΈ°ν
|
788 |
init_db()
|
789 |
|
790 |
with gr.Tabs():
|
@@ -793,7 +888,7 @@ with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css, title="NewsAI μλΉ
|
|
793 |
gr.Markdown("## EarnBot: κΈλ‘λ² λΉ
ν
ν¬ κΈ°μ
λ° ν¬μ μ λ§ AI μλ λΆμ")
|
794 |
gr.Markdown(" * 'μ 체 λΆμ λ³΄κ³ μμ½' ν΄λ¦ μ μ 체 μλ λ³΄κ³ μμ±.\n * μλ κ°λ³ μ’
λͺ©μ 'κ²μ(DB μλ μ μ₯)'κ³Ό 'μΆλ ₯(DB μλ νΈμΆ)'λ κ°λ₯.\n * μΆκ°λ‘, μνλ μμ ν€μλ λ° κ΅κ°λ‘ κ²μ/λΆμν μλ μμ΅λλ€.")
|
795 |
|
796 |
-
# μ¬μ©μ μμ κ²μ μΉμ
|
797 |
with gr.Group():
|
798 |
gr.Markdown("### μ¬μ©μ μμ κ²μ")
|
799 |
with gr.Row():
|
@@ -813,29 +908,27 @@ with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css, title="NewsAI μλΉ
|
|
813 |
|
814 |
custom_search_output = gr.Markdown()
|
815 |
|
816 |
-
# μμ κ²μ λ²νΌ ν΄λ¦
|
817 |
custom_search_btn.click(
|
818 |
fn=search_custom,
|
819 |
inputs=[user_input, country_selection],
|
820 |
outputs=custom_search_output
|
821 |
)
|
822 |
|
823 |
-
# μ 체 λΆμ λ³΄κ³ λ²νΌ
|
824 |
with gr.Row():
|
825 |
full_report_btn = gr.Button("μ 체 λΆμ λ³΄κ³ μμ½", variant="primary")
|
826 |
full_report_display = gr.Markdown()
|
827 |
|
828 |
-
# μ 체 λ³΄κ³ -> full_summary_report
|
829 |
full_report_btn.click(
|
830 |
fn=full_summary_report,
|
831 |
outputs=full_report_display
|
832 |
)
|
833 |
|
834 |
-
# μ§μ λ
|
835 |
with gr.Column():
|
836 |
for i in range(0, len(KOREAN_COMPANIES), 2):
|
837 |
with gr.Row():
|
838 |
-
# μΌμͺ½
|
839 |
with gr.Column():
|
840 |
company = KOREAN_COMPANIES[i]
|
841 |
with gr.Group():
|
@@ -845,18 +938,16 @@ with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css, title="NewsAI μλΉ
|
|
845 |
load_btn = gr.Button("μΆλ ₯", variant="secondary")
|
846 |
result_display = gr.Markdown()
|
847 |
|
848 |
-
# κ²μ
|
849 |
search_btn.click(
|
850 |
fn=lambda c=company: search_company(c),
|
851 |
outputs=result_display
|
852 |
)
|
853 |
-
# μΆλ ₯
|
854 |
load_btn.click(
|
855 |
fn=lambda c=company: load_company(c),
|
856 |
outputs=result_display
|
857 |
)
|
858 |
|
859 |
-
# μ€λ₯Έμͺ½
|
860 |
if i + 1 < len(KOREAN_COMPANIES):
|
861 |
with gr.Column():
|
862 |
company = KOREAN_COMPANIES[i + 1]
|
|
|
26 |
"INTEL",
|
27 |
"SAMSUNG",
|
28 |
"HYNIX",
|
29 |
+
"BITCOIN",
|
30 |
"crypto",
|
31 |
"stock",
|
32 |
"Economics",
|
33 |
"Finance",
|
34 |
+
"investing"
|
35 |
]
|
36 |
|
|
|
|
|
|
|
37 |
def convert_to_seoul_time(timestamp_str):
|
|
|
|
|
|
|
|
|
38 |
try:
|
39 |
dt = datetime.strptime(timestamp_str, '%Y-%m-%d %H:%M:%S')
|
40 |
seoul_tz = pytz.timezone('Asia/Seoul')
|
|
|
44 |
print(f"μκ° λ³ν μ€λ₯: {str(e)}")
|
45 |
return timestamp_str
|
46 |
|
|
|
|
|
|
|
47 |
def analyze_sentiment_batch(articles, client):
|
48 |
"""
|
49 |
+
OpenAI APIλ₯Ό ν΅ν΄ λ΄μ€ κΈ°μ¬λ€μ μ’
ν© κ°μ± λΆμμ μν
|
|
|
50 |
"""
|
51 |
try:
|
52 |
+
# λͺ¨λ κΈ°μ¬μ μ λͺ©κ³Ό λ΄μ©μ νλμ ν
μ€νΈλ‘ κ²°ν©
|
53 |
combined_text = "\n\n".join([
|
54 |
f"μ λͺ©: {article.get('title', '')}\nλ΄μ©: {article.get('snippet', '')}"
|
55 |
for article in articles
|
56 |
])
|
57 |
|
58 |
+
prompt = f"""λ€μ λ΄μ€ λͺ¨μμ λν΄ μ λ°μ μΈ κ°μ± λΆμμ μννμΈμ:
|
|
|
59 |
|
60 |
λ΄μ€ λ΄μ©:
|
61 |
{combined_text}
|
|
|
82 |
except Exception as e:
|
83 |
return f"κ°μ± λΆμ μ€ν¨: {str(e)}"
|
84 |
|
85 |
+
|
86 |
+
# DB μ΄κΈ°ν ν¨μ
|
|
|
87 |
def init_db():
|
|
|
|
|
|
|
|
|
88 |
db_path = pathlib.Path("search_results.db")
|
89 |
conn = sqlite3.connect(db_path)
|
90 |
c = conn.cursor()
|
|
|
99 |
|
100 |
def save_to_db(keyword, country, results):
|
101 |
"""
|
102 |
+
νΉμ (keyword, country) μ‘°ν©μ λν κ²μ κ²°κ³Όλ₯Ό DBμ μ μ₯
|
103 |
"""
|
104 |
conn = sqlite3.connect("search_results.db")
|
105 |
c = conn.cursor()
|
|
|
106 |
seoul_tz = pytz.timezone('Asia/Seoul')
|
107 |
now = datetime.now(seoul_tz)
|
108 |
timestamp = now.strftime('%Y-%m-%d %H:%M:%S')
|
109 |
|
110 |
+
c.execute("""INSERT INTO searches
|
111 |
+
(keyword, country, results, timestamp)
|
112 |
VALUES (?, ?, ?, ?)""",
|
113 |
(keyword, country, json.dumps(results), timestamp))
|
114 |
conn.commit()
|
|
|
116 |
|
117 |
def load_from_db(keyword, country):
|
118 |
"""
|
119 |
+
νΉμ (keyword, country) μ‘°ν©μ λν κ°μ₯ μ΅κ·Ό κ²μ κ²°κ³Όλ₯Ό DBμμ λΆλ¬μ€κΈ°
|
|
|
|
|
120 |
"""
|
121 |
conn = sqlite3.connect("search_results.db")
|
122 |
c = conn.cursor()
|
123 |
+
c.execute("SELECT results, timestamp FROM searches WHERE keyword=? AND country=? ORDER BY timestamp DESC LIMIT 1",
|
|
|
|
|
|
|
|
|
124 |
(keyword, country))
|
125 |
+
result = c.fetchone()
|
126 |
conn.close()
|
127 |
+
if result:
|
128 |
+
return json.loads(result[0]), convert_to_seoul_time(result[1])
|
129 |
return None, None
|
130 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
131 |
def display_results(articles):
|
132 |
"""
|
133 |
+
λ΄μ€ κΈ°μ¬ λͺ©λ‘μ Markdown λ¬Έμμ΄λ‘ λ³ννμ¬ λ°ν
|
|
|
134 |
"""
|
135 |
output = ""
|
136 |
for idx, article in enumerate(articles, 1):
|
|
|
|
|
|
|
137 |
output += f"### {idx}. {article['title']}\n"
|
138 |
output += f"μΆμ²: {article['channel']}\n"
|
139 |
output += f"μκ°: {article['time']}\n"
|
140 |
output += f"λ§ν¬: {article['link']}\n"
|
141 |
+
output += f"μμ½: {article['snippet']}\n\n"
|
|
|
142 |
return output
|
143 |
|
|
|
|
|
|
|
144 |
|
145 |
+
########################################
|
146 |
+
# 1) κ²μ μ => κΈ°μ¬ + λΆμ λμ μΆλ ₯, DB μ μ₯
|
147 |
+
########################################
|
148 |
def search_company(company):
|
149 |
"""
|
150 |
+
λ¨μΌ κΈ°μ
(λλ ν€μλ)μ λν΄ λ―Έκ΅ λ΄μ€ κ²μ ν,
|
151 |
+
1) κΈ°μ¬ λͺ©λ‘ + 2) κ°μ± λΆμ λ³΄κ³ λ₯Ό ν¨κ» μΆλ ₯
|
152 |
+
=> { "articles": [...], "analysis": ... } ννλ‘ DBμ μ μ₯
|
153 |
"""
|
154 |
error_message, articles = serphouse_search(company, "United States")
|
155 |
if not error_message and articles:
|
156 |
+
# κ°μ± λΆμ
|
157 |
analysis = analyze_sentiment_batch(articles, client)
|
158 |
+
|
159 |
+
# DB μ μ₯μ© λ°μ΄ν° ꡬμ±
|
160 |
+
store_dict = {
|
161 |
"articles": articles,
|
162 |
"analysis": analysis
|
163 |
}
|
164 |
+
save_to_db(company, "United States", store_dict)
|
165 |
+
|
166 |
+
# νλ©΄ μΆλ ₯μ©
|
167 |
+
output = display_results(articles)
|
168 |
+
output += f"\n\n### λΆμ λ³΄κ³ \n{analysis}\n"
|
169 |
+
return output
|
170 |
+
return f"{company}μ λν κ²μ κ²°κ³Όκ° μμ΅λλ€."
|
171 |
+
|
172 |
+
########################################
|
173 |
+
# 2) μΆλ ₯ μ => DBμ μ μ₯λ κΈ°μ¬ + λΆμ ν¨κ» μΆλ ₯
|
174 |
+
########################################
|
175 |
def load_company(company):
|
176 |
"""
|
177 |
+
DBμμ λ¨μΌ κΈ°μ
(λλ ν€μλ)μ λ―Έκ΅ λ΄μ€ κ²μ κ²°κ³Όλ₯Ό λΆλ¬μ
|
178 |
+
κΈ°μ¬ λͺ©λ‘ + λΆμ κ²°κ³Όλ₯Ό ν¨κ» μΆλ ₯
|
179 |
"""
|
180 |
+
data, timestamp = load_from_db(company, "United States")
|
181 |
+
if data:
|
182 |
+
# dataλ { "articles": [...], "analysis": "..."} νν
|
183 |
+
articles = data.get("articles", [])
|
184 |
+
analysis = data.get("analysis", "")
|
185 |
+
|
186 |
+
output = f"### {company} κ²μ κ²°κ³Ό\nμ μ₯ μκ°: {timestamp}\n\n"
|
187 |
+
output += display_results(articles)
|
188 |
+
output += f"\n\n### λΆμ λ³΄κ³ \n{analysis}\n"
|
189 |
+
return output
|
190 |
return f"{company}μ λν μ μ₯λ κ²°κ³Όκ° μμ΅λλ€."
|
191 |
|
192 |
+
|
193 |
+
########################################
|
194 |
+
# 3) κΈ°μ‘΄ show_stats()μμ 리ν¬νΈ μ λͺ© λ³κ²½
|
195 |
+
########################################
|
196 |
def show_stats():
|
197 |
"""
|
198 |
+
KOREAN_COMPANIES λͺ©λ‘ λ΄ λͺ¨λ κΈ°μ
μ λν΄:
|
199 |
+
- κ°μ₯ μ΅κ·Ό DB μ μ₯ μΌμ
|
200 |
+
- κΈ°μ¬ μ
|
201 |
+
- κ°μ± λΆμ κ²°κ³Ό
|
202 |
+
λ₯Ό λ³λ ¬μ²λ¦¬λ‘ μ‘°ννμ¬ λ³΄κ³ μ ννλ‘ λ°ν
|
203 |
+
|
204 |
+
(문ꡬ λ³κ²½) "νκ΅ κΈ°μ
λ΄μ€ λΆμ 리ν¬νΈ" -> "EarnBOT λΆμ 리ν¬νΈ"
|
205 |
"""
|
206 |
conn = sqlite3.connect("search_results.db")
|
207 |
c = conn.cursor()
|
208 |
+
|
209 |
+
output = "## EarnBOT λΆμ 리ν¬νΈ\n\n" # μ¬κΈ°μ 문ꡬ λ³κ²½
|
210 |
+
|
211 |
+
# λͺ¨λ κΈ°μ
μ λν΄ DBμμ μ½μ΄μ¬ (company, timestamp, articles) λͺ©λ‘ μμ§
|
212 |
data_list = []
|
213 |
+
for company in KOREAN_COMPANIES:
|
214 |
c.execute("""
|
215 |
+
SELECT results, timestamp
|
216 |
+
FROM searches
|
217 |
+
WHERE keyword = ?
|
218 |
+
ORDER BY timestamp DESC
|
219 |
LIMIT 1
|
220 |
+
""", (company,))
|
221 |
+
|
222 |
row = c.fetchone()
|
223 |
if row:
|
224 |
+
results_json, timestamp = row
|
225 |
+
data_list.append((company, timestamp, results_json))
|
226 |
+
|
227 |
conn.close()
|
228 |
+
|
229 |
+
# κ°μ± λΆμ λ³λ ¬ μ²λ¦¬ ν¨μ
|
230 |
def analyze_data(item):
|
231 |
+
comp, tstamp, results_json = item
|
232 |
+
data = json.loads(results_json)
|
233 |
+
articles = data.get("articles", [])
|
234 |
+
analysis = data.get("analysis", "")
|
235 |
+
|
236 |
count_articles = len(articles)
|
237 |
+
# μ¬κΈ°μλ μ΄λ―Έ DBμ "analysis"κ° λ€μ΄ μμΌλ―λ‘,
|
238 |
+
# κ΅³μ΄ μ¬λΆμν νμκ° μμΌλ©΄ κ·Έλλ‘ μ¬μ©
|
239 |
+
# (νμ μ μ¬λΆμ κ°λ₯)
|
240 |
+
|
241 |
return (comp, tstamp, count_articles, analysis)
|
242 |
|
243 |
results_list = []
|
|
|
245 |
futures = [executor.submit(analyze_data, dl) for dl in data_list]
|
246 |
for future in as_completed(futures):
|
247 |
results_list.append(future.result())
|
248 |
+
|
249 |
+
# κ²°κ³Ό μΆλ ₯
|
250 |
+
for comp, tstamp, count, analysis in results_list:
|
251 |
+
seoul_time = convert_to_seoul_time(tstamp)
|
252 |
output += f"### {comp}\n"
|
253 |
+
output += f"- λ§μ§λ§ μ
λ°μ΄νΈ: {seoul_time}\n"
|
254 |
+
output += f"- μ μ₯λ κΈ°μ¬ μ: {count}건\n\n"
|
255 |
if analysis:
|
256 |
output += "#### λ΄μ€ κ°μ± λΆμ\n"
|
257 |
output += f"{analysis}\n\n"
|
258 |
output += "---\n\n"
|
259 |
+
|
260 |
return output
|
261 |
|
262 |
+
|
|
|
|
|
263 |
def search_all_companies():
|
264 |
"""
|
265 |
+
KOREAN_COMPANIES 리μ€νΈ λ΄ λͺ¨λ κΈ°μ
κ²μ (λ©ν°μ€λ λ©) =>
|
266 |
+
=> λΆμ + DB μ μ₯ => κ²°κ³Ό Markdown λ°ν
|
267 |
"""
|
268 |
+
overall_result = "# [μ 체 κ²μ κ²°κ³Ό]\n\n"
|
269 |
+
|
270 |
def do_search(comp):
|
271 |
return comp, search_company(comp)
|
272 |
+
|
273 |
with ThreadPoolExecutor(max_workers=5) as executor:
|
274 |
futures = [executor.submit(do_search, c) for c in KOREAN_COMPANIES]
|
275 |
for future in as_completed(futures):
|
276 |
+
comp, res_text = future.result()
|
277 |
+
overall_result += f"## {comp}\n"
|
278 |
+
overall_result += res_text + "\n\n"
|
279 |
+
|
280 |
+
return overall_result
|
281 |
|
282 |
def load_all_companies():
|
283 |
"""
|
284 |
+
KOREAN_COMPANIES 리μ€νΈ λ΄ λͺ¨λ κΈ°μ
DB λΆλ¬μ€κΈ° =>
|
285 |
+
κΈ°μ¬ λͺ©λ‘ + λΆμ λ³΄κ³ => κ²°κ³Ό Markdown
|
286 |
"""
|
287 |
+
overall_result = "# [μ 체 μΆλ ₯ κ²°κ³Ό]\n\n"
|
288 |
+
|
289 |
for comp in KOREAN_COMPANIES:
|
290 |
+
overall_result += f"## {comp}\n"
|
291 |
+
overall_result += load_company(comp)
|
292 |
+
overall_result += "\n"
|
293 |
+
return overall_result
|
294 |
|
295 |
def full_summary_report():
|
296 |
"""
|
297 |
+
(1) λͺ¨λ κΈ°μ
κ²μ(λ³λ ¬) -> (2) DBμμ λΆλ¬μ€κΈ° -> (3) κ°μ± λΆμ ν΅κ³
|
298 |
+
μμλλ‘ μ€ννμ¬, μ 체 리ν¬νΈλ₯Ό ν©μ³ λ°ν
|
|
|
299 |
"""
|
300 |
+
# 1) μ 체 κ²μ(λ³λ ¬) => κΈ°μ¬ + λΆμ DB μ μ₯
|
301 |
+
search_result_text = search_all_companies()
|
302 |
+
|
303 |
+
# 2) μ 체 μΆλ ₯ => DBμ μ μ₯λ κΈ°μ¬ + λΆμ κ²°κ³Ό
|
304 |
+
load_result_text = load_all_companies()
|
305 |
+
|
306 |
+
# 3) μ 체 ν΅κ³(κ°μ± λΆμ) - 리ν¬νΈ μ λͺ© λ³κ²½λ¨(EarnBOT λΆμ 리ν¬νΈ)
|
307 |
stats_text = show_stats()
|
308 |
+
|
309 |
+
combined_report = (
|
310 |
"# μ 체 λΆμ λ³΄κ³ μμ½\n\n"
|
311 |
"μλ μμλ‘ μ€νλμμ΅λλ€:\n"
|
312 |
"1. λͺ¨λ μ’
λͺ© κ²μ(λ³λ ¬) + λΆμ => 2. λͺ¨λ μ’
λͺ© DB κ²°κ³Ό μΆλ ₯ => 3. μ 체 κ°μ± λΆμ ν΅κ³\n\n"
|
313 |
+
f"{search_result_text}\n\n"
|
314 |
+
f"{load_result_text}\n\n"
|
315 |
"## [μ 체 κ°μ± λΆμ ν΅κ³]\n\n"
|
316 |
f"{stats_text}"
|
317 |
)
|
318 |
+
return combined_report
|
319 |
+
|
320 |
|
321 |
+
########################################
|
322 |
+
# μ¬μ©μ μμ κ²μ (μΆκ° κΈ°λ₯)
|
323 |
+
########################################
|
324 |
def search_custom(query, country):
|
325 |
"""
|
326 |
+
μ¬μ©μκ° μ
λ ₯ν (query, country)μ λν΄
|
327 |
+
1) κ²μ + λΆμ => DB μ μ₯
|
328 |
+
2) DB λ‘λ => κ²°κ³Ό(κΈ°μ¬ λͺ©λ‘ + λΆμ) μΆλ ₯
|
329 |
"""
|
330 |
error_message, articles = serphouse_search(query, country)
|
331 |
if error_message:
|
|
|
333 |
if not articles:
|
334 |
return "κ²μ κ²°κ³Όκ° μμ΅λλ€."
|
335 |
|
336 |
+
# 1) λΆμ
|
337 |
analysis = analyze_sentiment_batch(articles, client)
|
338 |
+
|
339 |
+
# 2) DB μ μ₯
|
340 |
+
save_data = {
|
341 |
"articles": articles,
|
342 |
"analysis": analysis
|
343 |
}
|
344 |
+
save_to_db(query, country, save_data)
|
345 |
+
|
346 |
+
# 3) DB μ¬λ‘λ
|
347 |
+
loaded_data, timestamp = load_from_db(query, country)
|
348 |
+
if not loaded_data:
|
349 |
return "DBμμ λ‘λ μ€ν¨"
|
350 |
|
351 |
+
# 4) κ²°κ³Ό νμ
|
|
|
|
|
352 |
out = f"## [μ¬μ©μ μμ κ²μ κ²°κ³Ό]\n\n"
|
353 |
out += f"**ν€μλ**: {query}\n\n"
|
354 |
out += f"**κ΅κ°**: {country}\n\n"
|
355 |
+
out += f"**μ μ₯ μκ°**: {timestamp}\n\n"
|
356 |
+
|
357 |
+
arts = loaded_data.get("articles", [])
|
358 |
+
analy = loaded_data.get("analysis", "")
|
359 |
+
|
360 |
out += display_results(arts)
|
361 |
out += f"### λ΄μ€ κ°μ± λΆμ\n{analy}\n"
|
362 |
+
|
363 |
return out
|
364 |
|
365 |
+
|
366 |
+
########################################
|
367 |
+
# API μΈμ¦
|
368 |
+
########################################
|
369 |
ACCESS_TOKEN = os.getenv("HF_TOKEN")
|
370 |
if not ACCESS_TOKEN:
|
371 |
raise ValueError("HF_TOKEN environment variable is not set")
|
|
|
375 |
api_key=ACCESS_TOKEN,
|
376 |
)
|
377 |
|
378 |
+
API_KEY = os.getenv("SERPHOUSE_API_KEY")
|
379 |
+
|
380 |
+
|
381 |
+
########################################
|
382 |
+
# κ΅κ°λ³ μ€μ
|
383 |
+
########################################
|
384 |
COUNTRY_LANGUAGES = {
|
385 |
"United States": "en",
|
386 |
"KOREA": "ko",
|
387 |
"United Kingdom": "en",
|
388 |
+
"Taiwan": "zh-TW",
|
389 |
"Canada": "en",
|
390 |
"Australia": "en",
|
391 |
"Germany": "de",
|
|
|
523 |
"Iceland": "Iceland"
|
524 |
}
|
525 |
|
526 |
+
|
527 |
+
@lru_cache(maxsize=100)
def translate_query(query, country):
    """
    Translate `query` into the target country's language via the
    unofficial Google Translate endpoint.

    Returns the query unchanged when it is already English, when the
    target is Korea (no translation needed), when the country is not in
    COUNTRY_LANGUAGES, or when the translation request fails for any
    reason (best-effort: search proceeds with the original query).
    """
    try:
        if is_english(query):
            return query

        if country in COUNTRY_LANGUAGES:
            # BUG FIX: the language table keys Korea as "KOREA", so the old
            # comparison against "South Korea" alone never matched and
            # Korean queries were needlessly sent through the translator.
            if country in ("South Korea", "KOREA"):
                return query
            target_lang = COUNTRY_LANGUAGES[country]

            url = "https://translate.googleapis.com/translate_a/single"
            params = {
                "client": "gtx",
                "sl": "auto",
                "tl": target_lang,
                "dt": "t",
                "q": query
            }

            # Retry transient failures with a short backoff.
            session = requests.Session()
            retries = Retry(total=3, backoff_factor=0.5)
            session.mount('https://', HTTPAdapter(max_retries=retries))

            response = session.get(url, params=params, timeout=(5, 10))
            # Surface non-2xx responses explicitly instead of failing later
            # on JSON parsing; the broad except below still falls back to
            # the untranslated query either way.
            response.raise_for_status()
            translated_text = response.json()[0][0][0]
            return translated_text

        return query

    except Exception as e:
        print(f"λ²μ μ€λ₯: {str(e)}")
        return query
|
563 |
+
|
564 |
+
def is_english(text):
    """Return True if the text is pure ASCII once spaces, hyphens, and
    underscores are ignored (used as a cheap 'already English' check)."""
    stripped = text.replace(' ', '').replace('-', '').replace('_', '')
    return not any(ord(ch) >= 128 for ch in stripped)
|
566 |
+
|
567 |
+
def search_serphouse(query, country, page=1, num_result=100):
    """
    Send a live search request to the SerpHouse API and return the raw
    news results for `query` in the given `country`.

    Results come from the Google News tab (serp_type="news"), sorted by
    date and restricted to roughly the last 24 hours (UTC).

    Args:
        query: Search keyword; translated to the country's language first.
        country: Key into COUNTRY_LOCATIONS / COUNTRY_LANGUAGES.
        page: Result page number.
        num_result: Number of results to request per page. The previous
            signature defaulted to 10 but silently ignored the parameter
            and always sent "100"; the default is now 100 so existing
            callers see identical behavior while the parameter works.

    Returns:
        On success: {"results": <raw API json>, "translated_query": str}.
        On failure: {"error": <Korean message>, "translated_query": str}.
    """
    url = "https://api.serphouse.com/serp/live"

    # Limit results to articles dated within the last day (UTC dates).
    now = datetime.utcnow()
    yesterday = now - timedelta(days=1)
    date_range = f"{yesterday.strftime('%Y-%m-%d')},{now.strftime('%Y-%m-%d')}"

    translated_query = translate_query(query, country)

    payload = {
        "data": {
            "q": translated_query,
            "domain": "google.com",
            "loc": COUNTRY_LOCATIONS.get(country, "United States"),
            "lang": COUNTRY_LANGUAGES.get(country, "en"),
            "device": "desktop",
            "serp_type": "news",
            "page": str(page),
            # BUG FIX: was hard-coded to "100", leaving num_result dead.
            "num": str(num_result),
            "date_range": date_range,
            "sort_by": "date"
        }
    }

    headers = {
        "accept": "application/json",
        "content-type": "application/json",
        "authorization": f"Bearer {API_KEY}"
    }

    try:
        session = requests.Session()

        # Retry transient server errors and rate limiting (POST is not
        # retried by default, hence the explicit allowed_methods).
        retries = Retry(
            total=5,
            backoff_factor=1,
            status_forcelist=[500, 502, 503, 504, 429],
            allowed_methods=["POST"]
        )

        adapter = HTTPAdapter(max_retries=retries)
        session.mount('http://', adapter)
        session.mount('https://', adapter)

        response = session.post(
            url,
            json=payload,
            headers=headers,
            timeout=(30, 30)
        )

        response.raise_for_status()
        return {"results": response.json(), "translated_query": translated_query}

    except requests.exceptions.Timeout:
        return {
            "error": "κ²μ μκ°μ΄ μ΄κ³Όλμμ΅λλ€. μ μ ν λ€μ μλν΄μ£ΌμΈμ.",
            "translated_query": query
        }
    except requests.exceptions.RequestException as e:
        return {
            "error": f"κ²μ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}",
            "translated_query": query
        }
    except Exception as e:
        return {
            "error": f"μκΈ°μΉ μμ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}",
            "translated_query": query
        }
|
640 |
+
|
641 |
+
def format_results_from_raw(response_data):
    """
    Transform a raw SerpHouse response into (error_message, article_list).

    Articles whose URL/source look Korean (domain or city keyword match)
    are dropped, since this feed is meant to surface non-Korean coverage.
    Returns ("", articles) on success; (error_message, []) on failure.
    """
    if "error" in response_data:
        return "Error: " + response_data["error"], []

    try:
        raw = response_data["results"]
        translated_query = response_data["translated_query"]

        # Navigate the nested API payload down to the news list.
        news_items = raw.get('results', {}).get('results', {}).get('news', [])
        if not news_items:
            return "κ²μ κ²°κ³Όκ° μμ΅λλ€.", []

        # Domains / title keywords that mark Korean-related coverage.
        korean_domains = [
            '.kr', 'korea', 'korean', 'yonhap', 'hankyung', 'chosun',
            'donga', 'joins', 'hani', 'koreatimes', 'koreaherald'
        ]
        korean_keywords = [
            'korea', 'korean', 'seoul', 'busan', 'incheon', 'daegu',
            'gwangju', 'daejeon', 'ulsan', 'sejong'
        ]

        articles = []
        for position, item in enumerate(news_items, 1):
            link = item.get("url", item.get("link", "")).lower()
            headline = item.get("title", "").lower()
            source = item.get("channel", item.get("source", "")).lower()

            looks_korean = (
                any(d in link or d in source for d in korean_domains)
                or any(k in headline for k in korean_keywords)
            )
            if looks_korean:
                continue

            articles.append({
                "index": position,
                "title": item.get("title", "μ λͺ© μμ"),
                "link": link,
                "snippet": item.get("snippet", "λ΄μ© μμ"),
                "channel": item.get("channel", item.get("source", "μ μ μμ")),
                "time": item.get("time", item.get("date", "μ μ μλ μκ°")),
                "image_url": item.get("img", item.get("thumbnail", "")),
                "translated_query": translated_query
            })

        return "", articles
    except Exception as e:
        return f"κ²°κ³Ό μ²λ¦¬ μ€ μ€λ₯ λ°μ: {str(e)}", []
|
695 |
+
|
696 |
+
def serphouse_search(query, country):
    """Run a live SerpHouse news search and return the formatted
    (error_message, articles) pair in one call."""
    raw_response = search_serphouse(query, country)
    return format_results_from_raw(raw_response)
|
702 |
+
|
703 |
+
|
704 |
+
# CSS (UI 컀μ€ν°λ§μ΄μ§)
|
705 |
css = """
|
706 |
/* μ μ μ€νμΌ */
|
707 |
footer {visibility: hidden;}
|
708 |
|
709 |
+
/* λ μ΄μμ 컨ν
μ΄λ */
|
710 |
#status_area {
|
711 |
background: rgba(255, 255, 255, 0.9);
|
712 |
padding: 15px;
|
|
|
720 |
margin-top: 10px;
|
721 |
}
|
722 |
|
723 |
+
/* ν μ€νμΌ */
|
724 |
.tabs {
|
725 |
border-bottom: 2px solid #ddd !important;
|
726 |
margin-bottom: 20px !important;
|
|
|
748 |
padding: 10px 0;
|
749 |
}
|
750 |
|
751 |
+
/* κΈ°λ³Έ 컨ν
μ΄λ */
|
752 |
.group {
|
753 |
border: 1px solid #eee;
|
754 |
padding: 15px;
|
|
|
763 |
border: none !important;
|
764 |
}
|
765 |
|
766 |
+
/* μ
λ ₯ νλ */
|
767 |
+
.textbox {
|
768 |
+
border: 1px solid #ddd !important;
|
769 |
+
border-radius: 4px !important;
|
770 |
+
}
|
771 |
+
|
772 |
+
/* νλ‘κ·Έλ μ€λ° 컨ν
μ΄λ */
|
773 |
+
.progress-container {
|
774 |
+
position: fixed;
|
775 |
+
top: 0;
|
776 |
+
left: 0;
|
777 |
+
width: 100%;
|
778 |
+
height: 6px;
|
779 |
+
background: #e0e0e0;
|
780 |
+
z-index: 1000;
|
781 |
+
}
|
782 |
+
|
783 |
+
/* νλ‘κ·Έλ μ€bar */
|
784 |
+
.progress-bar {
|
785 |
+
height: 100%;
|
786 |
+
background: linear-gradient(90deg, #2196F3, #00BCD4);
|
787 |
+
box-shadow: 0 0 10px rgba(33, 150, 243, 0.5);
|
788 |
+
transition: width 0.3s ease;
|
789 |
+
animation: progress-glow 1.5s ease-in-out infinite;
|
790 |
+
}
|
791 |
+
|
792 |
+
/* νλ‘κ·Έλ μ€ ν
μ€νΈ */
|
793 |
+
.progress-text {
|
794 |
+
position: fixed;
|
795 |
+
top: 8px;
|
796 |
+
left: 50%;
|
797 |
+
transform: translateX(-50%);
|
798 |
+
background: #333;
|
799 |
+
color: white;
|
800 |
+
padding: 4px 12px;
|
801 |
+
border-radius: 15px;
|
802 |
+
font-size: 14px;
|
803 |
+
z-index: 1001;
|
804 |
+
box-shadow: 0 2px 5px rgba(0,0,0,0.2);
|
805 |
+
}
|
806 |
+
|
807 |
+
/* νλ‘κ·Έλ μ€λ° μ λλ©μ΄μ
*/
|
808 |
+
@keyframes progress-glow {
|
809 |
+
0% {
|
810 |
+
box-shadow: 0 0 5px rgba(33, 150, 243, 0.5);
|
811 |
+
}
|
812 |
+
50% {
|
813 |
+
box-shadow: 0 0 20px rgba(33, 150, 243, 0.8);
|
814 |
+
}
|
815 |
+
100% {
|
816 |
+
box-shadow: 0 0 5px rgba(33, 150, 243, 0.5);
|
817 |
+
}
|
818 |
+
}
|
819 |
+
|
820 |
+
/* λ°μν λμμΈ */
|
821 |
+
@media (max-width: 768px) {
|
822 |
+
.group {
|
823 |
+
padding: 10px;
|
824 |
+
margin-bottom: 15px;
|
825 |
+
}
|
826 |
+
|
827 |
+
.progress-text {
|
828 |
+
font-size: 12px;
|
829 |
+
padding: 3px 10px;
|
830 |
+
}
|
831 |
+
}
|
832 |
+
|
833 |
+
/* λ‘λ© μν νμ κ°μ */
|
834 |
+
.loading {
|
835 |
+
opacity: 0.7;
|
836 |
+
pointer-events: none;
|
837 |
+
transition: opacity 0.3s ease;
|
838 |
+
}
|
839 |
+
|
840 |
+
/* κ²°κ³Ό 컨ν
μ΄λ μ λλ©μ΄μ
*/
|
841 |
+
.group {
|
842 |
+
transition: all 0.3s ease;
|
843 |
+
opacity: 0;
|
844 |
+
transform: translateY(20px);
|
845 |
+
}
|
846 |
+
|
847 |
+
.group.visible {
|
848 |
+
opacity: 1;
|
849 |
+
transform: translateY(0);
|
850 |
+
}
|
851 |
+
|
852 |
+
/* Examples μ€νμΌλ§ */
|
853 |
+
.examples-table {
|
854 |
+
margin-top: 10px !important;
|
855 |
+
margin-bottom: 20px !important;
|
856 |
+
}
|
857 |
+
|
858 |
+
.examples-table button {
|
859 |
+
background-color: #f0f0f0 !important;
|
860 |
+
border: 1px solid #ddd !important;
|
861 |
+
border-radius: 4px !important;
|
862 |
+
padding: 5px 10px !important;
|
863 |
+
margin: 2px !important;
|
864 |
+
transition: all 0.3s ease !important;
|
865 |
+
}
|
866 |
+
|
867 |
+
.examples-table button:hover {
|
868 |
+
background-color: #e0e0e0 !important;
|
869 |
+
transform: translateY(-1px) !important;
|
870 |
+
box-shadow: 0 2px 5px rgba(0,0,0,0.1) !important;
|
871 |
+
}
|
872 |
+
|
873 |
+
.examples-table .label {
|
874 |
+
font-weight: bold !important;
|
875 |
+
color: #444 !important;
|
876 |
+
margin-bottom: 5px !important;
|
877 |
+
}
|
878 |
"""
|
879 |
|
880 |
import gradio as gr
|
881 |
|
882 |
with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css, title="NewsAI μλΉμ€") as iface:
|
|
|
883 |
init_db()
|
884 |
|
885 |
with gr.Tabs():
|
|
|
888 |
gr.Markdown("## EarnBot: κΈλ‘λ² λΉ
ν
ν¬ κΈ°μ
λ° ν¬μ μ λ§ AI μλ λΆμ")
|
889 |
gr.Markdown(" * 'μ 체 λΆμ λ³΄κ³ μμ½' ν΄λ¦ μ μ 체 μλ λ³΄κ³ μμ±.\n * μλ κ°λ³ μ’
λͺ©μ 'κ²μ(DB μλ μ μ₯)'κ³Ό 'μΆλ ₯(DB μλ νΈμΆ)'λ κ°λ₯.\n * μΆκ°λ‘, μνλ μμ ν€μλ λ° κ΅κ°λ‘ κ²μ/λΆμν μλ μμ΅λλ€.")
|
890 |
|
891 |
+
# (μ¬μ©μ μμ κ²μ μΉμ
)
|
892 |
with gr.Group():
|
893 |
gr.Markdown("### μ¬μ©μ μμ κ²μ")
|
894 |
with gr.Row():
|
|
|
908 |
|
909 |
custom_search_output = gr.Markdown()
|
910 |
|
|
|
911 |
custom_search_btn.click(
|
912 |
fn=search_custom,
|
913 |
inputs=[user_input, country_selection],
|
914 |
outputs=custom_search_output
|
915 |
)
|
916 |
|
917 |
+
# μ 체 λΆμ λ³΄κ³ μμ½ λ²νΌ
|
918 |
with gr.Row():
|
919 |
full_report_btn = gr.Button("μ 체 λΆμ λ³΄κ³ μμ½", variant="primary")
|
920 |
full_report_display = gr.Markdown()
|
921 |
|
|
|
922 |
full_report_btn.click(
|
923 |
fn=full_summary_report,
|
924 |
outputs=full_report_display
|
925 |
)
|
926 |
|
927 |
+
# μ§μ λ 리μ€νΈ (KOREAN_COMPANIES) κ°λ³ κΈ°μ
κ²μ/μΆλ ₯
|
928 |
with gr.Column():
|
929 |
for i in range(0, len(KOREAN_COMPANIES), 2):
|
930 |
with gr.Row():
|
931 |
+
# μΌμͺ½ μ΄
|
932 |
with gr.Column():
|
933 |
company = KOREAN_COMPANIES[i]
|
934 |
with gr.Group():
|
|
|
938 |
load_btn = gr.Button("μΆλ ₯", variant="secondary")
|
939 |
result_display = gr.Markdown()
|
940 |
|
|
|
941 |
search_btn.click(
|
942 |
fn=lambda c=company: search_company(c),
|
943 |
outputs=result_display
|
944 |
)
|
|
|
945 |
load_btn.click(
|
946 |
fn=lambda c=company: load_company(c),
|
947 |
outputs=result_display
|
948 |
)
|
949 |
|
950 |
+
# μ€λ₯Έμͺ½ μ΄
|
951 |
if i + 1 < len(KOREAN_COMPANIES):
|
952 |
with gr.Column():
|
953 |
company = KOREAN_COMPANIES[i + 1]
|