ginipick commited on
Commit
9295f92
Β·
verified Β·
1 Parent(s): 093c47e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +459 -368
app.py CHANGED
@@ -26,22 +26,15 @@ KOREAN_COMPANIES = [
26
  "INTEL",
27
  "SAMSUNG",
28
  "HYNIX",
29
- "BITCOIN",
30
  "crypto",
31
  "stock",
32
  "Economics",
33
  "Finance",
34
- "investing"
35
  ]
36
 
37
- ######################################################################
38
- # 곡톡 ν•¨μˆ˜: μ‹œκ°„ λ³€ν™˜
39
- ######################################################################
40
  def convert_to_seoul_time(timestamp_str):
41
- """
42
- μ£Όμ–΄μ§„ 'YYYY-MM-DD HH:MM:SS' ν˜•νƒœμ˜ μ‹œκ°(UTC κΈ°μ€€ λ“±)을
43
- 'YYYY-MM-DD HH:MM:SS KST' ν˜•νƒœλ‘œ λ³€ν™˜ν•˜μ—¬ λ°˜ν™˜.
44
- """
45
  try:
46
  dt = datetime.strptime(timestamp_str, '%Y-%m-%d %H:%M:%S')
47
  seoul_tz = pytz.timezone('Asia/Seoul')
@@ -51,23 +44,18 @@ def convert_to_seoul_time(timestamp_str):
51
  print(f"μ‹œκ°„ λ³€ν™˜ 였λ₯˜: {str(e)}")
52
  return timestamp_str
53
 
54
- ######################################################################
55
- # 곡톡 ν•¨μˆ˜: 감성 뢄석
56
- ######################################################################
57
  def analyze_sentiment_batch(articles, client):
58
  """
59
- OpenAI APIλ₯Ό 톡해, λ‰΄μŠ€ κΈ°μ‚¬λ“€μ˜ 제λͺ©+λ‚΄μš©μ„ μ’…ν•©ν•˜μ—¬ 감성 뢄석을 μˆ˜ν–‰.
60
- - κ²°κ³Όλ₯Ό ν•œκ΅­μ–΄λ‘œ μž‘μ„±ν•˜λ„λ‘ ν”„λ‘¬ν”„νŠΈ 내에 λͺ…μ‹œ.
61
  """
62
  try:
63
- # κΈ°μ‚¬λ“€μ˜ title/snippet κ²°ν•©
64
  combined_text = "\n\n".join([
65
  f"제λͺ©: {article.get('title', '')}\nλ‚΄μš©: {article.get('snippet', '')}"
66
  for article in articles
67
  ])
68
 
69
- # ν•œκ΅­μ–΄λ‘œ μž‘μ„±ν•  것을 μœ λ„ν•˜λŠ” 문ꡬ
70
- prompt = f"""λ‹€μŒ λ‰΄μŠ€ λͺ¨μŒμ— λŒ€ν•΄ μ „λ°˜μ μΈ 감성 뢄석을 μˆ˜ν–‰ν•˜μ„Έμš”. (ν•œκ΅­μ–΄λ‘œ μž‘μ„±ν•˜μ„Έμš”)
71
 
72
  λ‰΄μŠ€ λ‚΄μš©:
73
  {combined_text}
@@ -94,14 +82,9 @@ def analyze_sentiment_batch(articles, client):
94
  except Exception as e:
95
  return f"감성 뢄석 μ‹€νŒ¨: {str(e)}"
96
 
97
- ######################################################################
98
- # DB μ΄ˆκΈ°ν™” 및 μž…μΆœλ ₯
99
- ######################################################################
100
  def init_db():
101
- """
102
- SQLite DB 파일(search_results.db)이 μ—†μœΌλ©΄ 생성,
103
- 'searches' ν…Œμ΄λΈ”μ΄ μ—†μœΌλ©΄ 생성
104
- """
105
  db_path = pathlib.Path("search_results.db")
106
  conn = sqlite3.connect(db_path)
107
  c = conn.cursor()
@@ -116,17 +99,16 @@ def init_db():
116
 
117
  def save_to_db(keyword, country, results):
118
  """
119
- (keyword, country)에 λŒ€ν•œ κ²°κ³Ό(JSON)λ₯Ό DB에 insert.
120
  """
121
  conn = sqlite3.connect("search_results.db")
122
  c = conn.cursor()
123
-
124
  seoul_tz = pytz.timezone('Asia/Seoul')
125
  now = datetime.now(seoul_tz)
126
  timestamp = now.strftime('%Y-%m-%d %H:%M:%S')
127
 
128
- c.execute("""INSERT INTO searches
129
- (keyword, country, results, timestamp)
130
  VALUES (?, ?, ?, ?)""",
131
  (keyword, country, json.dumps(results), timestamp))
132
  conn.commit()
@@ -134,324 +116,128 @@ def save_to_db(keyword, country, results):
134
 
135
  def load_from_db(keyword, country):
136
  """
137
- DBμ—μ„œ (keyword, country)에 ν•΄λ‹Ήν•˜λŠ” κ°€μž₯ 졜근 기둝을 λ‘œλ“œ
138
- - μ„±κ³΅μ‹œ (json.loads(...)된 results, KST μ‹œκ°„)
139
- - μ‹€νŒ¨μ‹œ (None, None)
140
  """
141
  conn = sqlite3.connect("search_results.db")
142
  c = conn.cursor()
143
- c.execute("""SELECT results, timestamp
144
- FROM searches
145
- WHERE keyword=? AND country=?
146
- ORDER BY timestamp DESC
147
- LIMIT 1""",
148
  (keyword, country))
149
- row = c.fetchone()
150
  conn.close()
151
- if row:
152
- return json.loads(row[0]), convert_to_seoul_time(row[1])
153
  return None, None
154
 
155
- ######################################################################
156
- # SerpHouse API (검색 ν•¨μˆ˜λ“€)
157
- ######################################################################
158
- API_KEY = os.getenv("SERPHOUSE_API_KEY")
159
-
160
- def is_english(text):
161
- """
162
- ν…μŠ€νŠΈκ°€ μ „λΆ€ ASCII λ²”μœ„λ©΄ True, μ•„λ‹ˆλ©΄ False
163
- """
164
- return all(ord(char) < 128 for char in text.replace(' ', '').replace('-', '').replace('_', ''))
165
-
166
- @lru_cache(maxsize=100)
167
- def translate_query(query, country):
168
- """
169
- queryλ₯Ό ν•΄λ‹Ή country μ–Έμ–΄λ‘œ λ²ˆμ—­
170
- """
171
- try:
172
- # 이미 μ˜μ–΄λ©΄ κ·Έλƒ₯ λ°˜ν™˜
173
- if is_english(query):
174
- return query
175
-
176
- if country in COUNTRY_LANGUAGES:
177
- target_lang = COUNTRY_LANGUAGES[country]
178
-
179
- url = "https://translate.googleapis.com/translate_a/single"
180
- params = {
181
- "client": "gtx",
182
- "sl": "auto",
183
- "tl": target_lang,
184
- "dt": "t",
185
- "q": query
186
- }
187
-
188
- session = requests.Session()
189
- retries = Retry(total=3, backoff_factor=0.5)
190
- session.mount('https://', HTTPAdapter(max_retries=retries))
191
-
192
- resp = session.get(url, params=params, timeout=(5, 10))
193
- translated_text = resp.json()[0][0][0]
194
- return translated_text
195
-
196
- return query
197
- except Exception as e:
198
- print(f"λ²ˆμ—­ 였λ₯˜: {str(e)}")
199
- return query
200
-
201
- @lru_cache(maxsize=200)
202
- def translate_to_korean(text):
203
- """
204
- snippet 등을 ν•œκΈ€λ‘œ λ²ˆμ—­ν•˜κΈ° μœ„ν•œ ν•¨μˆ˜
205
- """
206
- try:
207
- url = "https://translate.googleapis.com/translate_a/single"
208
- params = {
209
- "client": "gtx",
210
- "sl": "auto",
211
- "tl": "ko",
212
- "dt": "t",
213
- "q": text
214
- }
215
-
216
- session = requests.Session()
217
- retries = Retry(total=3, backoff_factor=0.5)
218
- session.mount('https://', HTTPAdapter(max_retries=retries))
219
-
220
- response = session.get(url, params=params, timeout=(5, 10))
221
- translated_text = response.json()[0][0][0]
222
- return translated_text
223
- except Exception as e:
224
- print(f"ν•œκΈ€ λ²ˆμ—­ 였λ₯˜: {str(e)}")
225
- return text
226
-
227
- def search_serphouse(query, country, page=1, num_result=10):
228
- """
229
- SerpHouse API μ‹€μ‹œκ°„ 검색 -> 'news' (sort_by=date)
230
- """
231
- url = "https://api.serphouse.com/serp/live"
232
-
233
- now = datetime.utcnow()
234
- yesterday = now - timedelta(days=1)
235
- date_range = f"{yesterday.strftime('%Y-%m-%d')},{now.strftime('%Y-%m-%d')}"
236
-
237
- translated_query = translate_query(query, country)
238
-
239
- payload = {
240
- "data": {
241
- "q": translated_query,
242
- "domain": "google.com",
243
- "loc": COUNTRY_LOCATIONS.get(country, "United States"),
244
- "lang": COUNTRY_LANGUAGES.get(country, "en"),
245
- "device": "desktop",
246
- "serp_type": "news",
247
- "page": str(page),
248
- "num": "100",
249
- "date_range": date_range,
250
- "sort_by": "date"
251
- }
252
- }
253
-
254
- headers = {
255
- "accept": "application/json",
256
- "content-type": "application/json",
257
- "authorization": f"Bearer {API_KEY}"
258
- }
259
-
260
- try:
261
- session = requests.Session()
262
- retries = Retry(
263
- total=5,
264
- backoff_factor=1,
265
- status_forcelist=[429, 500, 502, 503, 504],
266
- allowed_methods=["POST"]
267
- )
268
- adapter = HTTPAdapter(max_retries=retries)
269
- session.mount('http://', adapter)
270
- session.mount('https://', adapter)
271
-
272
- resp = session.post(url, json=payload, headers=headers, timeout=(30, 30))
273
- resp.raise_for_status()
274
-
275
- # 응닡 JSON
276
- return {
277
- "results": resp.json(),
278
- "translated_query": translated_query
279
- }
280
- except requests.exceptions.Timeout:
281
- return {
282
- "error": "검색 μ‹œκ°„μ΄ μ΄ˆκ³Όλ˜μ—ˆμŠ΅λ‹ˆλ‹€. μž μ‹œ ν›„ λ‹€μ‹œ μ‹œλ„ν•΄μ£Όμ„Έμš”.",
283
- "translated_query": query
284
- }
285
- except requests.exceptions.RequestException as e:
286
- return {
287
- "error": f"검색 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}",
288
- "translated_query": query
289
- }
290
- except Exception as e:
291
- return {
292
- "error": f"예기치 μ•Šμ€ 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}",
293
- "translated_query": query
294
- }
295
-
296
- def format_results_from_raw(response_data):
297
- """
298
- SerpHouse API 응닡을 (error_message, articles_list) ν˜•νƒœλ‘œ 가곡
299
- - ν•œκ΅­ 도메인(kr, korea, etc) μ œμ™Έ
300
- - emptyμ‹œ "검색 κ²°κ³Όκ°€ μ—†μŠ΅λ‹ˆλ‹€."
301
- """
302
- if "error" in response_data:
303
- return "Error: " + response_data["error"], []
304
-
305
- try:
306
- results = response_data["results"]
307
- translated_query = response_data["translated_query"]
308
-
309
- # μ‹€μ œ λ‰΄μŠ€ νƒ­ κ²°κ³Ό
310
- news_results = results.get('results', {}).get('results', {}).get('news', [])
311
- if not news_results:
312
- return "검색 κ²°κ³Όκ°€ μ—†μŠ΅λ‹ˆλ‹€.", []
313
-
314
- # ν•œκ΅­μ–΄ μ œμ™Έ
315
- korean_domains = [
316
- '.kr', 'korea', 'korean', 'yonhap', 'hankyung', 'chosun',
317
- 'donga', 'joins', 'hani', 'koreatimes', 'koreaherald'
318
- ]
319
- korean_keywords = [
320
- 'korea', 'korean', 'seoul', 'busan', 'incheon', 'daegu',
321
- 'gwangju', 'daejeon', 'ulsan', 'sejong'
322
- ]
323
-
324
- filtered_articles = []
325
- for idx, result in enumerate(news_results, 1):
326
- url = result.get("url", result.get("link", "")).lower()
327
- title = result.get("title", "").lower()
328
- channel = result.get("channel", result.get("source", "")).lower()
329
-
330
- is_korean_content = (
331
- any(domain in url or domain in channel for domain in korean_domains)
332
- or any(keyword in title for keyword in korean_keywords)
333
- )
334
- if not is_korean_content:
335
- filtered_articles.append({
336
- "index": idx,
337
- "title": result.get("title", "제λͺ© μ—†μŒ"),
338
- "link": url,
339
- "snippet": result.get("snippet", "λ‚΄μš© μ—†μŒ"),
340
- "channel": result.get("channel", result.get("source", "μ•Œ 수 μ—†μŒ")),
341
- "time": result.get("time", result.get("date", "μ•Œ 수 μ—†λŠ” μ‹œκ°„")),
342
- "image_url": result.get("img", result.get("thumbnail", "")),
343
- "translated_query": translated_query
344
- })
345
-
346
- return "", filtered_articles
347
- except Exception as e:
348
- return f"κ²°κ³Ό 처리 쀑 였λ₯˜ λ°œμƒ: {str(e)}", []
349
-
350
- def serphouse_search(query, country):
351
- """
352
- 전체 νŒŒμ΄ν”„λΌμΈ (search_serphouse -> format_results_from_raw)
353
- λ°˜ν™˜: (error_message, articles_list)
354
- """
355
- response_data = search_serphouse(query, country)
356
- return format_results_from_raw(response_data)
357
-
358
- ######################################################################
359
- # λ‰΄μŠ€ 기사 λͺ©λ‘ -> Markdown
360
- ######################################################################
361
  def display_results(articles):
362
  """
363
- 기사 λͺ©λ‘μ„ Markdown λ¬Έμžμ—΄λ‘œ λ³€ν™˜
364
- - snippet(영문) + ν•œκΈ€ λ²ˆμ—­(snippet)을 ν•¨κ»˜ ν‘œμ‹œ
365
  """
366
  output = ""
367
  for idx, article in enumerate(articles, 1):
368
- # snippet을 ν•œκ΅­μ–΄λ‘œ λ²ˆμ—­
369
- korean_snippet = translate_to_korean(article['snippet'])
370
-
371
  output += f"### {idx}. {article['title']}\n"
372
  output += f"좜처: {article['channel']}\n"
373
  output += f"μ‹œκ°„: {article['time']}\n"
374
  output += f"링크: {article['link']}\n"
375
- output += f"μš”μ•½(원문): {article['snippet']}\n"
376
- output += f"μš”μ•½(ν•œκ΅­μ–΄): {korean_snippet}\n\n"
377
  return output
378
 
379
- ######################################################################
380
- # ν•œκ΅­ κΈ°μ—… λͺ©λ‘ (이미 선언됨)
381
- ######################################################################
382
 
383
- ######################################################################
384
- # 검색/좜λ ₯ ν•¨μˆ˜
385
- ######################################################################
386
  def search_company(company):
387
  """
388
- λ―Έκ΅­(United States) λ‰΄μŠ€ 검색 -> 감성뢄석(ν•œκ΅­μ–΄) -> DBμ €μž₯ -> Markdown λ°˜ν™˜
 
 
389
  """
390
  error_message, articles = serphouse_search(company, "United States")
391
  if not error_message and articles:
 
392
  analysis = analyze_sentiment_batch(articles, client)
393
- data_to_store = {
 
 
394
  "articles": articles,
395
  "analysis": analysis
396
  }
397
- save_to_db(company, "United States", data_to_store)
398
-
399
- out = display_results(articles)
400
- out += f"\n\n### 뢄석 보고\n{analysis}\n"
401
- return out
402
- else:
403
- if error_message:
404
- return error_message
405
- return f"{company}에 λŒ€ν•œ 검색 κ²°κ³Όκ°€ μ—†μŠ΅λ‹ˆλ‹€."
406
-
 
407
  def load_company(company):
408
  """
409
- DBμ—μ„œ (company, United States) 검색 κ²°κ³Όλ₯Ό λΆˆλŸ¬μ™€ 기사+뢄석 좜λ ₯
 
410
  """
411
- loaded, ts = load_from_db(company, "United States")
412
- if loaded:
413
- articles = loaded.get("articles", [])
414
- analysis = loaded.get("analysis", "")
415
- out = f"### {company} 검색 κ²°κ³Ό\nμ €μž₯ μ‹œκ°„: {ts}\n\n"
416
- out += display_results(articles)
417
- out += f"\n\n### 뢄석 보고\n{analysis}\n"
418
- return out
 
 
419
  return f"{company}에 λŒ€ν•œ μ €μž₯된 κ²°κ³Όκ°€ μ—†μŠ΅λ‹ˆλ‹€."
420
 
421
- ######################################################################
422
- # 톡계 (EarnBOT 뢄석 리포트)
423
- ######################################################################
 
424
  def show_stats():
425
  """
426
- KOREAN_COMPANIES λ‚΄ λͺ¨λ“  κΈ°μ—…μ˜ κ°€μž₯ 졜근 DB κ²°κ³Ό -> κΈ°μ‚¬μˆ˜, 뢄석, timestamp
 
 
 
 
 
 
427
  """
428
  conn = sqlite3.connect("search_results.db")
429
  c = conn.cursor()
430
-
431
- output = "## EarnBOT 뢄석 리포트\n\n"
432
-
433
- # DBμ—μ„œ 각 κΈ°μ—…μ˜ μ΅œμ‹  μ €μž₯ 기둝
434
  data_list = []
435
- for comp in KOREAN_COMPANIES:
436
  c.execute("""
437
- SELECT results, timestamp
438
- FROM searches
439
- WHERE keyword=?
440
- ORDER BY timestamp DESC
441
  LIMIT 1
442
- """, (comp,))
 
443
  row = c.fetchone()
444
  if row:
445
- results_json, ts = row
446
- data_list.append((comp, ts, results_json))
 
447
  conn.close()
448
-
 
449
  def analyze_data(item):
450
- comp, tstamp, json_str = item
451
- data_obj = json.loads(json_str)
452
- articles = data_obj.get("articles", [])
453
- analysis = data_obj.get("analysis", "")
 
454
  count_articles = len(articles)
 
 
 
 
455
  return (comp, tstamp, count_articles, analysis)
456
 
457
  results_list = []
@@ -459,80 +245,87 @@ def show_stats():
459
  futures = [executor.submit(analyze_data, dl) for dl in data_list]
460
  for future in as_completed(futures):
461
  results_list.append(future.result())
462
-
463
- for comp, tstamp, count_articles, analysis in results_list:
464
- kst_time = convert_to_seoul_time(tstamp)
 
465
  output += f"### {comp}\n"
466
- output += f"- λ§ˆμ§€λ§‰ μ—…λ°μ΄νŠΈ: {kst_time}\n"
467
- output += f"- μ €μž₯된 기사 수: {count_articles}건\n\n"
468
  if analysis:
469
  output += "#### λ‰΄μŠ€ 감성 뢄석\n"
470
  output += f"{analysis}\n\n"
471
  output += "---\n\n"
472
-
473
  return output
474
 
475
- ######################################################################
476
- # 전체 검색+좜λ ₯+뢄석 μ’…ν•©
477
- ######################################################################
478
  def search_all_companies():
479
  """
480
- λͺ¨λ“  κΈ°μ—… 병렬 검색+뢄석 -> DB μ €μž₯ -> Markdown 좜λ ₯
 
481
  """
482
- overall = "# [전체 검색 κ²°κ³Ό]\n\n"
483
-
484
  def do_search(comp):
485
  return comp, search_company(comp)
486
-
487
  with ThreadPoolExecutor(max_workers=5) as executor:
488
  futures = [executor.submit(do_search, c) for c in KOREAN_COMPANIES]
489
  for future in as_completed(futures):
490
- comp, res = future.result()
491
- overall += f"## {comp}\n"
492
- overall += res + "\n\n"
493
-
494
- return overall
495
 
496
  def load_all_companies():
497
  """
498
- λͺ¨λ“  κΈ°μ—… DB λ‘œλ“œ -> 기사+뢄석
 
499
  """
500
- overall = "# [전체 좜λ ₯ κ²°κ³Ό]\n\n"
 
501
  for comp in KOREAN_COMPANIES:
502
- overall += f"## {comp}\n"
503
- overall += load_company(comp)
504
- overall += "\n"
505
- return overall
506
 
507
  def full_summary_report():
508
  """
509
- 1) 전체 검색+뢄석 => DB
510
- 2) 전체 DB λ‘œλ“œ
511
- 3) 감성 뢄석 톡계
512
  """
513
- search_text = search_all_companies()
514
- load_text = load_all_companies()
 
 
 
 
 
515
  stats_text = show_stats()
516
-
517
- combined = (
518
  "# 전체 뢄석 보고 μš”μ•½\n\n"
519
  "μ•„λž˜ μˆœμ„œλ‘œ μ‹€ν–‰λ˜μ—ˆμŠ΅λ‹ˆλ‹€:\n"
520
  "1. λͺ¨λ“  μ’…λͺ© 검색(병렬) + 뢄석 => 2. λͺ¨λ“  μ’…λͺ© DB κ²°κ³Ό 좜λ ₯ => 3. 전체 감성 뢄석 톡계\n\n"
521
- f"{search_text}\n\n"
522
- f"{load_text}\n\n"
523
  "## [전체 감성 뢄석 톡계]\n\n"
524
  f"{stats_text}"
525
  )
526
- return combined
 
527
 
528
- ######################################################################
529
- # μ‚¬μš©μž μž„μ˜ 검색
530
- ######################################################################
531
  def search_custom(query, country):
532
  """
533
- 1) query & country -> 검색+뢄석
534
- 2) DB μ €μž₯
535
- 3) DB μž¬λ‘œλ“œ -> 기사+뢄석 좜λ ₯
536
  """
537
  error_message, articles = serphouse_search(query, country)
538
  if error_message:
@@ -540,31 +333,39 @@ def search_custom(query, country):
540
  if not articles:
541
  return "검색 κ²°κ³Όκ°€ μ—†μŠ΅λ‹ˆλ‹€."
542
 
 
543
  analysis = analyze_sentiment_batch(articles, client)
544
- store = {
 
 
545
  "articles": articles,
546
  "analysis": analysis
547
  }
548
- save_to_db(query, country, store)
549
-
550
- loaded, ts = load_from_db(query, country)
551
- if not loaded:
 
552
  return "DBμ—μ„œ λ‘œλ“œ μ‹€νŒ¨"
553
 
554
- arts = loaded.get("articles", [])
555
- analy = loaded.get("analysis", "")
556
-
557
  out = f"## [μ‚¬μš©μž μž„μ˜ 검색 κ²°κ³Ό]\n\n"
558
  out += f"**ν‚€μ›Œλ“œ**: {query}\n\n"
559
  out += f"**κ΅­κ°€**: {country}\n\n"
560
- out += f"**μ €μž₯ μ‹œκ°„**: {ts}\n\n"
 
 
 
 
561
  out += display_results(arts)
562
  out += f"### λ‰΄μŠ€ 감성 뢄석\n{analy}\n"
 
563
  return out
564
 
565
- ######################################################################
566
- # Hugging Face openai Client
567
- ######################################################################
 
568
  ACCESS_TOKEN = os.getenv("HF_TOKEN")
569
  if not ACCESS_TOKEN:
570
  raise ValueError("HF_TOKEN environment variable is not set")
@@ -574,14 +375,17 @@ client = OpenAI(
574
  api_key=ACCESS_TOKEN,
575
  )
576
 
577
- ######################################################################
578
- # κ΅­κ°€ μ„€μ •
579
- ######################################################################
 
 
 
580
  COUNTRY_LANGUAGES = {
581
  "United States": "en",
582
  "KOREA": "ko",
583
  "United Kingdom": "en",
584
- "Taiwan": "zh-TW",
585
  "Canada": "en",
586
  "Australia": "en",
587
  "Germany": "de",
@@ -719,11 +523,190 @@ COUNTRY_LOCATIONS = {
719
  "Iceland": "Iceland"
720
  }
721
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
722
  css = """
723
  /* μ „μ—­ μŠ€νƒ€μΌ */
724
  footer {visibility: hidden;}
725
 
726
- /* λ ˆμ΄μ•„μ›ƒ μŠ€νƒ€μΌ, νƒ­ μŠ€νƒ€μΌ, λ“±λ“± */
727
  #status_area {
728
  background: rgba(255, 255, 255, 0.9);
729
  padding: 15px;
@@ -737,6 +720,7 @@ footer {visibility: hidden;}
737
  margin-top: 10px;
738
  }
739
 
 
740
  .tabs {
741
  border-bottom: 2px solid #ddd !important;
742
  margin-bottom: 20px !important;
@@ -764,6 +748,7 @@ footer {visibility: hidden;}
764
  padding: 10px 0;
765
  }
766
 
 
767
  .group {
768
  border: 1px solid #eee;
769
  padding: 15px;
@@ -778,13 +763,123 @@ footer {visibility: hidden;}
778
  border: none !important;
779
  }
780
 
781
- /* 기타 ... */
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
782
  """
783
 
784
  import gradio as gr
785
 
786
  with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css, title="NewsAI μ„œλΉ„μŠ€") as iface:
787
- # DB μ΄ˆκΈ°ν™”
788
  init_db()
789
 
790
  with gr.Tabs():
@@ -793,7 +888,7 @@ with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css, title="NewsAI μ„œλΉ„
793
  gr.Markdown("## EarnBot: κΈ€λ‘œλ²Œ λΉ…ν…Œν¬ κΈ°μ—… 및 투자 전망 AI μžλ™ 뢄석")
794
  gr.Markdown(" * '전체 뢄석 보고 μš”μ•½' 클릭 μ‹œ 전체 μžλ™ 보고 생성.\n * μ•„λž˜ κ°œλ³„ μ’…λͺ©μ˜ '검색(DB μžλ™ μ €μž₯)'κ³Ό '좜λ ₯(DB μžλ™ 호좜)'도 κ°€λŠ₯.\n * μΆ”κ°€λ‘œ, μ›ν•˜λŠ” μž„μ˜ ν‚€μ›Œλ“œ 및 κ΅­κ°€λ‘œ 검색/뢄석할 μˆ˜λ„ μžˆμŠ΅λ‹ˆλ‹€.")
795
 
796
- # μ‚¬μš©μž μž„μ˜ 검색 μ„Ήμ…˜
797
  with gr.Group():
798
  gr.Markdown("### μ‚¬μš©μž μž„μ˜ 검색")
799
  with gr.Row():
@@ -813,29 +908,27 @@ with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css, title="NewsAI μ„œλΉ„
813
 
814
  custom_search_output = gr.Markdown()
815
 
816
- # μž„μ˜ 검색 λ²„νŠΌ 클릭
817
  custom_search_btn.click(
818
  fn=search_custom,
819
  inputs=[user_input, country_selection],
820
  outputs=custom_search_output
821
  )
822
 
823
- # 전체 뢄석 보고 λ²„νŠΌ
824
  with gr.Row():
825
  full_report_btn = gr.Button("전체 뢄석 보고 μš”μ•½", variant="primary")
826
  full_report_display = gr.Markdown()
827
 
828
- # 전체 보고 -> full_summary_report
829
  full_report_btn.click(
830
  fn=full_summary_report,
831
  outputs=full_report_display
832
  )
833
 
834
- # μ§€μ •λœ κΈ°μ—… λͺ©λ‘: 검색 / 좜λ ₯
835
  with gr.Column():
836
  for i in range(0, len(KOREAN_COMPANIES), 2):
837
  with gr.Row():
838
- # μ™Όμͺ½
839
  with gr.Column():
840
  company = KOREAN_COMPANIES[i]
841
  with gr.Group():
@@ -845,18 +938,16 @@ with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css, title="NewsAI μ„œλΉ„
845
  load_btn = gr.Button("좜λ ₯", variant="secondary")
846
  result_display = gr.Markdown()
847
 
848
- # 검색
849
  search_btn.click(
850
  fn=lambda c=company: search_company(c),
851
  outputs=result_display
852
  )
853
- # 좜λ ₯
854
  load_btn.click(
855
  fn=lambda c=company: load_company(c),
856
  outputs=result_display
857
  )
858
 
859
- # 였λ₯Έμͺ½
860
  if i + 1 < len(KOREAN_COMPANIES):
861
  with gr.Column():
862
  company = KOREAN_COMPANIES[i + 1]
 
26
  "INTEL",
27
  "SAMSUNG",
28
  "HYNIX",
29
+ "BITCOIN",
30
  "crypto",
31
  "stock",
32
  "Economics",
33
  "Finance",
34
+ "investing"
35
  ]
36
 
 
 
 
37
  def convert_to_seoul_time(timestamp_str):
 
 
 
 
38
  try:
39
  dt = datetime.strptime(timestamp_str, '%Y-%m-%d %H:%M:%S')
40
  seoul_tz = pytz.timezone('Asia/Seoul')
 
44
  print(f"μ‹œκ°„ λ³€ν™˜ 였λ₯˜: {str(e)}")
45
  return timestamp_str
46
 
 
 
 
47
  def analyze_sentiment_batch(articles, client):
48
  """
49
+ OpenAI APIλ₯Ό 톡해 λ‰΄μŠ€ κΈ°μ‚¬λ“€μ˜ μ’…ν•© 감성 뢄석을 μˆ˜ν–‰
 
50
  """
51
  try:
52
+ # λͺ¨λ“  κΈ°μ‚¬μ˜ 제λͺ©κ³Ό λ‚΄μš©μ„ ν•˜λ‚˜μ˜ ν…μŠ€νŠΈλ‘œ κ²°ν•©
53
  combined_text = "\n\n".join([
54
  f"제λͺ©: {article.get('title', '')}\nλ‚΄μš©: {article.get('snippet', '')}"
55
  for article in articles
56
  ])
57
 
58
+ prompt = f"""λ‹€μŒ λ‰΄μŠ€ λͺ¨μŒμ— λŒ€ν•΄ μ „λ°˜μ μΈ 감성 뢄석을 μˆ˜ν–‰ν•˜μ„Έμš”:
 
59
 
60
  λ‰΄μŠ€ λ‚΄μš©:
61
  {combined_text}
 
82
  except Exception as e:
83
  return f"감성 뢄석 μ‹€νŒ¨: {str(e)}"
84
 
85
+
86
+ # DB μ΄ˆκΈ°ν™” ν•¨μˆ˜
 
87
  def init_db():
 
 
 
 
88
  db_path = pathlib.Path("search_results.db")
89
  conn = sqlite3.connect(db_path)
90
  c = conn.cursor()
 
99
 
100
  def save_to_db(keyword, country, results):
101
  """
102
+ νŠΉμ • (keyword, country) 쑰합에 λŒ€ν•œ 검색 κ²°κ³Όλ₯Ό DB에 μ €μž₯
103
  """
104
  conn = sqlite3.connect("search_results.db")
105
  c = conn.cursor()
 
106
  seoul_tz = pytz.timezone('Asia/Seoul')
107
  now = datetime.now(seoul_tz)
108
  timestamp = now.strftime('%Y-%m-%d %H:%M:%S')
109
 
110
+ c.execute("""INSERT INTO searches
111
+ (keyword, country, results, timestamp)
112
  VALUES (?, ?, ?, ?)""",
113
  (keyword, country, json.dumps(results), timestamp))
114
  conn.commit()
 
116
 
117
  def load_from_db(keyword, country):
118
  """
119
+ νŠΉμ • (keyword, country) 쑰합에 λŒ€ν•œ κ°€μž₯ 졜근 검색 κ²°κ³Όλ₯Ό DBμ—μ„œ 뢈러였기
 
 
120
  """
121
  conn = sqlite3.connect("search_results.db")
122
  c = conn.cursor()
123
+ c.execute("SELECT results, timestamp FROM searches WHERE keyword=? AND country=? ORDER BY timestamp DESC LIMIT 1",
 
 
 
 
124
  (keyword, country))
125
+ result = c.fetchone()
126
  conn.close()
127
+ if result:
128
+ return json.loads(result[0]), convert_to_seoul_time(result[1])
129
  return None, None
130
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
  def display_results(articles):
132
  """
133
+ λ‰΄μŠ€ 기사 λͺ©λ‘μ„ Markdown λ¬Έμžμ—΄λ‘œ λ³€ν™˜ν•˜μ—¬ λ°˜ν™˜
 
134
  """
135
  output = ""
136
  for idx, article in enumerate(articles, 1):
 
 
 
137
  output += f"### {idx}. {article['title']}\n"
138
  output += f"좜처: {article['channel']}\n"
139
  output += f"μ‹œκ°„: {article['time']}\n"
140
  output += f"링크: {article['link']}\n"
141
+ output += f"μš”μ•½: {article['snippet']}\n\n"
 
142
  return output
143
 
 
 
 
144
 
145
+ ########################################
146
+ # 1) 검색 μ‹œ => 기사 + 뢄석 λ™μ‹œ 좜λ ₯, DB μ €μž₯
147
+ ########################################
148
  def search_company(company):
149
  """
150
+ 단일 κΈ°μ—…(λ˜λŠ” ν‚€μ›Œλ“œ)에 λŒ€ν•΄ λ―Έκ΅­ λ‰΄μŠ€ 검색 ν›„,
151
+ 1) 기사 λͺ©λ‘ + 2) 감성 뢄석 보고λ₯Ό ν•¨κ»˜ 좜λ ₯
152
+ => { "articles": [...], "analysis": ... } ν˜•νƒœλ‘œ DB에 μ €μž₯
153
  """
154
  error_message, articles = serphouse_search(company, "United States")
155
  if not error_message and articles:
156
+ # 감성 뢄석
157
  analysis = analyze_sentiment_batch(articles, client)
158
+
159
+ # DB μ €μž₯용 데이터 ꡬ성
160
+ store_dict = {
161
  "articles": articles,
162
  "analysis": analysis
163
  }
164
+ save_to_db(company, "United States", store_dict)
165
+
166
+ # ν™”λ©΄ 좜λ ₯용
167
+ output = display_results(articles)
168
+ output += f"\n\n### 뢄석 보고\n{analysis}\n"
169
+ return output
170
+ return f"{company}에 λŒ€ν•œ 검색 κ²°κ³Όκ°€ μ—†μŠ΅λ‹ˆλ‹€."
171
+
172
+ ########################################
173
+ # 2) 좜λ ₯ μ‹œ => DB에 μ €μž₯된 기사 + 뢄석 ν•¨κ»˜ 좜λ ₯
174
+ ########################################
175
  def load_company(company):
176
  """
177
+ DBμ—μ„œ 단일 κΈ°μ—…(λ˜λŠ” ν‚€μ›Œλ“œ)의 λ―Έκ΅­ λ‰΄μŠ€ 검색 κ²°κ³Όλ₯Ό λΆˆλŸ¬μ™€
178
+ 기사 λͺ©λ‘ + 뢄석 κ²°κ³Όλ₯Ό ν•¨κ»˜ 좜λ ₯
179
  """
180
+ data, timestamp = load_from_db(company, "United States")
181
+ if data:
182
+ # dataλŠ” { "articles": [...], "analysis": "..."} ν˜•νƒœ
183
+ articles = data.get("articles", [])
184
+ analysis = data.get("analysis", "")
185
+
186
+ output = f"### {company} 검색 κ²°κ³Ό\nμ €μž₯ μ‹œκ°„: {timestamp}\n\n"
187
+ output += display_results(articles)
188
+ output += f"\n\n### 뢄석 보고\n{analysis}\n"
189
+ return output
190
  return f"{company}에 λŒ€ν•œ μ €μž₯된 κ²°κ³Όκ°€ μ—†μŠ΅λ‹ˆλ‹€."
191
 
192
+
193
+ ########################################
194
+ # 3) κΈ°μ‘΄ show_stats()μ—μ„œ 리포트 제λͺ© λ³€κ²½
195
+ ########################################
196
  def show_stats():
197
  """
198
+ KOREAN_COMPANIES λͺ©λ‘ λ‚΄ λͺ¨λ“  기업에 λŒ€ν•΄:
199
+ - κ°€μž₯ 졜근 DB μ €μž₯ 일자
200
+ - 기사 수
201
+ - 감성 뢄석 κ²°κ³Ό
202
+ λ₯Ό λ³‘λ ¬μ²˜λ¦¬λ‘œ μ‘°νšŒν•˜μ—¬ λ³΄κ³ μ„œ ν˜•νƒœλ‘œ λ°˜ν™˜
203
+
204
+ (문ꡬ λ³€κ²½) "ν•œκ΅­ κΈ°μ—… λ‰΄μŠ€ 뢄석 리포트" -> "EarnBOT 뢄석 리포트"
205
  """
206
  conn = sqlite3.connect("search_results.db")
207
  c = conn.cursor()
208
+
209
+ output = "## EarnBOT 뢄석 리포트\n\n" # μ—¬κΈ°μ„œ 문ꡬ λ³€κ²½
210
+
211
+ # λͺ¨λ“  기업에 λŒ€ν•΄ DBμ—μ„œ μ½μ–΄μ˜¬ (company, timestamp, articles) λͺ©λ‘ μˆ˜μ§‘
212
  data_list = []
213
+ for company in KOREAN_COMPANIES:
214
  c.execute("""
215
+ SELECT results, timestamp
216
+ FROM searches
217
+ WHERE keyword = ?
218
+ ORDER BY timestamp DESC
219
  LIMIT 1
220
+ """, (company,))
221
+
222
  row = c.fetchone()
223
  if row:
224
+ results_json, timestamp = row
225
+ data_list.append((company, timestamp, results_json))
226
+
227
  conn.close()
228
+
229
+ # 감성 뢄석 병렬 처리 ν•¨μˆ˜
230
  def analyze_data(item):
231
+ comp, tstamp, results_json = item
232
+ data = json.loads(results_json)
233
+ articles = data.get("articles", [])
234
+ analysis = data.get("analysis", "")
235
+
236
  count_articles = len(articles)
237
+ # μ—¬κΈ°μ„œλŠ” 이미 DB에 "analysis"κ°€ λ“€μ–΄ μžˆμœΌλ―€λ‘œ,
238
+ # ꡳ이 μž¬λΆ„μ„ν•  ν•„μš”κ°€ μ—†μœΌλ©΄ κ·ΈλŒ€λ‘œ μ‚¬μš©
239
+ # (ν•„μš” μ‹œ μž¬λΆ„μ„ κ°€λŠ₯)
240
+
241
  return (comp, tstamp, count_articles, analysis)
242
 
243
  results_list = []
 
245
  futures = [executor.submit(analyze_data, dl) for dl in data_list]
246
  for future in as_completed(futures):
247
  results_list.append(future.result())
248
+
249
+ # κ²°κ³Ό 좜λ ₯
250
+ for comp, tstamp, count, analysis in results_list:
251
+ seoul_time = convert_to_seoul_time(tstamp)
252
  output += f"### {comp}\n"
253
+ output += f"- λ§ˆμ§€λ§‰ μ—…λ°μ΄νŠΈ: {seoul_time}\n"
254
+ output += f"- μ €μž₯된 기사 수: {count}건\n\n"
255
  if analysis:
256
  output += "#### λ‰΄μŠ€ 감성 뢄석\n"
257
  output += f"{analysis}\n\n"
258
  output += "---\n\n"
259
+
260
  return output
261
 
262
+
 
 
263
  def search_all_companies():
264
  """
265
+ KOREAN_COMPANIES 리슀트 λ‚΄ λͺ¨λ“  κΈ°μ—… 검색 (λ©€ν‹°μŠ€λ ˆλ”©) =>
266
+ => 뢄석 + DB μ €μž₯ => κ²°κ³Ό Markdown λ°˜ν™˜
267
  """
268
+ overall_result = "# [전체 검색 κ²°κ³Ό]\n\n"
269
+
270
  def do_search(comp):
271
  return comp, search_company(comp)
272
+
273
  with ThreadPoolExecutor(max_workers=5) as executor:
274
  futures = [executor.submit(do_search, c) for c in KOREAN_COMPANIES]
275
  for future in as_completed(futures):
276
+ comp, res_text = future.result()
277
+ overall_result += f"## {comp}\n"
278
+ overall_result += res_text + "\n\n"
279
+
280
+ return overall_result
281
 
282
  def load_all_companies():
283
  """
284
+ KOREAN_COMPANIES 리슀트 λ‚΄ λͺ¨λ“  κΈ°μ—… DB 뢈러였기 =>
285
+ 기사 λͺ©λ‘ + 뢄석 보고 => κ²°κ³Ό Markdown
286
  """
287
+ overall_result = "# [전체 좜λ ₯ κ²°κ³Ό]\n\n"
288
+
289
  for comp in KOREAN_COMPANIES:
290
+ overall_result += f"## {comp}\n"
291
+ overall_result += load_company(comp)
292
+ overall_result += "\n"
293
+ return overall_result
294
 
295
  def full_summary_report():
296
  """
297
+ (1) λͺ¨λ“  κΈ°μ—… 검색(병렬) -> (2) DBμ—μ„œ 뢈러였기 -> (3) 감성 뢄석 톡계
298
+ μˆœμ„œλŒ€λ‘œ μ‹€ν–‰ν•˜μ—¬, 전체 리포트λ₯Ό 합쳐 λ°˜ν™˜
 
299
  """
300
+ # 1) 전체 검색(병렬) => 기사 + 뢄석 DB μ €μž₯
301
+ search_result_text = search_all_companies()
302
+
303
+ # 2) 전체 좜λ ₯ => DB에 μ €μž₯된 기사 + 뢄석 κ²°κ³Ό
304
+ load_result_text = load_all_companies()
305
+
306
+ # 3) 전체 톡계(감성 뢄석) - 리포트 제λͺ© 변경됨(EarnBOT 뢄석 리포트)
307
  stats_text = show_stats()
308
+
309
+ combined_report = (
310
  "# 전체 뢄석 보고 μš”μ•½\n\n"
311
  "μ•„λž˜ μˆœμ„œλ‘œ μ‹€ν–‰λ˜μ—ˆμŠ΅λ‹ˆλ‹€:\n"
312
  "1. λͺ¨λ“  μ’…λͺ© 검색(병렬) + 뢄석 => 2. λͺ¨λ“  μ’…λͺ© DB κ²°κ³Ό 좜λ ₯ => 3. 전체 감성 뢄석 톡계\n\n"
313
+ f"{search_result_text}\n\n"
314
+ f"{load_result_text}\n\n"
315
  "## [전체 감성 뢄석 톡계]\n\n"
316
  f"{stats_text}"
317
  )
318
+ return combined_report
319
+
320
 
321
+ ########################################
322
+ # μ‚¬μš©μž μž„μ˜ 검색 (μΆ”κ°€ κΈ°λŠ₯)
323
+ ########################################
324
  def search_custom(query, country):
325
  """
326
+ μ‚¬μš©μžκ°€ μž…λ ₯ν•œ (query, country)에 λŒ€ν•΄
327
+ 1) 검색 + 뢄석 => DB μ €μž₯
328
+ 2) DB λ‘œλ“œ => κ²°κ³Ό(기사 λͺ©λ‘ + 뢄석) 좜λ ₯
329
  """
330
  error_message, articles = serphouse_search(query, country)
331
  if error_message:
 
333
  if not articles:
334
  return "검색 κ²°κ³Όκ°€ μ—†μŠ΅λ‹ˆλ‹€."
335
 
336
+ # 1) 뢄석
337
  analysis = analyze_sentiment_batch(articles, client)
338
+
339
+ # 2) DB μ €μž₯
340
+ save_data = {
341
  "articles": articles,
342
  "analysis": analysis
343
  }
344
+ save_to_db(query, country, save_data)
345
+
346
+ # 3) DB μž¬λ‘œλ“œ
347
+ loaded_data, timestamp = load_from_db(query, country)
348
+ if not loaded_data:
349
  return "DBμ—μ„œ λ‘œλ“œ μ‹€νŒ¨"
350
 
351
+ # 4) κ²°κ³Ό ν‘œμ‹œ
 
 
352
  out = f"## [μ‚¬μš©μž μž„μ˜ 검색 κ²°κ³Ό]\n\n"
353
  out += f"**ν‚€μ›Œλ“œ**: {query}\n\n"
354
  out += f"**κ΅­κ°€**: {country}\n\n"
355
+ out += f"**μ €μž₯ μ‹œκ°„**: {timestamp}\n\n"
356
+
357
+ arts = loaded_data.get("articles", [])
358
+ analy = loaded_data.get("analysis", "")
359
+
360
  out += display_results(arts)
361
  out += f"### λ‰΄μŠ€ 감성 뢄석\n{analy}\n"
362
+
363
  return out
364
 
365
+
366
########################################
# API authentication
########################################
# Hugging Face inference token; required at startup — fail fast when missing
# so a misconfigured deployment is caught immediately.
ACCESS_TOKEN = os.getenv("HF_TOKEN")
if not ACCESS_TOKEN:
    raise ValueError("HF_TOKEN environment variable is not set")
 
375
  api_key=ACCESS_TOKEN,
376
  )
377
 
378
# SerpHouse search API key, read once at import time.
# NOTE(review): unlike HF_TOKEN above this is not validated — if unset,
# requests will simply fail authorization at call time.
API_KEY = os.getenv("SERPHOUSE_API_KEY")
+
380
+
381
+ ########################################
382
+ # ꡭ가별 μ„€μ •
383
+ ########################################
384
  COUNTRY_LANGUAGES = {
385
  "United States": "en",
386
  "KOREA": "ko",
387
  "United Kingdom": "en",
388
+ "Taiwan": "zh-TW",
389
  "Canada": "en",
390
  "Australia": "en",
391
  "Germany": "de",
 
523
  "Iceland": "Iceland"
524
  }
525
 
526
+
527
@lru_cache(maxsize=100)
def translate_query(query, country):
    """
    Translate the search query into the target country's language via the
    unofficial Google Translate endpoint.

    Returns the translated string, or the original query unchanged when
    translation is unnecessary (ASCII-only query, Korean target country,
    unknown country) or when any error occurs (best-effort behavior).
    Results are memoized (lru_cache) to avoid repeated network calls.
    """
    try:
        # ASCII-only queries are assumed to already be English; skip the call.
        if is_english(query):
            return query

        if country in COUNTRY_LANGUAGES:
            # Korean queries targeting Korea need no translation.
            # BUG FIX: the language table uses the key "KOREA", but only
            # "South Korea" was checked before, so Korean searches were
            # needlessly round-tripped through the translator.
            if country in ("South Korea", "KOREA"):
                return query
            target_lang = COUNTRY_LANGUAGES[country]

            url = "https://translate.googleapis.com/translate_a/single"
            params = {
                "client": "gtx",
                "sl": "auto",
                "tl": target_lang,
                "dt": "t",
                "q": query
            }

            session = requests.Session()
            retries = Retry(total=3, backoff_factor=0.5)
            session.mount('https://', HTTPAdapter(max_retries=retries))

            response = session.get(url, params=params, timeout=(5, 10))
            # Fail fast on HTTP errors instead of trying to parse an error
            # payload as a translation result.
            response.raise_for_status()
            translated_text = response.json()[0][0][0]
            return translated_text

        return query

    except Exception as e:
        # Best-effort: on any failure fall back to the untranslated query.
        print(f"λ²ˆμ—­ 였λ₯˜: {str(e)}")
        return query
563
+
564
def is_english(text):
    """Return True when the text is pure ASCII after ignoring spaces, '-' and '_'."""
    significant = text.replace(' ', '').replace('-', '').replace('_', '')
    return significant.isascii()
566
+
567
def search_serphouse(query, country, page=1, num_result=100):
    """
    Fire a live SerpHouse request for the 'news' tab (sort_by=date) and
    return the raw response for the given query.

    Args:
        query: search keyword; translated per-country before sending.
        country: display country name, mapped to loc/lang payload settings.
        page: result page number.
        num_result: number of results to request. FIX: this parameter used to
            be ignored (the payload hard-coded "100"); the default is now 100
            so existing callers see identical behavior.

    Returns:
        dict with either {"results": <json>, "translated_query": str} on
        success, or {"error": <message>, "translated_query": str} on failure.
    """
    url = "https://api.serphouse.com/serp/live"

    # Restrict results to roughly the last 24 hours.
    now = datetime.utcnow()
    yesterday = now - timedelta(days=1)
    date_range = f"{yesterday.strftime('%Y-%m-%d')},{now.strftime('%Y-%m-%d')}"

    translated_query = translate_query(query, country)

    payload = {
        "data": {
            "q": translated_query,
            "domain": "google.com",
            "loc": COUNTRY_LOCATIONS.get(country, "United States"),
            "lang": COUNTRY_LANGUAGES.get(country, "en"),
            "device": "desktop",
            "serp_type": "news",
            "page": str(page),
            "num": str(num_result),  # FIX: was hard-coded "100", ignoring num_result
            "date_range": date_range,
            "sort_by": "date"
        }
    }

    headers = {
        "accept": "application/json",
        "content-type": "application/json",
        "authorization": f"Bearer {API_KEY}"
    }

    try:
        session = requests.Session()

        # Retry transient server errors and rate limiting on the POST.
        retries = Retry(
            total=5,
            backoff_factor=1,
            status_forcelist=[500, 502, 503, 504, 429],
            allowed_methods=["POST"]
        )

        adapter = HTTPAdapter(max_retries=retries)
        session.mount('http://', adapter)
        session.mount('https://', adapter)

        response = session.post(
            url,
            json=payload,
            headers=headers,
            timeout=(30, 30)
        )

        response.raise_for_status()
        return {"results": response.json(), "translated_query": translated_query}

    except requests.exceptions.Timeout:
        return {
            "error": "검색 μ‹œκ°„μ΄ μ΄ˆκ³Όλ˜μ—ˆμŠ΅λ‹ˆλ‹€. μž μ‹œ ν›„ λ‹€μ‹œ μ‹œλ„ν•΄μ£Όμ„Έμš”.",
            "translated_query": query
        }
    except requests.exceptions.RequestException as e:
        return {
            "error": f"검색 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}",
            "translated_query": query
        }
    except Exception as e:
        return {
            "error": f"예기치 μ•Šμ€ 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}",
            "translated_query": query
        }
640
+
641
def format_results_from_raw(response_data):
    """
    Convert a raw SerpHouse response into (error_message, article_list).

    Articles from Korean outlets (Korean domain markers in URL/channel, or
    Korea-related keywords in the title) are dropped. On success the error
    message is the empty string.
    """
    if "error" in response_data:
        return "Error: " + response_data["error"], []

    try:
        results = response_data["results"]
        translated_query = response_data["translated_query"]

        # Actual news hits live three levels deep in the payload.
        news_results = results.get('results', {}).get('results', {}).get('news', [])
        if not news_results:
            return "검색 κ²°κ³Όκ°€ μ—†μŠ΅λ‹ˆλ‹€.", []

        # Markers used to exclude Korean-domain / Korea-related articles.
        korean_domains = [
            '.kr', 'korea', 'korean', 'yonhap', 'hankyung', 'chosun',
            'donga', 'joins', 'hani', 'koreatimes', 'koreaherald'
        ]
        korean_keywords = [
            'korea', 'korean', 'seoul', 'busan', 'incheon', 'daegu',
            'gwangju', 'daejeon', 'ulsan', 'sejong'
        ]

        def _is_korean_item(url_lc, title_lc, channel_lc):
            # Korean when URL/channel hits a domain marker, or the title
            # mentions a Korean city/keyword.
            domain_hit = any(d in url_lc or d in channel_lc for d in korean_domains)
            keyword_hit = any(k in title_lc for k in korean_keywords)
            return domain_hit or keyword_hit

        filtered_articles = []
        for idx, item in enumerate(news_results, 1):
            url_lc = item.get("url", item.get("link", "")).lower()
            title_lc = item.get("title", "").lower()
            channel_lc = item.get("channel", item.get("source", "")).lower()

            if _is_korean_item(url_lc, title_lc, channel_lc):
                continue  # drop Korean-language / Korean-domain articles

            filtered_articles.append({
                "index": idx,  # position in the ORIGINAL (unfiltered) list
                "title": item.get("title", "제λͺ© μ—†μŒ"),
                "link": url_lc,
                "snippet": item.get("snippet", "λ‚΄μš© μ—†μŒ"),
                "channel": item.get("channel", item.get("source", "μ•Œ 수 μ—†μŒ")),
                "time": item.get("time", item.get("date", "μ•Œ 수 μ—†λŠ” μ‹œκ°„")),
                "image_url": item.get("img", item.get("thumbnail", "")),
                "translated_query": translated_query
            })

        return "", filtered_articles
    except Exception as e:
        return f"κ²°κ³Ό 처리 쀑 였λ₯˜ λ°œμƒ: {str(e)}", []
695
+
696
def serphouse_search(query, country):
    """
    One-shot convenience wrapper: run the live search and format the raw
    response in a single call. Returns (error_message, article_list).
    """
    return format_results_from_raw(search_serphouse(query, country))
702
+
703
+
704
+ # CSS (UI μ»€μŠ€ν„°λ§ˆμ΄μ§•)
705
  css = """
706
  /* μ „μ—­ μŠ€νƒ€μΌ */
707
  footer {visibility: hidden;}
708
 
709
+ /* λ ˆμ΄μ•„μ›ƒ μ»¨ν…Œμ΄λ„ˆ */
710
  #status_area {
711
  background: rgba(255, 255, 255, 0.9);
712
  padding: 15px;
 
720
  margin-top: 10px;
721
  }
722
 
723
+ /* νƒ­ μŠ€νƒ€μΌ */
724
  .tabs {
725
  border-bottom: 2px solid #ddd !important;
726
  margin-bottom: 20px !important;
 
748
  padding: 10px 0;
749
  }
750
 
751
+ /* κΈ°λ³Έ μ»¨ν…Œμ΄λ„ˆ */
752
  .group {
753
  border: 1px solid #eee;
754
  padding: 15px;
 
763
  border: none !important;
764
  }
765
 
766
+ /* μž…λ ₯ ν•„λ“œ */
767
+ .textbox {
768
+ border: 1px solid #ddd !important;
769
+ border-radius: 4px !important;
770
+ }
771
+
772
+ /* ν”„λ‘œκ·Έλ ˆμŠ€λ°” μ»¨ν…Œμ΄λ„ˆ */
773
+ .progress-container {
774
+ position: fixed;
775
+ top: 0;
776
+ left: 0;
777
+ width: 100%;
778
+ height: 6px;
779
+ background: #e0e0e0;
780
+ z-index: 1000;
781
+ }
782
+
783
+ /* ν”„λ‘œκ·Έλ ˆμŠ€bar */
784
+ .progress-bar {
785
+ height: 100%;
786
+ background: linear-gradient(90deg, #2196F3, #00BCD4);
787
+ box-shadow: 0 0 10px rgba(33, 150, 243, 0.5);
788
+ transition: width 0.3s ease;
789
+ animation: progress-glow 1.5s ease-in-out infinite;
790
+ }
791
+
792
+ /* ν”„λ‘œκ·Έλ ˆμŠ€ ν…μŠ€νŠΈ */
793
+ .progress-text {
794
+ position: fixed;
795
+ top: 8px;
796
+ left: 50%;
797
+ transform: translateX(-50%);
798
+ background: #333;
799
+ color: white;
800
+ padding: 4px 12px;
801
+ border-radius: 15px;
802
+ font-size: 14px;
803
+ z-index: 1001;
804
+ box-shadow: 0 2px 5px rgba(0,0,0,0.2);
805
+ }
806
+
807
+ /* ν”„λ‘œκ·Έλ ˆμŠ€λ°” μ• λ‹ˆλ©”μ΄μ…˜ */
808
+ @keyframes progress-glow {
809
+ 0% {
810
+ box-shadow: 0 0 5px rgba(33, 150, 243, 0.5);
811
+ }
812
+ 50% {
813
+ box-shadow: 0 0 20px rgba(33, 150, 243, 0.8);
814
+ }
815
+ 100% {
816
+ box-shadow: 0 0 5px rgba(33, 150, 243, 0.5);
817
+ }
818
+ }
819
+
820
+ /* λ°˜μ‘ν˜• λ””μžμΈ */
821
+ @media (max-width: 768px) {
822
+ .group {
823
+ padding: 10px;
824
+ margin-bottom: 15px;
825
+ }
826
+
827
+ .progress-text {
828
+ font-size: 12px;
829
+ padding: 3px 10px;
830
+ }
831
+ }
832
+
833
+ /* λ‘œλ”© μƒνƒœ ν‘œμ‹œ κ°œμ„  */
834
+ .loading {
835
+ opacity: 0.7;
836
+ pointer-events: none;
837
+ transition: opacity 0.3s ease;
838
+ }
839
+
840
+ /* κ²°κ³Ό μ»¨ν…Œμ΄λ„ˆ μ• λ‹ˆλ©”μ΄μ…˜ */
841
+ .group {
842
+ transition: all 0.3s ease;
843
+ opacity: 0;
844
+ transform: translateY(20px);
845
+ }
846
+
847
+ .group.visible {
848
+ opacity: 1;
849
+ transform: translateY(0);
850
+ }
851
+
852
+ /* Examples μŠ€νƒ€μΌλ§ */
853
+ .examples-table {
854
+ margin-top: 10px !important;
855
+ margin-bottom: 20px !important;
856
+ }
857
+
858
+ .examples-table button {
859
+ background-color: #f0f0f0 !important;
860
+ border: 1px solid #ddd !important;
861
+ border-radius: 4px !important;
862
+ padding: 5px 10px !important;
863
+ margin: 2px !important;
864
+ transition: all 0.3s ease !important;
865
+ }
866
+
867
+ .examples-table button:hover {
868
+ background-color: #e0e0e0 !important;
869
+ transform: translateY(-1px) !important;
870
+ box-shadow: 0 2px 5px rgba(0,0,0,0.1) !important;
871
+ }
872
+
873
+ .examples-table .label {
874
+ font-weight: bold !important;
875
+ color: #444 !important;
876
+ margin-bottom: 5px !important;
877
+ }
878
  """
879
 
880
  import gradio as gr
881
 
882
  with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css, title="NewsAI μ„œλΉ„μŠ€") as iface:
 
883
  init_db()
884
 
885
  with gr.Tabs():
 
888
  gr.Markdown("## EarnBot: κΈ€λ‘œλ²Œ λΉ…ν…Œν¬ κΈ°μ—… 및 투자 전망 AI μžλ™ 뢄석")
889
  gr.Markdown(" * '전체 뢄석 보고 μš”μ•½' 클릭 μ‹œ 전체 μžλ™ 보고 생성.\n * μ•„λž˜ κ°œλ³„ μ’…λͺ©μ˜ '검색(DB μžλ™ μ €μž₯)'κ³Ό '좜λ ₯(DB μžλ™ 호좜)'도 κ°€λŠ₯.\n * μΆ”κ°€λ‘œ, μ›ν•˜λŠ” μž„μ˜ ν‚€μ›Œλ“œ 및 κ΅­κ°€λ‘œ 검색/뢄석할 μˆ˜λ„ μžˆμŠ΅λ‹ˆλ‹€.")
890
 
891
+ # (μ‚¬μš©μž μž„μ˜ 검색 μ„Ήμ…˜)
892
  with gr.Group():
893
  gr.Markdown("### μ‚¬μš©μž μž„μ˜ 검색")
894
  with gr.Row():
 
908
 
909
  custom_search_output = gr.Markdown()
910
 
 
911
  custom_search_btn.click(
912
  fn=search_custom,
913
  inputs=[user_input, country_selection],
914
  outputs=custom_search_output
915
  )
916
 
917
+ # 전체 뢄석 보고 μš”μ•½ λ²„νŠΌ
918
  with gr.Row():
919
  full_report_btn = gr.Button("전체 뢄석 보고 μš”μ•½", variant="primary")
920
  full_report_display = gr.Markdown()
921
 
 
922
  full_report_btn.click(
923
  fn=full_summary_report,
924
  outputs=full_report_display
925
  )
926
 
927
+ # μ§€μ •λœ 리슀트 (KOREAN_COMPANIES) κ°œλ³„ κΈ°μ—… 검색/좜λ ₯
928
  with gr.Column():
929
  for i in range(0, len(KOREAN_COMPANIES), 2):
930
  with gr.Row():
931
+ # μ™Όμͺ½ μ—΄
932
  with gr.Column():
933
  company = KOREAN_COMPANIES[i]
934
  with gr.Group():
 
938
  load_btn = gr.Button("좜λ ₯", variant="secondary")
939
  result_display = gr.Markdown()
940
 
 
941
  search_btn.click(
942
  fn=lambda c=company: search_company(c),
943
  outputs=result_display
944
  )
 
945
  load_btn.click(
946
  fn=lambda c=company: load_company(c),
947
  outputs=result_display
948
  )
949
 
950
+ # 였λ₯Έμͺ½ μ—΄
951
  if i + 1 < len(KOREAN_COMPANIES):
952
  with gr.Column():
953
  company = KOREAN_COMPANIES[i + 1]