ginipick committed on
Commit 9418c49 · verified · 1 Parent(s): 9295f92

Update app.py

Files changed (1): app.py (+182 −173)
app.py CHANGED
@@ -14,7 +14,7 @@ import pathlib
import sqlite3
import pytz

- # 한국 기업 리스트
+ # List of target companies/keywords
KOREAN_COMPANIES = [
    "NVIDIA",
    "ALPHABET",
@@ -31,44 +31,47 @@ KOREAN_COMPANIES = [
    "stock",
    "Economics",
    "Finance",
-     "investing"
+     "investing"
]

def convert_to_seoul_time(timestamp_str):
+     """
+     Convert a given timestamp string (UTC) to Seoul time (KST).
+     """
    try:
        dt = datetime.strptime(timestamp_str, '%Y-%m-%d %H:%M:%S')
        seoul_tz = pytz.timezone('Asia/Seoul')
        seoul_time = seoul_tz.localize(dt)
        return seoul_time.strftime('%Y-%m-%d %H:%M:%S KST')
    except Exception as e:
-         print(f"시간 변환 오류: {str(e)}")
+         print(f"Time conversion error: {str(e)}")
        return timestamp_str

def analyze_sentiment_batch(articles, client):
    """
-     OpenAI API를 통해 뉴스 기사들의 종합 감성 분석을 수행
+     Perform a comprehensive sentiment analysis of the news articles using the OpenAI API.
    """
    try:
-         # 모든 기사의 제목과 내용을 하나의 텍스트로 결합
+         # Combine all articles into a single text
        combined_text = "\n\n".join([
-             f"제목: {article.get('title', '')}\n내용: {article.get('snippet', '')}"
+             f"Title: {article.get('title', '')}\nContent: {article.get('snippet', '')}"
            for article in articles
        ])

-         prompt = f"""다음 뉴스 모음에 대해 전반적인 감성 분석을 수행하세요:
+         prompt = f"""Please perform an overall sentiment analysis of the following collection of news articles:

- 뉴스 내용:
+ News content:
{combined_text}

- 다음 형식으로 분석해주세요:
- 1. 전반적 감성: [긍정/부정/중립]
- 2. 주요 긍정적 요소:
-    - [항목1]
-    - [항목2]
- 3. 주요 부정적 요소:
-    - [항목1]
-    - [항목2]
- 4. 종합 평가: [상세 설명]
+ Please follow this format:
+ 1. Overall Sentiment: [Positive/Negative/Neutral]
+ 2. Key Positive Factors:
+    - [Item1]
+    - [Item2]
+ 3. Key Negative Factors:
+    - [Item1]
+    - [Item2]
+ 4. Summary: [Detailed explanation]
"""

        response = client.chat.completions.create(
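For reference, a quick sketch of how `convert_to_seoul_time` behaves: it localizes the naive timestamp as Seoul time rather than converting from another zone, and falls back to the raw string on parse errors. The sample timestamps below are hypothetical:

```python
# Hypothetical inputs; format must match '%Y-%m-%d %H:%M:%S' exactly.
print(convert_to_seoul_time("2024-01-15 09:30:00"))
# -> '2024-01-15 09:30:00 KST' (naive timestamp labeled as KST)
print(convert_to_seoul_time("not-a-timestamp"))
# -> 'not-a-timestamp' (returned unchanged after the error is printed)
```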
@@ -80,11 +83,14 @@ def analyze_sentiment_batch(articles, client):

        return response.choices[0].message.content
    except Exception as e:
-         return f"감성 분석 실패: {str(e)}"
+         return f"Sentiment analysis failed: {str(e)}"


- # DB 초기화 함수
+ # Initialize the database
def init_db():
+     """
+     Initialize the SQLite database (search_results.db) if it doesn't already exist.
+     """
    db_path = pathlib.Path("search_results.db")
    conn = sqlite3.connect(db_path)
    c = conn.cursor()
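The arguments to `client.chat.completions.create` are elided in this hunk; a minimal sketch of such a call with the OpenAI v1 client, where the model name and temperature are assumptions rather than values taken from app.py:

```python
# Sketch only: model id and sampling settings are placeholders.
response = client.chat.completions.create(
    model="gpt-4o-mini",  # assumed model id
    messages=[{"role": "user", "content": prompt}],
    temperature=0.3,      # assumed setting
)
print(response.choices[0].message.content)  # matches the access pattern above
```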
@@ -99,7 +105,7 @@ def init_db():

def save_to_db(keyword, country, results):
    """
-     특정 (keyword, country) 조합에 대한 검색 결과를 DB에 저장
+     Save the search results for a specific (keyword, country) combination into the database.
    """
    conn = sqlite3.connect("search_results.db")
    c = conn.cursor()
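The `CREATE TABLE` statement in `init_db` is elided from this diff; a plausible schema, inferred only from the columns the visible queries reference (`keyword`, `country`, `results`, `timestamp`), not the verbatim statement from app.py:

```python
# Inferred sketch of the searches table.
c.execute("""
    CREATE TABLE IF NOT EXISTS searches (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        keyword TEXT,
        country TEXT,
        results TEXT,                              -- JSON-encoded payload
        timestamp TEXT DEFAULT CURRENT_TIMESTAMP   -- parsed by convert_to_seoul_time
    )
""")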
@@ -116,12 +122,15 @@ def save_to_db(keyword, country, results):

def load_from_db(keyword, country):
    """
-     특정 (keyword, country) 조합에 대한 가장 최근 검색 결과를 DB에서 불러오기
+     Load the most recent search results for a specific (keyword, country) combination from the database.
+     Returns the data and the timestamp.
    """
    conn = sqlite3.connect("search_results.db")
    c = conn.cursor()
-     c.execute("SELECT results, timestamp FROM searches WHERE keyword=? AND country=? ORDER BY timestamp DESC LIMIT 1",
-               (keyword, country))
+     c.execute(
+         "SELECT results, timestamp FROM searches WHERE keyword=? AND country=? ORDER BY timestamp DESC LIMIT 1",
+         (keyword, country)
+     )
    result = c.fetchone()
    conn.close()
    if result:
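A minimal round trip through the two helpers, assuming `results` is stored as a JSON string (consistent with the `json.loads(results_json)` call visible in `show_stats` below); keyword and country values here are examples:

```python
# Hypothetical usage of the save/load pair.
save_to_db("NVIDIA", "United States", {"articles": [], "analysis": ""})
data, ts = load_from_db("NVIDIA", "United States")
# data -> the most recently saved dict, ts -> its DB timestamp
# (the fallback when no row exists depends on the elided else branch)
```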
@@ -130,85 +139,85 @@ def load_from_db(keyword, country):

def display_results(articles):
    """
-     뉴스 기사 목록을 Markdown 문자열로 변환하여 반환
+     Convert a list of news articles into a Markdown string for display.
    """
    output = ""
    for idx, article in enumerate(articles, 1):
        output += f"### {idx}. {article['title']}\n"
-         output += f"출처: {article['channel']}\n"
-         output += f"시간: {article['time']}\n"
-         output += f"링크: {article['link']}\n"
-         output += f"요약: {article['snippet']}\n\n"
+         output += f"Source: {article['channel']}\n"
+         output += f"Time: {article['time']}\n"
+         output += f"Link: {article['link']}\n"
+         output += f"Summary: {article['snippet']}\n\n"
    return output


########################################
- # 1) 검색 => 기사 + 분석 동시 출력, DB 저장
+ # 1) Search => Articles + Analysis, then save to DB
########################################
def search_company(company):
    """
-     단일 기업(또는 키워드)에 대해 미국 뉴스 검색 후,
-     1) 기사 목록 + 2) 감성 분석 보고를 함께 출력
-     => { "articles": [...], "analysis": ... } 형태로 DB에 저장
+     For a single company (or keyword), search US news.
+     1) Retrieve a list of articles
+     2) Perform sentiment analysis
+     3) Save results to DB
+     4) Return (articles + analysis) in a single output.
    """
    error_message, articles = serphouse_search(company, "United States")
    if not error_message and articles:
-         # 감성 분석
+         # Perform sentiment analysis
        analysis = analyze_sentiment_batch(articles, client)

-         # DB 저장용 데이터 구성
+         # Prepare data to save in DB
        store_dict = {
            "articles": articles,
            "analysis": analysis
        }
        save_to_db(company, "United States", store_dict)

-         # 화면 출력용
+         # Prepare output for display
        output = display_results(articles)
-         output += f"\n\n### 분석 보고\n{analysis}\n"
+         output += f"\n\n### Analysis Report\n{analysis}\n"
        return output
-     return f"{company}에 대한 검색 결과가 없습니다."
+     return f"No search results found for {company}."

########################################
- # 2) 출력 => DB에 저장된 기사 + 분석 함께 출력
+ # 2) Load => Return articles + analysis from DB
########################################
def load_company(company):
    """
-     DB에서 단일 기업(또는 키워드) 미국 뉴스 검색 결과를 불러와
-     기사 목록 + 분석 결과를 함께 출력
+     Load the most recent US news search results for the given company (or keyword) from the database,
+     and return the articles + analysis in a single output.
    """
    data, timestamp = load_from_db(company, "United States")
    if data:
-         # data는 { "articles": [...], "analysis": "..."} 형태
        articles = data.get("articles", [])
        analysis = data.get("analysis", "")

-         output = f"### {company} 검색 결과\n저장 시간: {timestamp}\n\n"
+         output = f"### {company} Search Results\nLast Updated: {timestamp}\n\n"
        output += display_results(articles)
-         output += f"\n\n### 분석 보고\n{analysis}\n"
+         output += f"\n\n### Analysis Report\n{analysis}\n"
        return output
-     return f"{company}에 대한 저장된 결과가 없습니다."
+     return f"No saved results for {company}."


########################################
- # 3) 기존 show_stats()에서 리포트 제목 변경
+ # 3) Updated show_stats() with new title
########################################
def show_stats():
    """
-     KOREAN_COMPANIES 목록 모든 기업에 대해:
-     - 가장 최근 DB 저장 일자
-     - 기사
-     - 감성 분석 결과
-     병렬처리로 조회하여 보고서 형태로 반환
-
-     (문구 변경) "한국 기업 뉴스 분석 리포트" -> "EarnBOT 분석 리포트"
+     For each company in KOREAN_COMPANIES:
+     - Retrieve the most recent timestamp in DB
+     - Number of articles
+     - Sentiment analysis result
+     Return these in a report format.
+
+     Title changed to: "EarnBOT Analysis Report"
    """
    conn = sqlite3.connect("search_results.db")
    c = conn.cursor()

-     output = "## EarnBOT 분석 리포트\n\n"  # 여기서 문구 변경
+     output = "## EarnBOT Analysis Report\n\n"

-     # 모든 기업에 대해 DB에서 읽어올 (company, timestamp, articles) 목록 수집
    data_list = []
    for company in KOREAN_COMPANIES:
        c.execute("""
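The body of this per-company query is elided between the hunks; given the columns consumed later (`timestamp`, `results`) and the `(comp, tstamp, results_json)` unpacking in `analyze_data`, it is presumably of this shape (a sketch, not the verbatim SQL):

```python
# Sketch of the elided query; table/column names follow load_from_db above.
c.execute("""
    SELECT timestamp, results
    FROM searches
    WHERE keyword = ? AND country = ?
    ORDER BY timestamp DESC
    LIMIT 1
""", (company, "United States"))
row = c.fetchone()
if row:
    data_list.append((company, row[0], row[1]))
```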
@@ -226,7 +235,6 @@ def show_stats():

    conn.close()

-     # 감성 분석 병렬 처리 함수
    def analyze_data(item):
        comp, tstamp, results_json = item
        data = json.loads(results_json)
@@ -234,10 +242,6 @@ def show_stats():
        analysis = data.get("analysis", "")

        count_articles = len(articles)
-         # 여기서는 이미 DB에 "analysis"가 들어 있으므로,
-         # 굳이 재분석할 필요가 없으면 그대로 사용
-         # (필요 시 재분석 가능)
-
        return (comp, tstamp, count_articles, analysis)

    results_list = []
@@ -246,14 +250,13 @@ def show_stats():
        for future in as_completed(futures):
            results_list.append(future.result())

-     # 결과 출력
    for comp, tstamp, count, analysis in results_list:
        seoul_time = convert_to_seoul_time(tstamp)
        output += f"### {comp}\n"
-         output += f"- 마지막 업데이트: {seoul_time}\n"
-         output += f"- 저장된 기사 수: {count}건\n\n"
+         output += f"- Last updated: {seoul_time}\n"
+         output += f"- Number of articles stored: {count}\n\n"
        if analysis:
-             output += "#### 뉴스 감성 분석\n"
+             output += "#### News Sentiment Analysis\n"
            output += f"{analysis}\n\n"
        output += "---\n\n"

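The executor setup feeding this loop is elided; a typical pattern that matches the `futures`/`as_completed` usage shown, assuming the standard `concurrent.futures` imports at the top of app.py:

```python
from concurrent.futures import ThreadPoolExecutor, as_completed

# Sketch: max_workers is an assumption, not a value from app.py.
with ThreadPoolExecutor(max_workers=5) as executor:
    futures = [executor.submit(analyze_data, item) for item in data_list]
    for future in as_completed(futures):
        results_list.append(future.result())
```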
@@ -262,10 +265,10 @@ def show_stats():

def search_all_companies():
    """
-     KOREAN_COMPANIES 리스트 모든 기업 검색 (멀티스레딩) =>
-     => 분석 + DB 저장 => 결과 Markdown 반환
+     Search all companies in KOREAN_COMPANIES (in parallel),
+     perform sentiment analysis + save to DB => return Markdown of all results.
    """
-     overall_result = "# [전체 검색 결과]\n\n"
+     overall_result = "# [Search Results for All Companies]\n\n"

    def do_search(comp):
        return comp, search_company(comp)
@@ -281,10 +284,9 @@ def search_all_companies():

def load_all_companies():
    """
-     KOREAN_COMPANIES 리스트 모든 기업 DB 불러오기 =>
-     기사 목록 + 분석 보고 => 결과 Markdown
+     Load articles + analysis for all companies in KOREAN_COMPANIES from the DB => return Markdown.
    """
-     overall_result = "# [전체 출력 결과]\n\n"
+     overall_result = "# [All Companies Data Output]\n\n"

    for comp in KOREAN_COMPANIES:
        overall_result += f"## {comp}\n"
@@ -294,77 +296,77 @@ def load_all_companies():

def full_summary_report():
    """
-     (1) 모든 기업 검색(병렬) -> (2) DB에서 불러오기 -> (3) 감성 분석 통계
-     순서대로 실행하여, 전체 리포트를 합쳐 반환
+     1) Search all companies (in parallel) -> 2) Load results -> 3) Show sentiment analysis stats
+     Return a combined report with all three steps.
    """
-     # 1) 전체 검색(병렬) => 기사 + 분석 DB 저장
+     # 1) Search all companies => store to DB
    search_result_text = search_all_companies()

-     # 2) 전체 출력 => DB에 저장된 기사 + 분석 결과
+     # 2) Load all results => from DB
    load_result_text = load_all_companies()

-     # 3) 전체 통계(감성 분석) - 리포트 제목 변경됨(EarnBOT 분석 리포트)
+     # 3) Show stats => EarnBOT Analysis Report
    stats_text = show_stats()

    combined_report = (
-         "# 전체 분석 보고 요약\n\n"
-         "아래 순서로 실행되었습니다:\n"
-         "1. 모든 종목 검색(병렬) + 분석 => 2. 모든 종목 DB 결과 출력 => 3. 전체 감성 분석 통계\n\n"
+         "# Full Analysis Summary Report\n\n"
+         "Executed in the following order:\n"
+         "1. Search all companies (parallel) + sentiment analysis => 2. Load results from DB => 3. Show overall sentiment analysis stats\n\n"
        f"{search_result_text}\n\n"
        f"{load_result_text}\n\n"
-         "## [전체 감성 분석 통계]\n\n"
+         "## [Overall Sentiment Analysis Stats]\n\n"
        f"{stats_text}"
    )
    return combined_report


########################################
- # 사용자 임의 검색 (추가 기능)
+ # Additional feature: User custom search
########################################
def search_custom(query, country):
    """
-     사용자가 입력한 (query, country)에 대해
-     1) 검색 + 분석 => DB 저장
-     2) DB 로드 => 결과(기사 목록 + 분석) 출력
+     For a user-provided (query, country):
+     1) Search + sentiment analysis => save to DB
+     2) Load from DB => display articles + analysis
    """
    error_message, articles = serphouse_search(query, country)
    if error_message:
-         return f"오류 발생: {error_message}"
+         return f"An error occurred: {error_message}"
    if not articles:
-         return "검색 결과가 없습니다."
+         return "No results were found for your query."

-     # 1) 분석
+     # 1) Perform analysis
    analysis = analyze_sentiment_batch(articles, client)

-     # 2) DB 저장
+     # 2) Save to DB
    save_data = {
        "articles": articles,
        "analysis": analysis
    }
    save_to_db(query, country, save_data)

-     # 3) DB 재로드
+     # 3) Reload from DB
    loaded_data, timestamp = load_from_db(query, country)
    if not loaded_data:
-         return "DB에서 로드 실패"
+         return "Failed to load data from DB."

-     # 4) 결과 표시
-     out = f"## [사용자 임의 검색 결과]\n\n"
-     out += f"**키워드**: {query}\n\n"
-     out += f"**국가**: {country}\n\n"
-     out += f"**저장 시간**: {timestamp}\n\n"
+     # 4) Prepare final output
+     out = f"## [Custom Search Results]\n\n"
+     out += f"**Keyword**: {query}\n\n"
+     out += f"**Country**: {country}\n\n"
+     out += f"**Timestamp**: {timestamp}\n\n"

    arts = loaded_data.get("articles", [])
    analy = loaded_data.get("analysis", "")

    out += display_results(arts)
-     out += f"### 뉴스 감성 분석\n{analy}\n"
+     out += f"### News Sentiment Analysis\n{analy}\n"

    return out


########################################
- # API 인증
+ # API Authentication
########################################
ACCESS_TOKEN = os.getenv("HF_TOKEN")
if not ACCESS_TOKEN:
@@ -379,13 +381,13 @@ API_KEY = os.getenv("SERPHOUSE_API_KEY")


########################################
- # 국가별 설정
+ # Country-specific settings
########################################
COUNTRY_LANGUAGES = {
    "United States": "en",
    "KOREA": "ko",
    "United Kingdom": "en",
-     "Taiwan": "zh-TW",
+     "Taiwan": "zh-TW",
    "Canada": "en",
    "Australia": "en",
    "Germany": "de",
@@ -527,7 +529,8 @@ COUNTRY_LOCATIONS = {
@lru_cache(maxsize=100)
def translate_query(query, country):
    """
-     Google Translation API(비공식) 사용하여 검색어를 해당 국가 언어로 번역
+     Use the unofficial Google Translation API to translate the query into the target country's language.
+     If the query is already in English, or if translation fails, return the original query.
    """
    try:
        if is_english(query):
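Since `translate_query` is wrapped in `functools.lru_cache(maxsize=100)`, repeated searches for the same (query, country) pair skip the network call entirely; the cache can be inspected or reset with the standard decorator methods:

```python
translate_query("Apple", "Germany")   # first call hits the translation endpoint
translate_query("Apple", "Germany")   # second call is served from the cache
print(translate_query.cache_info())   # e.g. CacheInfo(hits=1, misses=1, ...)
translate_query.cache_clear()         # drop all memoized translations
```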
@@ -554,20 +557,23 @@ def translate_query(query, country):
            response = session.get(url, params=params, timeout=(5, 10))
            translated_text = response.json()[0][0][0]
            return translated_text
-
        return query

    except Exception as e:
-         print(f"번역 오류: {str(e)}")
+         print(f"Translation error: {str(e)}")
        return query

def is_english(text):
+     """
+     Check if a string is (mostly) English by verifying character code ranges.
+     """
    return all(ord(char) < 128 for char in text.replace(' ', '').replace('-', '').replace('_', ''))

def search_serphouse(query, country, page=1, num_result=10):
    """
-     SerpHouse API에 실시간 검색 요청을 보내어,
-     '뉴스' (sort_by=date)에서 해당 query에 대한 기사 목록을 가져온다.
+     Send a real-time search request to the SerpHouse API,
+     specifying the 'news' tab (sort_by=date) for the given query.
+     Returns a dict with 'results' or 'error'.
    """
    url = "https://api.serphouse.com/serp/live"

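The `is_english` heuristic simply checks that every remaining character is ASCII, so it treats any fully-ASCII string as English:

```python
print(is_english("Apple Inc"))   # True: all code points < 128
print(is_english("삼성전자"))     # False: Hangul code points >= 128
print(is_english("naïve"))       # False: 'ï' is non-ASCII despite the word being English
```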
@@ -624,24 +630,23 @@ def search_serphouse(query, country, page=1, num_result=10):

    except requests.exceptions.Timeout:
        return {
-             "error": "검색 시간이 초과되었습니다. 잠시 후 다시 시도해주세요.",
+             "error": "Search timed out. Please try again later.",
            "translated_query": query
        }
    except requests.exceptions.RequestException as e:
        return {
-             "error": f"검색 오류가 발생했습니다: {str(e)}",
+             "error": f"Error during search: {str(e)}",
            "translated_query": query
        }
    except Exception as e:
        return {
-             "error": f"예기치 않은 오류가 발생했습니다: {str(e)}",
+             "error": f"Unexpected error occurred: {str(e)}",
            "translated_query": query
        }

def format_results_from_raw(response_data):
    """
-     SerpHouse API 응답 데이터를 가공하여,
-     (에러메시지, 기사리스트) 형태로 반환.
+     Process the SerpHouse API response data and return (error_message, article_list).
    """
    if "error" in response_data:
        return "Error: " + response_data["error"], []
@@ -650,12 +655,11 @@ def format_results_from_raw(response_data):
        results = response_data["results"]
        translated_query = response_data["translated_query"]

-         # 실제 뉴스 결과
        news_results = results.get('results', {}).get('results', {}).get('news', [])
        if not news_results:
-             return "검색 결과가 없습니다.", []
+             return "No search results found.", []

-         # 한국 도메인 및 한국 관련 키워드 포함 기사 제외
+         # Filter out Korean domains and Korean keywords (example filtering)
        korean_domains = [
            '.kr', 'korea', 'korean', 'yonhap', 'hankyung', 'chosun',
            'donga', 'joins', 'hani', 'koreatimes', 'koreaherald'
@@ -676,37 +680,45 @@ def format_results_from_raw(response_data):
            any(keyword in title for keyword in korean_keywords)
        )

-             # 한국어 뉴스(또는 한국 도메인) 제외
+             # Exclude Korean content
            if not is_korean_content:
                filtered_articles.append({
                    "index": idx,
-                     "title": result.get("title", "제목 없음"),
+                     "title": result.get("title", "No Title"),
                    "link": url,
-                     "snippet": result.get("snippet", "내용 없음"),
-                     "channel": result.get("channel", result.get("source", "알 수 없음")),
-                     "time": result.get("time", result.get("date", "알 수 없는 시간")),
+                     "snippet": result.get("snippet", "No Content"),
+                     "channel": result.get("channel", result.get("source", "Unknown")),
+                     "time": result.get("time", result.get("date", "Unknown Time")),
                    "image_url": result.get("img", result.get("thumbnail", "")),
                    "translated_query": translated_query
                })

        return "", filtered_articles
    except Exception as e:
-         return f"결과 처리 오류 발생: {str(e)}", []
+         return f"Error processing results: {str(e)}", []

def serphouse_search(query, country):
    """
-     검색 결과 포매팅까지 일괄 처리
+     Helper function to search and then format results.
+     Returns (error_message, article_list).
    """
    response_data = search_serphouse(query, country)
    return format_results_from_raw(response_data)


- # CSS (UI 커스터마이징)
+ # Updated CSS for more visual appeal and readability
css = """
- /* 전역 스타일 */
- footer {visibility: hidden;}
+ body {
+     background: linear-gradient(to bottom right, #ffffff, #e6f7ff);
+     font-family: 'Arial', sans-serif;
+ }
+
+ /* Hide default Gradio footer */
+ footer {
+     visibility: hidden;
+ }

- /* 레이아웃 컨테이너 */
+ /* Header/Status area */
#status_area {
    background: rgba(255, 255, 255, 0.9);
    padding: 15px;
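Putting the pipeline together, callers only ever see the two-tuple contract of `serphouse_search`; a hypothetical call:

```python
# Hypothetical usage of the search + formatting pipeline.
err, articles = serphouse_search("Apple", "United States")
if err:
    print(err)                        # e.g. "Error: Search timed out. ..."
else:
    print(display_results(articles))  # Markdown list of the filtered articles
```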
@@ -715,12 +727,13 @@ footer {visibility: hidden;}
    box-shadow: 0 2px 5px rgba(0,0,0,0.1);
}

+ /* Results area */
#results_area {
    padding: 10px;
    margin-top: 10px;
}

- /* 스타일 */
+ /* Tabs style */
.tabs {
    border-bottom: 2px solid #ddd !important;
    margin-bottom: 20px !important;
@@ -741,35 +754,42 @@ footer {visibility: hidden;}
    color: #1f77b4 !important;
}

- /* 상태 메시지 */
+ /* Status message styling */
#status_area .markdown-text {
    font-size: 1.1em;
    color: #2c3e50;
    padding: 10px 0;
}

- /* 기본 컨테이너 */
+ /* Main container grouping */
.group {
    border: 1px solid #eee;
    padding: 15px;
    margin-bottom: 15px;
    border-radius: 5px;
    background: white;
+     transition: all 0.3s ease;
+     opacity: 0;
+     transform: translateY(20px);
+ }
+ .group.visible {
+     opacity: 1;
+     transform: translateY(0);
}

- /* 버튼 스타일 */
+ /* Buttons */
.primary-btn {
    background: #1f77b4 !important;
    border: none !important;
}

- /* 입력 필드 */
+ /* Input fields */
.textbox {
    border: 1px solid #ddd !important;
    border-radius: 4px !important;
}

- /* 프로그레스바 컨테이너 */
+ /* Progress bar container */
.progress-container {
    position: fixed;
    top: 0;
@@ -780,7 +800,7 @@ footer {visibility: hidden;}
    z-index: 1000;
}

- /* 프로그레스bar */
+ /* Progress bar itself */
.progress-bar {
    height: 100%;
    background: linear-gradient(90deg, #2196F3, #00BCD4);
@@ -789,7 +809,7 @@ footer {visibility: hidden;}
    animation: progress-glow 1.5s ease-in-out infinite;
}

- /* 프로그레스 텍스트 */
+ /* Progress text */
.progress-text {
    position: fixed;
    top: 8px;
@@ -804,7 +824,7 @@ footer {visibility: hidden;}
    box-shadow: 0 2px 5px rgba(0,0,0,0.2);
}

- /* 프로그레스바 애니메이션 */
+ /* Progress bar animation */
@keyframes progress-glow {
    0% {
        box-shadow: 0 0 5px rgba(33, 150, 243, 0.5);
@@ -817,7 +837,14 @@ footer {visibility: hidden;}
    }
}

- /* 반응형 디자인 */
+ /* Loading state */
+ .loading {
+     opacity: 0.7;
+     pointer-events: none;
+     transition: opacity 0.3s ease;
+ }
+
+ /* Responsive design for smaller screens */
@media (max-width: 768px) {
    .group {
        padding: 10px;
@@ -830,26 +857,7 @@ footer {visibility: hidden;}
    }
}

- /* 로딩 상태 표시 개선 */
- .loading {
-     opacity: 0.7;
-     pointer-events: none;
-     transition: opacity 0.3s ease;
- }
-
- /* 결과 컨테이너 애니메이션 */
- .group {
-     transition: all 0.3s ease;
-     opacity: 0;
-     transform: translateY(20px);
- }
-
- .group.visible {
-     opacity: 1;
-     transform: translateY(0);
- }
-
- /* Examples 스타일링 */
+ /* Example section styling */
.examples-table {
    margin-top: 10px !important;
    margin-bottom: 20px !important;
@@ -877,34 +885,35 @@ footer {visibility: hidden;}
}
"""

- import gradio as gr
-
- with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css, title="NewsAI 서비스") as iface:
+ with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css, title="NewsAI Service") as iface:
    init_db()

    with gr.Tabs():
-         # 첫 번째 탭
-         with gr.Tab("Earnbot"):
-             gr.Markdown("## EarnBot: 글로벌 빅테크 기업 및 투자 전망 AI 자동 분석")
-             gr.Markdown(" * '전체 분석 보고 요약' 클릭 시 전체 자동 보고 생성.\n * 아래 개별 종목의 '검색(DB 자동 저장)'과 '출력(DB 자동 호출)'도 가능.\n * 추가로, 원하는 임의 키워드 및 국가로 검색/분석할 수도 있습니다.")
+         with gr.Tab("EarnBot"):
+             gr.Markdown("## EarnBot: AI-powered Analysis of Global Big Tech Companies and Investment Outlook")
+             gr.Markdown(
+                 " * Click on 'Generate Full Analysis Summary Report' to create a comprehensive automated report.\n"
+                 " * You can also 'Search (automatically save to DB)' and 'Load from DB (automatically retrieve)' for each listed company.\n"
+                 " * Additionally, feel free to search/analyze any custom keyword in your chosen country."
+             )

-             # (사용자 임의 검색 섹션)
+             # User custom search section
            with gr.Group():
-                 gr.Markdown("### 사용자 임의 검색")
+                 gr.Markdown("### Custom Search")
                with gr.Row():
                    with gr.Column():
                        user_input = gr.Textbox(
-                             label="검색어 입력",
-                             placeholder="예) Apple, Samsung 등 자유롭게"
+                             label="Enter your keyword",
+                             placeholder="e.g., Apple, Samsung, etc."
                        )
                    with gr.Column():
                        country_selection = gr.Dropdown(
                            choices=list(COUNTRY_LOCATIONS.keys()),
                            value="United States",
-                             label="국가 선택"
+                             label="Select Country"
                        )
                    with gr.Column():
-                         custom_search_btn = gr.Button("실행", variant="primary")
+                         custom_search_btn = gr.Button("Search", variant="primary")

            custom_search_output = gr.Markdown()

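The wiring of `custom_search_btn` sits between these hunks (only its `outputs=` line is visible below); it presumably follows the usual Gradio pattern, sketched here rather than quoted from app.py:

```python
# Sketch of the elided event hookup; argument order matches search_custom(query, country).
custom_search_btn.click(
    fn=search_custom,
    inputs=[user_input, country_selection],
    outputs=custom_search_output
)
```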
@@ -914,9 +923,9 @@ with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css, title="NewsAI 서비
                outputs=custom_search_output
            )

-             # 전체 분석 보고 요약 버튼
+             # Button to generate a full report
            with gr.Row():
-                 full_report_btn = gr.Button("전체 분석 보고 요약", variant="primary")
+                 full_report_btn = gr.Button("Generate Full Analysis Summary Report", variant="primary")
                full_report_display = gr.Markdown()

            full_report_btn.click(
@@ -924,18 +933,18 @@ with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css, title="NewsAI 서비
                outputs=full_report_display
            )

-             # 지정된 리스트 (KOREAN_COMPANIES) 개별 기업 검색/출력
+             # Individual search/load for companies in KOREAN_COMPANIES
            with gr.Column():
                for i in range(0, len(KOREAN_COMPANIES), 2):
                    with gr.Row():
-                         # 왼쪽
+                         # Left column
                        with gr.Column():
                            company = KOREAN_COMPANIES[i]
                            with gr.Group():
                                gr.Markdown(f"### {company}")
                                with gr.Row():
-                                     search_btn = gr.Button("검색", variant="primary")
-                                     load_btn = gr.Button("출력", variant="secondary")
+                                     search_btn = gr.Button("Search", variant="primary")
+                                     load_btn = gr.Button("Load from DB", variant="secondary")
                                result_display = gr.Markdown()

                            search_btn.click(
@@ -947,15 +956,15 @@ with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css, title="NewsAI 서비
                                outputs=result_display
                            )

-                         # 오른쪽
+                         # Right column (if exists)
                        if i + 1 < len(KOREAN_COMPANIES):
                            with gr.Column():
                                company = KOREAN_COMPANIES[i + 1]
                                with gr.Group():
                                    gr.Markdown(f"### {company}")
                                    with gr.Row():
-                                         search_btn = gr.Button("검색", variant="primary")
-                                         load_btn = gr.Button("출력", variant="secondary")
+                                         search_btn = gr.Button("Search", variant="primary")
+                                         load_btn = gr.Button("Load from DB", variant="secondary")
                                    result_display = gr.Markdown()

                                search_btn.click(
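The bodies of these per-company click handlers are elided in both columns; since `company` is reassigned on every loop iteration, the handlers presumably pin its current value, for example via a default-argument lambda (a sketch under that assumption, not the verbatim code):

```python
# Sketch: binding company per iteration avoids the late-binding closure pitfall.
search_btn.click(fn=lambda c=company: search_company(c), outputs=result_display)
load_btn.click(fn=lambda c=company: load_company(c), outputs=result_display)
```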
 