Update app.py
Browse files
app.py
CHANGED
14 | import sqlite3
15 | import pytz
16 |
17 | + # List of target companies/keywords
18 | KOREAN_COMPANIES = [
19 |     "NVIDIA",
20 |     "ALPHABET",

31 |     "stock",
32 |     "Economics",
33 |     "Finance",
34 | +   "investing"
35 | ]
36 |
37 | def convert_to_seoul_time(timestamp_str):
38 | +   """
39 | +   Convert a given timestamp string (UTC) to Seoul time (KST).
40 | +   """
41 |     try:
42 |         dt = datetime.strptime(timestamp_str, '%Y-%m-%d %H:%M:%S')
43 |         seoul_tz = pytz.timezone('Asia/Seoul')
44 |         seoul_time = seoul_tz.localize(dt)
45 |         return seoul_time.strftime('%Y-%m-%d %H:%M:%S KST')
46 |     except Exception as e:
47 | +       print(f"Time conversion error: {str(e)}")
48 |         return timestamp_str
49 |
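A note on the conversion above: `seoul_tz.localize(dt)` only attaches the `Asia/Seoul` zone to the parsed naive datetime; it does not shift the wall-clock value. If the stored timestamps really are UTC, as the new docstring says, a true conversion would look like the sketch below (a minimal sketch assuming UTC input; `utc_to_seoul` is a hypothetical name, not part of the app):

```python
# Sketch only: convert an assumed-UTC "YYYY-MM-DD HH:MM:SS" string to KST.
from datetime import datetime
import pytz

def utc_to_seoul(timestamp_str):
    dt = datetime.strptime(timestamp_str, '%Y-%m-%d %H:%M:%S')
    utc_dt = pytz.utc.localize(dt)                             # mark as UTC
    seoul_dt = utc_dt.astimezone(pytz.timezone('Asia/Seoul'))  # shift to KST
    return seoul_dt.strftime('%Y-%m-%d %H:%M:%S KST')
```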
50 | def analyze_sentiment_batch(articles, client):
51 |     """
52 | +   Perform a comprehensive sentiment analysis of the news articles using the OpenAI API.
53 |     """
54 |     try:
55 | +       # Combine all articles into a single text
56 |         combined_text = "\n\n".join([
57 | +           f"Title: {article.get('title', '')}\nContent: {article.get('snippet', '')}"
58 |             for article in articles
59 |         ])
60 |
61 | +       prompt = f"""Please perform an overall sentiment analysis of the following collection of news articles:
62 |
63 | +       News content:
64 |         {combined_text}
65 |
66 | +       Please follow this format:
67 | +       1. Overall Sentiment: [Positive/Negative/Neutral]
68 | +       2. Key Positive Factors:
69 | +          - [Item1]
70 | +          - [Item2]
71 | +       3. Key Negative Factors:
72 | +          - [Item1]
73 | +          - [Item2]
74 | +       4. Summary: [Detailed explanation]
75 |         """
76 |
77 |         response = client.chat.completions.create(

83 |
84 |         return response.choices[0].message.content
85 |     except Exception as e:
86 | +       return f"Sentiment analysis failed: {str(e)}"
87 |
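The arguments to `client.chat.completions.create` sit in the elided lines 78-82, so the model and parameters are not visible in this diff. A minimal sketch of such a call with the OpenAI-style client the code clearly uses (the model name and parameters are assumptions):

```python
# Hypothetical reconstruction of the elided call. Only the attribute path
# client.chat.completions.create and response.choices[0].message.content
# are confirmed by the diff; the model name is an assumption.
response = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": prompt}],
)
analysis_text = response.choices[0].message.content
```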
88 |
89 | + # Initialize the database
90 | def init_db():
91 | +   """
92 | +   Initialize the SQLite database (search_results.db) if it doesn't already exist.
93 | +   """
94 |     db_path = pathlib.Path("search_results.db")
95 |     conn = sqlite3.connect(db_path)
96 |     c = conn.cursor()

105 |
106 | def save_to_db(keyword, country, results):
107 |     """
108 | +   Save the search results for a specific (keyword, country) combination into the database.
109 |     """
110 |     conn = sqlite3.connect("search_results.db")
111 |     c = conn.cursor()

122 |
123 | def load_from_db(keyword, country):
124 |     """
125 | +   Load the most recent search results for a specific (keyword, country) combination from the database.
126 | +   Returns the data and the timestamp.
127 |     """
128 |     conn = sqlite3.connect("search_results.db")
129 |     c = conn.cursor()
130 | +   c.execute(
131 | +       "SELECT results, timestamp FROM searches WHERE keyword=? AND country=? ORDER BY timestamp DESC LIMIT 1",
132 | +       (keyword, country)
133 | +   )
134 |     result = c.fetchone()
135 |     conn.close()
136 |     if result:
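The `CREATE TABLE` statement inside `init_db()` falls in the elided lines 97-104. A minimal schema sketch compatible with the `SELECT results, timestamp FROM searches ...` query shown above (column types and the `id` column are assumptions):

```python
# Assumed schema, inferred from the SELECT in load_from_db(); not the
# app's literal DDL, which the diff does not show.
import sqlite3

conn = sqlite3.connect("search_results.db")
conn.execute("""
    CREATE TABLE IF NOT EXISTS searches (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        keyword TEXT,
        country TEXT,
        results TEXT,  -- JSON blob: {"articles": [...], "analysis": "..."}
        timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
    )
""")
conn.commit()
conn.close()
```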
139 |
140 | def display_results(articles):
141 |     """
142 | +   Convert a list of news articles into a Markdown string for display.
143 |     """
144 |     output = ""
145 |     for idx, article in enumerate(articles, 1):
146 |         output += f"### {idx}. {article['title']}\n"
147 | +       output += f"Source: {article['channel']}\n"
148 | +       output += f"Time: {article['time']}\n"
149 | +       output += f"Link: {article['link']}\n"
150 | +       output += f"Summary: {article['snippet']}\n\n"
151 |     return output
152 |
153 |
154 | ########################################
155 | + # 1) Search => Articles + Analysis, then save to DB
156 | ########################################
157 | def search_company(company):
158 |     """
159 | +   For a single company (or keyword), search US news.
160 | +   1) Retrieve a list of articles
161 | +   2) Perform sentiment analysis
162 | +   3) Save results to DB
163 | +   4) Return (articles + analysis) in a single output.
164 |     """
165 |     error_message, articles = serphouse_search(company, "United States")
166 |     if not error_message and articles:
167 | +       # Perform sentiment analysis
168 |         analysis = analyze_sentiment_batch(articles, client)
169 |
170 | +       # Prepare data to save in DB
171 |         store_dict = {
172 |             "articles": articles,
173 |             "analysis": analysis
174 |         }
175 |         save_to_db(company, "United States", store_dict)
176 |
177 | +       # Prepare output for display
178 |         output = display_results(articles)
179 | +       output += f"\n\n### Analysis Report\n{analysis}\n"
180 |         return output
181 | +   return f"No search results found for {company}."
182 |
183 | ########################################
184 | + # 2) Load => Return articles + analysis from DB
185 | ########################################
186 | def load_company(company):
187 |     """
188 | +   Load the most recent US news search results for the given company (or keyword) from the database,
189 | +   and return the articles + analysis in a single output.
190 |     """
191 |     data, timestamp = load_from_db(company, "United States")
192 |     if data:
    | -       # data has the form { "articles": [...], "analysis": "..." }
193 |         articles = data.get("articles", [])
194 |         analysis = data.get("analysis", "")
195 |
196 | +       output = f"### {company} Search Results\nLast Updated: {timestamp}\n\n"
197 |         output += display_results(articles)
198 | +       output += f"\n\n### Analysis Report\n{analysis}\n"
199 |         return output
200 | +   return f"No saved results for {company}."
201 |
202 |
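Together these form a simple write/read pair over the same DB row format. An illustrative call sequence (assumes the HF_TOKEN and SERPHOUSE_API_KEY environment the app reads at startup):

```python
live_report = search_company("NVIDIA")    # live search + sentiment analysis, saved to DB
cached_report = load_company("NVIDIA")    # latest saved result, no new API calls
print(cached_report)
```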
203 | ########################################
204 | + # 3) Updated show_stats() with new title
205 | ########################################
206 | def show_stats():
207 |     """
208 | +   For each company in KOREAN_COMPANIES:
209 | +   - Retrieve the most recent timestamp in DB
210 | +   - Number of articles
211 | +   - Sentiment analysis result
212 | +   Return these in a report format.
213 | +
214 | +   Title changed to: "EarnBOT Analysis Report"
215 |     """
216 |     conn = sqlite3.connect("search_results.db")
217 |     c = conn.cursor()
218 |
219 | +   output = "## EarnBOT Analysis Report\n\n"
220 |
    | -   # Collect the (company, timestamp, articles) rows to read from the DB for every company
221 |     data_list = []
222 |     for company in KOREAN_COMPANIES:
223 |         c.execute("""

235 |
236 |     conn.close()
237 |
    | -   # Helper function for parallel sentiment-analysis processing
238 |     def analyze_data(item):
239 |         comp, tstamp, results_json = item
240 |         data = json.loads(results_json)

242 |         analysis = data.get("analysis", "")
243 |
244 |         count_articles = len(articles)
    | -       # "analysis" is already stored in the DB at this point,
    | -       # so reuse it as-is unless a fresh analysis is needed
    | -       # (re-analysis is possible if required)
    | -
245 |         return (comp, tstamp, count_articles, analysis)
246 |
247 |     results_list = []

250 |     for future in as_completed(futures):
251 |         results_list.append(future.result())
252 |
    | -   # Output the results
253 |     for comp, tstamp, count, analysis in results_list:
254 |         seoul_time = convert_to_seoul_time(tstamp)
255 |         output += f"### {comp}\n"
256 | +       output += f"- Last updated: {seoul_time}\n"
257 | +       output += f"- Number of articles stored: {count}\n\n"
258 |         if analysis:
259 | +           output += "#### News Sentiment Analysis\n"
260 |             output += f"{analysis}\n\n"
261 |         output += "---\n\n"
262 |
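The executor setup that produces `futures` sits in the elided lines 248-249. A sketch consistent with the `as_completed` loop shown above (the worker count is an assumption):

```python
from concurrent.futures import ThreadPoolExecutor, as_completed

with ThreadPoolExecutor(max_workers=5) as executor:  # worker count assumed
    futures = [executor.submit(analyze_data, item) for item in data_list]
    for future in as_completed(futures):
        results_list.append(future.result())
```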
265 |
266 | def search_all_companies():
267 |     """
268 | +   Search all companies in KOREAN_COMPANIES (in parallel),
269 | +   perform sentiment analysis + save to DB => return Markdown of all results.
270 |     """
271 | +   overall_result = "# [Search Results for All Companies]\n\n"
272 |
273 |     def do_search(comp):
274 |         return comp, search_company(comp)

284 |
285 | def load_all_companies():
286 |     """
287 | +   Load articles + analysis for all companies in KOREAN_COMPANIES from the DB => return Markdown.
    | -   Article list + analysis report => result Markdown
288 |     """
289 | +   overall_result = "# [All Companies Data Output]\n\n"
290 |
291 |     for comp in KOREAN_COMPANIES:
292 |         overall_result += f"## {comp}\n"

296 |
297 | def full_summary_report():
298 |     """
299 | +   1) Search all companies (in parallel) -> 2) Load results -> 3) Show sentiment analysis stats
300 | +   Return a combined report with all three steps.
301 |     """
302 | +   # 1) Search all companies => store to DB
303 |     search_result_text = search_all_companies()
304 |
305 | +   # 2) Load all results => from DB
306 |     load_result_text = load_all_companies()
307 |
308 | +   # 3) Show stats => EarnBOT Analysis Report
309 |     stats_text = show_stats()
310 |
311 |     combined_report = (
312 | +       "# Full Analysis Summary Report\n\n"
313 | +       "Executed in the following order:\n"
314 | +       "1. Search all companies (parallel) + sentiment analysis => 2. Load results from DB => 3. Show overall sentiment analysis stats\n\n"
315 |         f"{search_result_text}\n\n"
316 |         f"{load_result_text}\n\n"
317 | +       "## [Overall Sentiment Analysis Stats]\n\n"
318 |         f"{stats_text}"
319 |     )
320 |     return combined_report
321 |
322 |
323 | ########################################
324 | + # Additional feature: User custom search
325 | ########################################
326 | def search_custom(query, country):
327 |     """
328 | +   For a user-provided (query, country):
329 | +   1) Search + sentiment analysis => save to DB
330 | +   2) Load from DB => display articles + analysis
331 |     """
332 |     error_message, articles = serphouse_search(query, country)
333 |     if error_message:
334 | +       return f"An error occurred: {error_message}"
335 |     if not articles:
336 | +       return "No results were found for your query."
337 |
338 | +   # 1) Perform analysis
339 |     analysis = analyze_sentiment_batch(articles, client)
340 |
341 | +   # 2) Save to DB
342 |     save_data = {
343 |         "articles": articles,
344 |         "analysis": analysis
345 |     }
346 |     save_to_db(query, country, save_data)
347 |
348 | +   # 3) Reload from DB
349 |     loaded_data, timestamp = load_from_db(query, country)
350 |     if not loaded_data:
351 | +       return "Failed to load data from DB."
352 |
353 | +   # 4) Prepare final output
354 | +   out = f"## [Custom Search Results]\n\n"
355 | +   out += f"**Keyword**: {query}\n\n"
356 | +   out += f"**Country**: {country}\n\n"
357 | +   out += f"**Timestamp**: {timestamp}\n\n"
358 |
359 |     arts = loaded_data.get("articles", [])
360 |     analy = loaded_data.get("analysis", "")
361 |
362 |     out += display_results(arts)
363 | +   out += f"### News Sentiment Analysis\n{analy}\n"
364 |
365 |     return out
366 |
367 |
368 | ########################################
369 | + # API Authentication
370 | ########################################
371 | ACCESS_TOKEN = os.getenv("HF_TOKEN")
372 | if not ACCESS_TOKEN:

381 |
382 |
383 | ########################################
384 | + # Country-specific settings
385 | ########################################
386 | COUNTRY_LANGUAGES = {
387 |     "United States": "en",
388 |     "KOREA": "ko",
389 |     "United Kingdom": "en",
390 | +   "Taiwan": "zh-TW",
391 |     "Canada": "en",
392 |     "Australia": "en",
393 |     "Germany": "de",

529 | @lru_cache(maxsize=100)
530 | def translate_query(query, country):
531 |     """
532 | +   Use the unofficial Google Translation API to translate the query into the target country's language.
533 | +   If the query is already in English, or if translation fails, return the original query.
534 |     """
535 |     try:
536 |         if is_english(query):

557 |             response = session.get(url, params=params, timeout=(5, 10))
558 |             translated_text = response.json()[0][0][0]
559 |             return translated_text
560 |         return query
561 |
562 |     except Exception as e:
563 | +       print(f"Translation error: {str(e)}")
564 |         return query
565 |
566 | def is_english(text):
567 | +   """
568 | +   Check if a string is (mostly) English by verifying character code ranges.
569 | +   """
570 |     return all(ord(char) < 128 for char in text.replace(' ', '').replace('-', '').replace('_', ''))
571 |
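The ASCII check above is a heuristic rather than a language test: digits and punctuation pass, while any non-ASCII script fails, including accented Latin. For example:

```python
assert is_english("NVIDIA stock 2024")  # plain ASCII passes
assert not is_english("삼성전자")         # Korean characters are non-ASCII
assert not is_english("café")           # the accented e is non-ASCII too
```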
572 | def search_serphouse(query, country, page=1, num_result=10):
573 |     """
574 | +   Send a real-time search request to the SerpHouse API,
575 | +   specifying the 'news' tab (sort_by=date) for the given query.
576 | +   Returns a dict with 'results' or 'error'.
577 |     """
578 |     url = "https://api.serphouse.com/serp/live"
579 |

630 |
631 |     except requests.exceptions.Timeout:
632 |         return {
633 | +           "error": "Search timed out. Please try again later.",
634 |             "translated_query": query
635 |         }
636 |     except requests.exceptions.RequestException as e:
637 |         return {
638 | +           "error": f"Error during search: {str(e)}",
639 |             "translated_query": query
640 |         }
641 |     except Exception as e:
642 |         return {
643 | +           "error": f"Unexpected error occurred: {str(e)}",
644 |             "translated_query": query
645 |         }
646 |
647 | def format_results_from_raw(response_data):
648 |     """
649 | +   Process the SerpHouse API response data and return (error_message, article_list).
650 |     """
651 |     if "error" in response_data:
652 |         return "Error: " + response_data["error"], []

655 |         results = response_data["results"]
656 |         translated_query = response_data["translated_query"]
657 |
    | -       # Actual news results
658 |         news_results = results.get('results', {}).get('results', {}).get('news', [])
659 |         if not news_results:
660 | +           return "No search results found.", []
661 |
662 | +       # Filter out Korean domains and Korean keywords (example filtering)
663 |         korean_domains = [
664 |             '.kr', 'korea', 'korean', 'yonhap', 'hankyung', 'chosun',
665 |             'donga', 'joins', 'hani', 'koreatimes', 'koreaherald'

680 |                 any(keyword in title for keyword in korean_keywords)
681 |             )
682 |
683 | +           # Exclude Korean content
684 |             if not is_korean_content:
685 |                 filtered_articles.append({
686 |                     "index": idx,
687 | +                   "title": result.get("title", "No Title"),
688 |                     "link": url,
689 | +                   "snippet": result.get("snippet", "No Content"),
690 | +                   "channel": result.get("channel", result.get("source", "Unknown")),
691 | +                   "time": result.get("time", result.get("date", "Unknown Time")),
692 |                     "image_url": result.get("img", result.get("thumbnail", "")),
693 |                     "translated_query": translated_query
694 |                 })
695 |
696 |         return "", filtered_articles
697 |     except Exception as e:
698 | +       return f"Error processing results: {str(e)}", []
699 |
700 | def serphouse_search(query, country):
701 |     """
702 | +   Helper function to search and then format results.
703 | +   Returns (error_message, article_list).
704 |     """
705 |     response_data = search_serphouse(query, country)
706 |     return format_results_from_raw(response_data)
707 |
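The `(error_message, article_list)` tuple convention makes the wrapper straightforward to consume. An illustrative caller:

```python
error, articles = serphouse_search("Apple", "United States")
if error:
    print(error)  # e.g. "Error: Search timed out. Please try again later."
else:
    for art in articles:
        print(art["title"], art["link"])
```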
708 |
709 | + # Updated CSS for more visual appeal and readability
710 | css = """
711 | + body {
712 | +     background: linear-gradient(to bottom right, #ffffff, #e6f7ff);
713 | +     font-family: 'Arial', sans-serif;
714 | + }
715 |
716 | + /* Hide default Gradio footer */
717 | + footer {
718 | +     visibility: hidden;
719 | + }
720 | +
721 | + /* Header/Status area */
722 | #status_area {
723 |     background: rgba(255, 255, 255, 0.9);
724 |     padding: 15px;

727 |     box-shadow: 0 2px 5px rgba(0,0,0,0.1);
728 | }
729 |
730 | + /* Results area */
731 | #results_area {
732 |     padding: 10px;
733 |     margin-top: 10px;
734 | }
735 |
736 | + /* Tabs style */
737 | .tabs {
738 |     border-bottom: 2px solid #ddd !important;
739 |     margin-bottom: 20px !important;

754 |     color: #1f77b4 !important;
755 | }
756 |
757 | + /* Status message styling */
758 | #status_area .markdown-text {
759 |     font-size: 1.1em;
760 |     color: #2c3e50;
761 |     padding: 10px 0;
762 | }
763 |
764 | + /* Main container grouping */
765 | .group {
766 |     border: 1px solid #eee;
767 |     padding: 15px;
768 |     margin-bottom: 15px;
769 |     border-radius: 5px;
770 |     background: white;
771 | +   transition: all 0.3s ease;
772 | +   opacity: 0;
773 | +   transform: translateY(20px);
774 | + }
775 | + .group.visible {
776 | +   opacity: 1;
777 | +   transform: translateY(0);
778 | }
779 |
780 | + /* Buttons */
781 | .primary-btn {
782 |     background: #1f77b4 !important;
783 |     border: none !important;
784 | }
785 |
786 | + /* Input fields */
787 | .textbox {
788 |     border: 1px solid #ddd !important;
789 |     border-radius: 4px !important;
790 | }
791 |
792 | + /* Progress bar container */
793 | .progress-container {
794 |     position: fixed;
795 |     top: 0;

800 |     z-index: 1000;
801 | }
802 |
803 | + /* Progress bar itself */
804 | .progress-bar {
805 |     height: 100%;
806 |     background: linear-gradient(90deg, #2196F3, #00BCD4);

809 |     animation: progress-glow 1.5s ease-in-out infinite;
810 | }
811 |
812 | + /* Progress text */
813 | .progress-text {
814 |     position: fixed;
815 |     top: 8px;

824 |     box-shadow: 0 2px 5px rgba(0,0,0,0.2);
825 | }
826 |
827 | + /* Progress bar animation */
828 | @keyframes progress-glow {
829 |     0% {
830 |         box-shadow: 0 0 5px rgba(33, 150, 243, 0.5);

837 |     }
838 | }
839 |
840 | + /* Loading state */
841 | + .loading {
842 | +     opacity: 0.7;
843 | +     pointer-events: none;
844 | +     transition: opacity 0.3s ease;
845 | + }
846 | +
847 | + /* Responsive design for smaller screens */
848 | @media (max-width: 768px) {
849 |     .group {
850 |         padding: 10px;

857 |     }
858 | }
859 |
    | - /* Loading state */
    | - .loading {
    | -     opacity: 0.7;
    | -     pointer-events: none;
    | -     transition: opacity 0.3s ease;
    | - }
    | -
    | - /* Result container animation */
    | - .group {
    | -     transition: all 0.3s ease;
    | -     opacity: 0;
    | -     transform: translateY(20px);
    | - }
    | -
    | - .group.visible {
    | -     opacity: 1;
    | -     transform: translateY(0);
    | - }
    | -
    | - /* Examples styling */
860 | + /* Example section styling */
861 | .examples-table {
862 |     margin-top: 10px !important;
863 |     margin-bottom: 20px !important;

885 | }
886 | """
887 |
888 | + with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css, title="NewsAI Service") as iface:
889 |     init_db()
890 |
891 |     with gr.Tabs():
892 | +       with gr.Tab("EarnBot"):
893 | +           gr.Markdown("## EarnBot: AI-powered Analysis of Global Big Tech Companies and Investment Outlook")
894 | +           gr.Markdown(
895 | +               " * Click on 'Generate Full Analysis Summary Report' to create a comprehensive automated report.\n"
896 | +               " * You can also 'Search (automatically save to DB)' and 'Load from DB (automatically retrieve)' for each listed company.\n"
897 | +               " * Additionally, feel free to search/analyze any custom keyword in your chosen country."
898 | +           )
899 |
900 | +           # User custom search section
901 |             with gr.Group():
902 | +               gr.Markdown("### Custom Search")
903 |                 with gr.Row():
904 |                     with gr.Column():
905 |                         user_input = gr.Textbox(
906 | +                           label="Enter your keyword",
907 | +                           placeholder="e.g., Apple, Samsung, etc."
908 |                         )
909 |                     with gr.Column():
910 |                         country_selection = gr.Dropdown(
911 |                             choices=list(COUNTRY_LOCATIONS.keys()),
912 |                             value="United States",
913 | +                           label="Select Country"
914 |                         )
915 |                     with gr.Column():
916 | +                       custom_search_btn = gr.Button("Search", variant="primary")
917 |
918 |             custom_search_output = gr.Markdown()
919 |

923 |                 outputs=custom_search_output
924 |             )
925 |
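The start of this click wiring falls in the elided lines 920-922; only the `outputs=` argument is visible. A hedged reconstruction based on the `search_custom(query, country)` signature:

```python
# Assumed wiring; the diff confirms only the outputs= line.
custom_search_btn.click(
    fn=search_custom,
    inputs=[user_input, country_selection],
    outputs=custom_search_output
)
```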
926 | +           # Button to generate a full report
927 |             with gr.Row():
928 | +               full_report_btn = gr.Button("Generate Full Analysis Summary Report", variant="primary")
929 |                 full_report_display = gr.Markdown()
930 |
931 |             full_report_btn.click(

933 |                 outputs=full_report_display
934 |             )
935 |
936 | +           # Individual search/load for companies in KOREAN_COMPANIES
937 |             with gr.Column():
938 |                 for i in range(0, len(KOREAN_COMPANIES), 2):
939 |                     with gr.Row():
940 | +                       # Left column
941 |                         with gr.Column():
942 |                             company = KOREAN_COMPANIES[i]
943 |                             with gr.Group():
944 |                                 gr.Markdown(f"### {company}")
945 |                                 with gr.Row():
946 | +                                   search_btn = gr.Button("Search", variant="primary")
947 | +                                   load_btn = gr.Button("Load from DB", variant="secondary")
948 |                                 result_display = gr.Markdown()
949 |
950 |                             search_btn.click(

956 |                                 outputs=result_display
957 |                             )
958 |
959 | +                       # Right column (if exists)
960 |                         if i + 1 < len(KOREAN_COMPANIES):
961 |                             with gr.Column():
962 |                                 company = KOREAN_COMPANIES[i + 1]
963 |                                 with gr.Group():
964 |                                     gr.Markdown(f"### {company}")
965 |                                     with gr.Row():
966 | +                                       search_btn = gr.Button("Search", variant="primary")
967 | +                                       load_btn = gr.Button("Load from DB", variant="secondary")
968 |                                     result_display = gr.Markdown()
969 |
970 |                                 search_btn.click(
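The per-company handlers themselves are elided (lines 951-955 and the matching load wiring). Since `company`, `search_btn`, and `load_btn` are rebound on every loop iteration, each handler must capture the current company by value; one safe pattern is sketched below (the lambda binding is an assumption, not the diff's literal code):

```python
# Default-argument binding freezes the loop variable per iteration.
search_btn.click(
    fn=lambda c=company: search_company(c),
    outputs=result_display
)
load_btn.click(
    fn=lambda c=company: load_company(c),
    outputs=result_display
)
```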