Spaces:
Building
Building
Update app.py
Browse files
app.py
CHANGED
@@ -47,6 +47,7 @@ def convert_to_seoul_time(timestamp_str):
|
|
47 |
def analyze_sentiment_batch(articles, client):
|
48 |
"""
|
49 |
OpenAI API๋ฅผ ํตํด ๋ด์ค ๊ธฐ์ฌ๋ค์ ์ข
ํฉ ๊ฐ์ฑ ๋ถ์์ ์ํ
|
|
|
50 |
"""
|
51 |
try:
|
52 |
# ๋ชจ๋ ๊ธฐ์ฌ์ ์ ๋ชฉ๊ณผ ๋ด์ฉ์ ํ๋์ ํ
์คํธ๋ก ๊ฒฐํฉ
|
@@ -55,7 +56,8 @@ def analyze_sentiment_batch(articles, client):
|
|
55 |
for article in articles
|
56 |
])
|
57 |
|
58 |
-
|
|
|
59 |
|
60 |
๋ด์ค ๋ด์ฉ:
|
61 |
{combined_text}
|
@@ -148,22 +150,18 @@ def display_results(articles):
|
|
148 |
def search_company(company):
|
149 |
"""
|
150 |
๋จ์ผ ๊ธฐ์
(๋๋ ํค์๋)์ ๋ํด ๋ฏธ๊ตญ ๋ด์ค ๊ฒ์ ํ,
|
151 |
-
|
152 |
=> { "articles": [...], "analysis": ... } ํํ๋ก DB์ ์ ์ฅ
|
153 |
"""
|
154 |
error_message, articles = serphouse_search(company, "United States")
|
155 |
if not error_message and articles:
|
156 |
-
# ๊ฐ์ฑ ๋ถ์
|
157 |
analysis = analyze_sentiment_batch(articles, client)
|
158 |
-
|
159 |
-
# DB ์ ์ฅ์ฉ ๋ฐ์ดํฐ ๊ตฌ์ฑ
|
160 |
store_dict = {
|
161 |
"articles": articles,
|
162 |
"analysis": analysis
|
163 |
}
|
164 |
save_to_db(company, "United States", store_dict)
|
165 |
|
166 |
-
# ํ๋ฉด ์ถ๋ ฅ์ฉ
|
167 |
output = display_results(articles)
|
168 |
output += f"\n\n### ๋ถ์ ๋ณด๊ณ \n{analysis}\n"
|
169 |
return output
|
@@ -174,12 +172,11 @@ def search_company(company):
|
|
174 |
########################################
|
175 |
def load_company(company):
|
176 |
"""
|
177 |
-
DB์์
|
178 |
-
๊ธฐ์ฌ ๋ชฉ๋ก + ๋ถ์
|
179 |
"""
|
180 |
data, timestamp = load_from_db(company, "United States")
|
181 |
if data:
|
182 |
-
# data๋ { "articles": [...], "analysis": "..."} ํํ
|
183 |
articles = data.get("articles", [])
|
184 |
analysis = data.get("analysis", "")
|
185 |
|
@@ -191,24 +188,21 @@ def load_company(company):
|
|
191 |
|
192 |
|
193 |
########################################
|
194 |
-
# 3)
|
195 |
########################################
|
196 |
def show_stats():
|
197 |
"""
|
198 |
-
KOREAN_COMPANIES
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
(๋ฌธ๊ตฌ ๋ณ๊ฒฝ) "ํ๊ตญ ๊ธฐ์
๋ด์ค ๋ถ์ ๋ฆฌํฌํธ" -> "EarnBOT ๋ถ์ ๋ฆฌํฌํธ"
|
205 |
"""
|
206 |
conn = sqlite3.connect("search_results.db")
|
207 |
c = conn.cursor()
|
208 |
|
209 |
-
output = "## EarnBOT ๋ถ์ ๋ฆฌํฌํธ\n\n"
|
210 |
|
211 |
-
# ๋ชจ๋ ๊ธฐ์
์ ๋ํด DB์์ ์ฝ์ด์ฌ (company, timestamp, articles) ๋ชฉ๋ก ์์ง
|
212 |
data_list = []
|
213 |
for company in KOREAN_COMPANIES:
|
214 |
c.execute("""
|
@@ -221,23 +215,17 @@ def show_stats():
|
|
221 |
|
222 |
row = c.fetchone()
|
223 |
if row:
|
224 |
-
results_json,
|
225 |
-
data_list.append((company,
|
226 |
|
227 |
conn.close()
|
228 |
|
229 |
-
# ๊ฐ์ฑ ๋ถ์ ๋ณ๋ ฌ ์ฒ๋ฆฌ ํจ์
|
230 |
def analyze_data(item):
|
231 |
comp, tstamp, results_json = item
|
232 |
data = json.loads(results_json)
|
233 |
articles = data.get("articles", [])
|
234 |
analysis = data.get("analysis", "")
|
235 |
-
|
236 |
count_articles = len(articles)
|
237 |
-
# ์ฌ๊ธฐ์๋ ์ด๋ฏธ DB์ "analysis"๊ฐ ๋ค์ด ์์ผ๋ฏ๋ก,
|
238 |
-
# ๊ตณ์ด ์ฌ๋ถ์ํ ํ์๊ฐ ์์ผ๋ฉด ๊ทธ๋๋ก ์ฌ์ฉ
|
239 |
-
# (ํ์ ์ ์ฌ๋ถ์ ๊ฐ๋ฅ)
|
240 |
-
|
241 |
return (comp, tstamp, count_articles, analysis)
|
242 |
|
243 |
results_list = []
|
@@ -246,7 +234,6 @@ def show_stats():
|
|
246 |
for future in as_completed(futures):
|
247 |
results_list.append(future.result())
|
248 |
|
249 |
-
# ๊ฒฐ๊ณผ ์ถ๋ ฅ
|
250 |
for comp, tstamp, count, analysis in results_list:
|
251 |
seoul_time = convert_to_seoul_time(tstamp)
|
252 |
output += f"### {comp}\n"
|
@@ -259,11 +246,9 @@ def show_stats():
|
|
259 |
|
260 |
return output
|
261 |
|
262 |
-
|
263 |
def search_all_companies():
|
264 |
"""
|
265 |
-
|
266 |
-
=> ๋ถ์ + DB ์ ์ฅ => ๊ฒฐ๊ณผ Markdown ๋ฐํ
|
267 |
"""
|
268 |
overall_result = "# [์ ์ฒด ๊ฒ์ ๊ฒฐ๊ณผ]\n\n"
|
269 |
|
@@ -281,8 +266,7 @@ def search_all_companies():
|
|
281 |
|
282 |
def load_all_companies():
|
283 |
"""
|
284 |
-
|
285 |
-
๊ธฐ์ฌ ๋ชฉ๋ก + ๋ถ์ ๋ณด๊ณ => ๊ฒฐ๊ณผ Markdown
|
286 |
"""
|
287 |
overall_result = "# [์ ์ฒด ์ถ๋ ฅ ๊ฒฐ๊ณผ]\n\n"
|
288 |
|
@@ -294,16 +278,12 @@ def load_all_companies():
|
|
294 |
|
295 |
def full_summary_report():
|
296 |
"""
|
297 |
-
|
298 |
-
|
|
|
299 |
"""
|
300 |
-
# 1) ์ ์ฒด ๊ฒ์(๋ณ๋ ฌ) => ๊ธฐ์ฌ + ๋ถ์ DB ์ ์ฅ
|
301 |
search_result_text = search_all_companies()
|
302 |
-
|
303 |
-
# 2) ์ ์ฒด ์ถ๋ ฅ => DB์ ์ ์ฅ๋ ๊ธฐ์ฌ + ๋ถ์ ๊ฒฐ๊ณผ
|
304 |
load_result_text = load_all_companies()
|
305 |
-
|
306 |
-
# 3) ์ ์ฒด ํต๊ณ(๊ฐ์ฑ ๋ถ์) - ๋ฆฌํฌํธ ์ ๋ชฉ ๋ณ๊ฒฝ๋จ(EarnBOT ๋ถ์ ๋ฆฌํฌํธ)
|
307 |
stats_text = show_stats()
|
308 |
|
309 |
combined_report = (
|
@@ -319,13 +299,13 @@ def full_summary_report():
|
|
319 |
|
320 |
|
321 |
########################################
|
322 |
-
# ์ฌ์ฉ์ ์์ ๊ฒ์
|
323 |
########################################
|
324 |
def search_custom(query, country):
|
325 |
"""
|
326 |
-
|
327 |
1) ๊ฒ์ + ๋ถ์ => DB ์ ์ฅ
|
328 |
-
2) DB ๋ก๋ =>
|
329 |
"""
|
330 |
error_message, articles = serphouse_search(query, country)
|
331 |
if error_message:
|
@@ -333,39 +313,30 @@ def search_custom(query, country):
|
|
333 |
if not articles:
|
334 |
return "๊ฒ์ ๊ฒฐ๊ณผ๊ฐ ์์ต๋๋ค."
|
335 |
|
336 |
-
# 1) ๋ถ์
|
337 |
analysis = analyze_sentiment_batch(articles, client)
|
338 |
-
|
339 |
-
# 2) DB ์ ์ฅ
|
340 |
save_data = {
|
341 |
"articles": articles,
|
342 |
"analysis": analysis
|
343 |
}
|
344 |
save_to_db(query, country, save_data)
|
345 |
|
346 |
-
# 3) DB ์ฌ๋ก๋
|
347 |
loaded_data, timestamp = load_from_db(query, country)
|
348 |
if not loaded_data:
|
349 |
return "DB์์ ๋ก๋ ์คํจ"
|
350 |
|
351 |
-
|
|
|
|
|
352 |
out = f"## [์ฌ์ฉ์ ์์ ๊ฒ์ ๊ฒฐ๊ณผ]\n\n"
|
353 |
out += f"**ํค์๋**: {query}\n\n"
|
354 |
out += f"**๊ตญ๊ฐ**: {country}\n\n"
|
355 |
out += f"**์ ์ฅ ์๊ฐ**: {timestamp}\n\n"
|
356 |
-
|
357 |
-
arts = loaded_data.get("articles", [])
|
358 |
-
analy = loaded_data.get("analysis", "")
|
359 |
-
|
360 |
out += display_results(arts)
|
361 |
out += f"### ๋ด์ค ๊ฐ์ฑ ๋ถ์\n{analy}\n"
|
362 |
|
363 |
return out
|
364 |
|
365 |
|
366 |
-
########################################
|
367 |
-
# API ์ธ์ฆ
|
368 |
-
########################################
|
369 |
ACCESS_TOKEN = os.getenv("HF_TOKEN")
|
370 |
if not ACCESS_TOKEN:
|
371 |
raise ValueError("HF_TOKEN environment variable is not set")
|
@@ -378,9 +349,6 @@ client = OpenAI(
|
|
378 |
API_KEY = os.getenv("SERPHOUSE_API_KEY")
|
379 |
|
380 |
|
381 |
-
########################################
|
382 |
-
# ๊ตญ๊ฐ๋ณ ์ค์
|
383 |
-
########################################
|
384 |
COUNTRY_LANGUAGES = {
|
385 |
"United States": "en",
|
386 |
"KOREA": "ko",
|
@@ -523,185 +491,6 @@ COUNTRY_LOCATIONS = {
|
|
523 |
"Iceland": "Iceland"
|
524 |
}
|
525 |
|
526 |
-
|
527 |
-
@lru_cache(maxsize=100)
def translate_query(query, country):
    """
    Translate a search query into the language configured for ``country``.

    Uses the unofficial Google Translate HTTP endpoint
    (``translate.googleapis.com/translate_a/single``). This is a best-effort
    helper: any failure is printed and the original query is returned.

    Args:
        query: Search text to translate.
        country: Country name used as a key into ``COUNTRY_LANGUAGES``.

    Returns:
        The translated query, or ``query`` unchanged when it is already
        ASCII-only (per ``is_english``), when ``country`` is "South Korea" or
        is missing from ``COUNTRY_LANGUAGES``, when the service returns no
        text, or when the request fails.

    Note:
        Results are memoized by ``lru_cache``, so the fallback value returned
        after a failed request is cached for that (query, country) pair too.
    """
    try:
        if is_english(query):
            return query

        # Nothing to translate for unknown countries or for South Korea.
        if country not in COUNTRY_LANGUAGES or country == "South Korea":
            return query

        url = "https://translate.googleapis.com/translate_a/single"
        params = {
            "client": "gtx",
            "sl": "auto",
            "tl": COUNTRY_LANGUAGES[country],
            "dt": "t",
            "q": query
        }

        retries = Retry(total=3, backoff_factor=0.5)
        # The context manager closes the session's connection pool even when
        # the request or the JSON decoding raises.
        with requests.Session() as session:
            session.mount('https://', HTTPAdapter(max_retries=retries))
            response = session.get(url, params=params, timeout=(5, 10))
            response.raise_for_status()
            payload = response.json()

        # payload[0] is assumed to be a list of segments whose first element
        # is the translated text (TODO confirm against the live endpoint).
        # Joining every segment keeps multi-sentence queries intact instead of
        # returning only the first one.
        segments = payload[0]
        translated_text = "".join(
            segment[0] for segment in segments if segment and segment[0]
        )
        return translated_text or query

    except Exception as e:
        # Deliberate best-effort: report the problem and fall back to the
        # untranslated query.
        print(f"๋ฒ์ญ ์ค๋ฅ: {str(e)}")
        return query
|
563 |
-
|
564 |
-
def is_english(text):
    """
    Report whether ``text`` contains only ASCII characters.

    Spaces, hyphens and underscores are removed before the check. An empty
    string (or one made up only of those separators) counts as English.
    """
    remainder = text
    for separator in (' ', '-', '_'):
        remainder = remainder.replace(separator, '')

    for symbol in remainder:
        if ord(symbol) >= 128:
            return False
    return True
|
566 |
-
|
567 |
-
def search_serphouse(query, country, page=1, num_result=10):
    """
    Send a live search request to the SerpHouse API for news about ``query``.

    The query is first passed through ``translate_query``. The request asks
    for Google news results sorted by date, restricted to the range from
    yesterday to today (UTC dates).

    Args:
        query: Search keyword.
        country: Country name, looked up in ``COUNTRY_LOCATIONS`` and
            ``COUNTRY_LANGUAGES`` (defaults: "United States" / "en").
        page: Result page number to request.
        num_result: Kept for backward compatibility; it is not used, the
            request always sends ``"num": "100"``.

    Returns:
        dict: ``{"results": <decoded JSON>, "translated_query": <str>}`` on
        success, or ``{"error": <message>, "translated_query": query}`` when
        the HTTP request times out or fails. Errors raised by the request are
        reported in the dict rather than propagated.
    """
    # Local import so this block does not depend on the file's import list.
    from datetime import timezone

    url = "https://api.serphouse.com/serp/live"

    # Timezone-aware replacement for the deprecated datetime.utcnow(); the
    # formatted dates are the same.
    now = datetime.now(timezone.utc)
    yesterday = now - timedelta(days=1)
    date_range = f"{yesterday.strftime('%Y-%m-%d')},{now.strftime('%Y-%m-%d')}"

    translated_query = translate_query(query, country)

    payload = {
        "data": {
            "q": translated_query,
            "domain": "google.com",
            "loc": COUNTRY_LOCATIONS.get(country, "United States"),
            "lang": COUNTRY_LANGUAGES.get(country, "en"),
            "device": "desktop",
            "serp_type": "news",
            "page": str(page),
            "num": "100",
            "date_range": date_range,
            "sort_by": "date"
        }
    }

    headers = {
        "accept": "application/json",
        "content-type": "application/json",
        "authorization": f"Bearer {API_KEY}"
    }

    try:
        retries = Retry(
            total=5,
            backoff_factor=1,
            status_forcelist=[500, 502, 503, 504, 429],
            allowed_methods=["POST"]
        )
        adapter = HTTPAdapter(max_retries=retries)

        # The context manager releases the connection pool on every path,
        # including the error paths handled below.
        with requests.Session() as session:
            session.mount('http://', adapter)
            session.mount('https://', adapter)

            response = session.post(
                url,
                json=payload,
                headers=headers,
                timeout=(30, 30)
            )

            response.raise_for_status()
            return {"results": response.json(), "translated_query": translated_query}

    except requests.exceptions.Timeout:
        return {
            "error": "๊ฒ์ ์๊ฐ์ด ์ด๊ณผ๋์์ต๋๋ค. ์ ์ ํ ๋ค์ ์๋ํด์ฃผ์ธ์.",
            "translated_query": query
        }
    except requests.exceptions.RequestException as e:
        return {
            "error": f"๊ฒ์ ์ค ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {str(e)}",
            "translated_query": query
        }
    except Exception as e:
        return {
            "error": f"์๊ธฐ์น ์์ ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {str(e)}",
            "translated_query": query
        }
|
640 |
-
|
641 |
-
def format_results_from_raw(response_data):
    """
    Turn a ``search_serphouse`` response into ``(error_message, articles)``.

    Articles whose URL or channel contains a Korean-domain marker, or whose
    title contains a Korea-related keyword, are dropped. The checks are plain
    case-insensitive substring matches.

    Args:
        response_data: dict with either an ``"error"`` key, or ``"results"``
            (decoded API JSON) and ``"translated_query"``.

    Returns:
        tuple: ``(error_message, articles)``. ``error_message`` is ``""`` on
        success. ``articles`` is a list of dicts with the keys ``index``
        (1-based position in the unfiltered results), ``title``, ``link``,
        ``snippet``, ``channel``, ``time``, ``image_url`` and
        ``translated_query``. On any error the list is empty.
    """
    if "error" in response_data:
        return "Error: " + response_data["error"], []

    try:
        results = response_data["results"]
        translated_query = response_data["translated_query"]

        # The news entries sit under results -> results -> news in the API JSON.
        news_results = results.get('results', {}).get('results', {}).get('news', [])
        if not news_results:
            return "๊ฒ์ ๊ฒฐ๊ณผ๊ฐ ์์ต๋๋ค.", []

        # Markers used to exclude Korean domains and Korea-related headlines.
        korean_domains = [
            '.kr', 'korea', 'korean', 'yonhap', 'hankyung', 'chosun',
            'donga', 'joins', 'hani', 'koreatimes', 'koreaherald'
        ]
        korean_keywords = [
            'korea', 'korean', 'seoul', 'busan', 'incheon', 'daegu',
            'gwangju', 'daejeon', 'ulsan', 'sejong'
        ]

        filtered_articles = []
        for idx, result in enumerate(news_results, 1):
            # `or` chaining also covers keys that are present but None/empty,
            # so one malformed entry no longer aborts the whole result set.
            link = result.get("url") or result.get("link") or ""
            source = result.get("channel") or result.get("source") or ""

            # Lowercased copies are used for matching only.
            url_lc = link.lower()
            title_lc = (result.get("title") or "").lower()
            channel_lc = source.lower()

            is_korean_content = (
                any(domain in url_lc or domain in channel_lc for domain in korean_domains) or
                any(keyword in title_lc for keyword in korean_keywords)
            )
            if is_korean_content:
                continue

            filtered_articles.append({
                "index": idx,
                "title": result.get("title", "์ ๋ชฉ ์์"),
                # Keep the URL's original case: paths and query strings are
                # case-sensitive, so a lowercased link can be broken.
                "link": link,
                "snippet": result.get("snippet", "๋ด์ฉ ์์"),
                "channel": result.get("channel", result.get("source", "์ ์ ์์")),
                "time": result.get("time", result.get("date", "์ ์ ์๋ ์๊ฐ")),
                "image_url": result.get("img", result.get("thumbnail", "")),
                "translated_query": translated_query
            })

        return "", filtered_articles
    except Exception as e:
        # Callers expect an (error, articles) tuple rather than an exception.
        return f"๊ฒฐ๊ณผ ์ฒ๋ฆฌ ์ค ์ค๋ฅ ๋ฐ์: {str(e)}", []
|
695 |
-
|
696 |
-
def serphouse_search(query, country):
    """
    Run a SerpHouse search and format the response in a single call.

    Passes the result of ``search_serphouse(query, country)`` straight to
    ``format_results_from_raw`` and returns what that function returns.
    """
    return format_results_from_raw(search_serphouse(query, country))
|
702 |
-
|
703 |
-
|
704 |
-
# CSS (UI ์ปค์คํฐ๋ง์ด์ง)
|
705 |
css = """
|
706 |
/* ์ ์ญ ์คํ์ผ */
|
707 |
footer {visibility: hidden;}
|
@@ -877,8 +666,6 @@ footer {visibility: hidden;}
|
|
877 |
}
|
878 |
"""
|
879 |
|
880 |
-
import gradio as gr
|
881 |
-
|
882 |
with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css, title="NewsAI ์๋น์ค") as iface:
|
883 |
init_db()
|
884 |
|
|
|
47 |
def analyze_sentiment_batch(articles, client):
|
48 |
"""
|
49 |
OpenAI API๋ฅผ ํตํด ๋ด์ค ๊ธฐ์ฌ๋ค์ ์ข
ํฉ ๊ฐ์ฑ ๋ถ์์ ์ํ
|
50 |
+
(๋ถ์ ๊ฒฐ๊ณผ๋ฅผ ํ๊ตญ์ด๋ก ์์ฑํ๋๋ก ์ ๋)
|
51 |
"""
|
52 |
try:
|
53 |
# ๋ชจ๋ ๊ธฐ์ฌ์ ์ ๋ชฉ๊ณผ ๋ด์ฉ์ ํ๋์ ํ
์คํธ๋ก ๊ฒฐํฉ
|
|
|
56 |
for article in articles
|
57 |
])
|
58 |
|
59 |
+
# ํ๊ตญ์ด ์์ฑ์ ์ ๋ํ๋ ๋ฌธ๊ตฌ ์ถ๊ฐ
|
60 |
+
prompt = f"""๋ค์ ๋ด์ค ๋ชจ์์ ๋ํด ์ ๋ฐ์ ์ธ ๊ฐ์ฑ ๋ถ์์ ์ํํ์ธ์. (ํ๊ตญ์ด๋ก ์์ฑํ์ธ์)
|
61 |
|
62 |
๋ด์ค ๋ด์ฉ:
|
63 |
{combined_text}
|
|
|
150 |
def search_company(company):
|
151 |
"""
|
152 |
๋จ์ผ ๊ธฐ์
(๋๋ ํค์๋)์ ๋ํด ๋ฏธ๊ตญ ๋ด์ค ๊ฒ์ ํ,
|
153 |
+
๊ธฐ์ฌ ๋ชฉ๋ก + ๊ฐ์ฑ ๋ถ์ ๋ณด๊ณ ๋ฅผ ํจ๊ป ์ถ๋ ฅ
|
154 |
=> { "articles": [...], "analysis": ... } ํํ๋ก DB์ ์ ์ฅ
|
155 |
"""
|
156 |
error_message, articles = serphouse_search(company, "United States")
|
157 |
if not error_message and articles:
|
|
|
158 |
analysis = analyze_sentiment_batch(articles, client)
|
|
|
|
|
159 |
store_dict = {
|
160 |
"articles": articles,
|
161 |
"analysis": analysis
|
162 |
}
|
163 |
save_to_db(company, "United States", store_dict)
|
164 |
|
|
|
165 |
output = display_results(articles)
|
166 |
output += f"\n\n### ๋ถ์ ๋ณด๊ณ \n{analysis}\n"
|
167 |
return output
|
|
|
172 |
########################################
|
173 |
def load_company(company):
|
174 |
"""
|
175 |
+
DB์์ (keyword=company, country=United States)์ ํด๋นํ๋
|
176 |
+
๊ธฐ์ฌ ๋ชฉ๋ก + ๋ถ์ ๋ณด๊ณ ๋ฅผ ํจ๊ป ์ถ๋ ฅ
|
177 |
"""
|
178 |
data, timestamp = load_from_db(company, "United States")
|
179 |
if data:
|
|
|
180 |
articles = data.get("articles", [])
|
181 |
analysis = data.get("analysis", "")
|
182 |
|
|
|
188 |
|
189 |
|
190 |
########################################
|
191 |
+
# 3) ๋ฆฌํฌํธ: "EarnBOT ๋ถ์ ๋ฆฌํฌํธ"
|
192 |
########################################
|
193 |
def show_stats():
|
194 |
"""
|
195 |
+
KOREAN_COMPANIES ๋ด ๋ชจ๋ ๊ธฐ์
์
|
196 |
+
- ์ต์ DB ์ ์ฅ ์๊ฐ
|
197 |
+
- ๊ธฐ์ฌ ์
|
198 |
+
- ๊ฐ์ฑ ๋ถ์ ๊ฒฐ๊ณผ
|
199 |
+
๋ฑ์ ๋ณ๋ ฌ๋ก ์กฐํ, ๋ณด๊ณ ์ ํํ๋ก ๋ฐํ
|
|
|
|
|
200 |
"""
|
201 |
conn = sqlite3.connect("search_results.db")
|
202 |
c = conn.cursor()
|
203 |
|
204 |
+
output = "## EarnBOT ๋ถ์ ๋ฆฌํฌํธ\n\n"
|
205 |
|
|
|
206 |
data_list = []
|
207 |
for company in KOREAN_COMPANIES:
|
208 |
c.execute("""
|
|
|
215 |
|
216 |
row = c.fetchone()
|
217 |
if row:
|
218 |
+
results_json, tstamp = row
|
219 |
+
data_list.append((company, tstamp, results_json))
|
220 |
|
221 |
conn.close()
|
222 |
|
|
|
223 |
def analyze_data(item):
|
224 |
comp, tstamp, results_json = item
|
225 |
data = json.loads(results_json)
|
226 |
articles = data.get("articles", [])
|
227 |
analysis = data.get("analysis", "")
|
|
|
228 |
count_articles = len(articles)
|
|
|
|
|
|
|
|
|
229 |
return (comp, tstamp, count_articles, analysis)
|
230 |
|
231 |
results_list = []
|
|
|
234 |
for future in as_completed(futures):
|
235 |
results_list.append(future.result())
|
236 |
|
|
|
237 |
for comp, tstamp, count, analysis in results_list:
|
238 |
seoul_time = convert_to_seoul_time(tstamp)
|
239 |
output += f"### {comp}\n"
|
|
|
246 |
|
247 |
return output
|
248 |
|
|
|
249 |
def search_all_companies():
|
250 |
"""
|
251 |
+
๋ชจ๋ ๊ธฐ์
๊ฒ์ + ๋ถ์ -> DB ์ ์ฅ
|
|
|
252 |
"""
|
253 |
overall_result = "# [์ ์ฒด ๊ฒ์ ๊ฒฐ๊ณผ]\n\n"
|
254 |
|
|
|
266 |
|
267 |
def load_all_companies():
|
268 |
"""
|
269 |
+
๋ชจ๋ ๊ธฐ์
์ ๋ํ DB ์ ์ฅ๋ ๊ธฐ์ฌ + ๋ถ์ ์ถ๋ ฅ
|
|
|
270 |
"""
|
271 |
overall_result = "# [์ ์ฒด ์ถ๋ ฅ ๊ฒฐ๊ณผ]\n\n"
|
272 |
|
|
|
278 |
|
279 |
def full_summary_report():
|
280 |
"""
|
281 |
+
1) ๋ชจ๋ ๊ธฐ์
๋ณ๋ ฌ ๊ฒ์+๋ถ์ => DB ์ ์ฅ
|
282 |
+
2) DB์์ ๋ก๋ => ๊ธฐ์ฌ + ๋ถ์ ์ถ๋ ฅ
|
283 |
+
3) EarnBOT ๋ถ์ ๋ฆฌํฌํธ
|
284 |
"""
|
|
|
285 |
search_result_text = search_all_companies()
|
|
|
|
|
286 |
load_result_text = load_all_companies()
|
|
|
|
|
287 |
stats_text = show_stats()
|
288 |
|
289 |
combined_report = (
|
|
|
299 |
|
300 |
|
301 |
########################################
|
302 |
+
# ์ฌ์ฉ์ ์์ ๊ฒ์
|
303 |
########################################
|
304 |
def search_custom(query, country):
|
305 |
"""
|
306 |
+
(query, country)์ ๋ํด
|
307 |
1) ๊ฒ์ + ๋ถ์ => DB ์ ์ฅ
|
308 |
+
2) DB ๋ก๋ => ๊ธฐ์ฌ+๋ถ์ ์ถ๋ ฅ
|
309 |
"""
|
310 |
error_message, articles = serphouse_search(query, country)
|
311 |
if error_message:
|
|
|
313 |
if not articles:
|
314 |
return "๊ฒ์ ๊ฒฐ๊ณผ๊ฐ ์์ต๋๋ค."
|
315 |
|
|
|
316 |
analysis = analyze_sentiment_batch(articles, client)
|
|
|
|
|
317 |
save_data = {
|
318 |
"articles": articles,
|
319 |
"analysis": analysis
|
320 |
}
|
321 |
save_to_db(query, country, save_data)
|
322 |
|
|
|
323 |
loaded_data, timestamp = load_from_db(query, country)
|
324 |
if not loaded_data:
|
325 |
return "DB์์ ๋ก๋ ์คํจ"
|
326 |
|
327 |
+
arts = loaded_data.get("articles", [])
|
328 |
+
analy = loaded_data.get("analysis", "")
|
329 |
+
|
330 |
out = f"## [์ฌ์ฉ์ ์์ ๊ฒ์ ๊ฒฐ๊ณผ]\n\n"
|
331 |
out += f"**ํค์๋**: {query}\n\n"
|
332 |
out += f"**๊ตญ๊ฐ**: {country}\n\n"
|
333 |
out += f"**์ ์ฅ ์๊ฐ**: {timestamp}\n\n"
|
|
|
|
|
|
|
|
|
334 |
out += display_results(arts)
|
335 |
out += f"### ๋ด์ค ๊ฐ์ฑ ๋ถ์\n{analy}\n"
|
336 |
|
337 |
return out
|
338 |
|
339 |
|
|
|
|
|
|
|
340 |
ACCESS_TOKEN = os.getenv("HF_TOKEN")
|
341 |
if not ACCESS_TOKEN:
|
342 |
raise ValueError("HF_TOKEN environment variable is not set")
|
|
|
349 |
API_KEY = os.getenv("SERPHOUSE_API_KEY")
|
350 |
|
351 |
|
|
|
|
|
|
|
352 |
COUNTRY_LANGUAGES = {
|
353 |
"United States": "en",
|
354 |
"KOREA": "ko",
|
|
|
491 |
"Iceland": "Iceland"
|
492 |
}
|
493 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
494 |
css = """
|
495 |
/* ์ ์ญ ์คํ์ผ */
|
496 |
footer {visibility: hidden;}
|
|
|
666 |
}
|
667 |
"""
|
668 |
|
|
|
|
|
669 |
with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css, title="NewsAI ์๋น์ค") as iface:
|
670 |
init_db()
|
671 |
|