# MoneyRadar / app.py
# ginipick's picture
# Update app.py
# 3c561b8 verified
# raw
# history blame
# 29.9 kB
import gradio as gr
import requests
import json
import os
from datetime import datetime, timedelta
from concurrent.futures import ThreadPoolExecutor, as_completed
from functools import lru_cache
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
from openai import OpenAI
from bs4 import BeautifulSoup
import re
import pathlib
import sqlite3
import pytz
# Default watch list of search keywords. Every entry gets its own
# search/load button pair in the UI and is iterated by the bulk search,
# bulk load and statistics functions.
# NOTE(review): despite the name, the entries are global company names and
# finance topics rather than Korean companies.
KOREAN_COMPANIES = [
"NVIDIA",
"ALPHABET",
"APPLE",
"TESLA",
"AMAZON",
"MICROSOFT",
"META",
"INTEL",
"SAMSUNG",
"HYNIX",
"BITCOIN",
"crypto",
"stock",
"Economics",
"Finance",
"investing"
]
def convert_to_seoul_time(timestamp_str):
    """Label a ``YYYY-MM-DD HH:MM:SS`` timestamp string as Korea Standard Time.

    The value is validated by parsing it and is then re-emitted with a
    ``KST`` suffix. No clock arithmetic is performed: the input is taken to
    already be Seoul wall-clock time. The previous implementation attached
    the Asia/Seoul zone through pytz before formatting, but the format
    string contains no zone directive, so that step never influenced the
    result and has been dropped.

    Args:
        timestamp_str: Timestamp text in ``%Y-%m-%d %H:%M:%S`` format.

    Returns:
        ``"<timestamp> KST"`` on success. If the value cannot be parsed
        (wrong format or not a string) the problem is printed and the input
        is returned unchanged.
    """
    try:
        parsed = datetime.strptime(timestamp_str, '%Y-%m-%d %H:%M:%S')
    except (TypeError, ValueError) as e:
        # Best effort: callers only display the value, so fall back to the
        # raw input instead of raising.
        print(f"์‹œ๊ฐ„ ๋ณ€ํ™˜ ์˜ค๋ฅ˜: {str(e)}")
        return timestamp_str
    return parsed.strftime('%Y-%m-%d %H:%M:%S KST')
def analyze_sentiment_batch(articles, client):
    """
    Request one combined sentiment analysis for a batch of news articles.

    Each article's ``title`` and ``snippet`` (empty string when the key is
    absent) are rendered into a single prompt, which is sent as one user
    message through ``client.chat.completions.create``.

    Args:
        articles: Iterable of dict-like articles.
        client: Object exposing ``chat.completions.create`` (OpenAI-style).

    Returns:
        The message content of the first returned choice, or a failure
        message string containing the exception text if anything raises.
    """
    try:
        # Render every article as a "title / content" section.
        article_sections = []
        for entry in articles:
            heading = entry.get('title', '')
            body = entry.get('snippet', '')
            article_sections.append(f"์ œ๋ชฉ: {heading}\n๋‚ด์šฉ: {body}")
        combined_text = "\n\n".join(article_sections)

        prompt = f"""๋‹ค์Œ ๋‰ด์Šค ๋ชจ์Œ์— ๋Œ€ํ•ด ์ „๋ฐ˜์ ์ธ ๊ฐ์„ฑ ๋ถ„์„์„ ์ˆ˜ํ–‰ํ•˜์„ธ์š”:
๋‰ด์Šค ๋‚ด์šฉ:
{combined_text}
๋‹ค์Œ ํ˜•์‹์œผ๋กœ ๋ถ„์„ํ•ด์ฃผ์„ธ์š”:
1. ์ „๋ฐ˜์  ๊ฐ์„ฑ: [๊ธ์ •/๋ถ€์ •/์ค‘๋ฆฝ]
2. ์ฃผ์š” ๊ธ์ •์  ์š”์†Œ:
- [ํ•ญ๋ชฉ1]
- [ํ•ญ๋ชฉ2]
3. ์ฃผ์š” ๋ถ€์ •์  ์š”์†Œ:
- [ํ•ญ๋ชฉ1]
- [ํ•ญ๋ชฉ2]
4. ์ข…ํ•ฉ ํ‰๊ฐ€: [์ƒ์„ธ ์„ค๋ช…]
"""
        request_messages = [{"role": "user", "content": prompt}]
        completion = client.chat.completions.create(
            model="CohereForAI/c4ai-command-r-plus-08-2024",
            messages=request_messages,
            temperature=0.3,
            max_tokens=1000
        )
        first_choice = completion.choices[0]
        return first_choice.message.content
    except Exception as e:
        # Any failure is reported as text so the caller can still render it.
        return f"๊ฐ์„ฑ ๋ถ„์„ ์‹คํŒจ: {e!s}"
# Database initialisation
def init_db():
    """Create the ``searches`` table in ``search_results.db`` if it is missing.

    The database file lives in the current working directory. The table has
    an auto-increment ``id``, the ``keyword`` and ``country`` of the search,
    the JSON ``results`` text and a ``timestamp`` that defaults to the
    insertion time.

    The connection is closed even when table creation raises, so a failing
    start-up no longer leaks an open database handle.
    """
    db_path = pathlib.Path("search_results.db")
    conn = sqlite3.connect(db_path)
    try:
        c = conn.cursor()
        c.execute('''CREATE TABLE IF NOT EXISTS searches
(id INTEGER PRIMARY KEY AUTOINCREMENT,
keyword TEXT,
country TEXT,
results TEXT,
timestamp DATETIME DEFAULT CURRENT_TIMESTAMP)''')
        conn.commit()
    finally:
        conn.close()
def save_to_db(keyword, country, results):
    """
    Store the search results for one (keyword, country) pair.

    ``results`` is serialised with ``json.dumps`` and inserted into the
    ``searches`` table together with the current Asia/Seoul wall-clock time
    formatted as ``YYYY-MM-DD HH:MM:SS``.

    Args:
        keyword: Search keyword the results belong to.
        country: Country name the search was run for.
        results: JSON-serialisable payload to store.

    The connection is closed even if serialisation or the insert raises.
    """
    conn = sqlite3.connect("search_results.db")
    try:
        c = conn.cursor()
        seoul_tz = pytz.timezone('Asia/Seoul')
        now = datetime.now(seoul_tz)
        timestamp = now.strftime('%Y-%m-%d %H:%M:%S')
        c.execute("""INSERT INTO searches
(keyword, country, results, timestamp)
VALUES (?, ?, ?, ?)""",
                  (keyword, country, json.dumps(results), timestamp))
        conn.commit()
    finally:
        conn.close()
def load_from_db(keyword, country):
    """
    Load the most recent stored search results for a (keyword, country) pair.

    Rows are ordered by ``timestamp`` and then by ``id``. Timestamps only
    have one-second resolution, so the ``id`` tie-break guarantees that the
    row inserted last wins when two saves happen within the same second.

    Args:
        keyword: Search keyword to look up.
        country: Country name to look up.

    Returns:
        ``(results, timestamp)`` where ``results`` is the decoded JSON
        payload and ``timestamp`` is the stored time passed through
        ``convert_to_seoul_time``; ``(None, None)`` when no row matches.

    The connection is closed even if the query raises.
    """
    conn = sqlite3.connect("search_results.db")
    try:
        c = conn.cursor()
        c.execute(
            "SELECT results, timestamp FROM searches "
            "WHERE keyword=? AND country=? "
            "ORDER BY timestamp DESC, id DESC LIMIT 1",
            (keyword, country))
        result = c.fetchone()
    finally:
        conn.close()
    if result:
        return json.loads(result[0]), convert_to_seoul_time(result[1])
    return None, None
def display_results(articles):
    """
    Render a list of news articles as one Markdown string.

    Every article becomes a numbered ``###`` heading (numbering starts at 1)
    followed by its channel, time, link and snippet lines and a blank line.
    Each article must provide the ``title``, ``channel``, ``time``, ``link``
    and ``snippet`` keys. An empty list yields an empty string.
    """
    rendered = [
        f"### {position}. {item['title']}\n"
        f"์ถœ์ฒ˜: {item['channel']}\n"
        f"์‹œ๊ฐ„: {item['time']}\n"
        f"๋งํฌ: {item['link']}\n"
        f"์š”์•ฝ: {item['snippet']}\n\n"
        for position, item in enumerate(articles, start=1)
    ]
    return "".join(rendered)
########################################
# 1) Search => show articles + analysis together, store in the DB
########################################
def search_company(company):
    """
    Search United States news for one company (or keyword) and report on it.

    When the search yields articles without an error message, a sentiment
    analysis is requested, ``{"articles": [...], "analysis": ...}`` is saved
    to the DB under (company, "United States"), and the article list plus
    the analysis section is returned as Markdown. In every other case a
    "no search results" message for the company is returned.
    """
    target_country = "United States"
    error_message, articles = serphouse_search(company, target_country)
    if error_message or not articles:
        return f"{company}์— ๋Œ€ํ•œ ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค."

    # Sentiment analysis over the whole batch.
    analysis = analyze_sentiment_batch(articles, client)

    # Persist articles and analysis as a single record.
    save_to_db(company, target_country, {"articles": articles, "analysis": analysis})

    # Text shown to the user.
    article_markdown = display_results(articles)
    return article_markdown + f"\n\n### ๋ถ„์„ ๋ณด๊ณ \n{analysis}\n"
########################################
# 2) Output => show the stored articles + analysis together
########################################
def load_company(company):
    """
    Load the stored United States search result for one company (or keyword).

    Returns Markdown made of a heading with the stored time, the article
    list and the analysis section, or a "no stored results" message when
    the DB has nothing for the company.
    """
    data, timestamp = load_from_db(company, "United States")
    if not data:
        return f"{company}์— ๋Œ€ํ•œ ์ €์žฅ๋œ ๊ฒฐ๊ณผ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค."

    # Stored payload has the shape {"articles": [...], "analysis": "..."}.
    stored_articles = data.get("articles", [])
    stored_analysis = data.get("analysis", "")
    pieces = [
        f"### {company} ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ\n์ €์žฅ ์‹œ๊ฐ„: {timestamp}\n\n",
        display_results(stored_articles),
        f"\n\n### ๋ถ„์„ ๋ณด๊ณ \n{stored_analysis}\n",
    ]
    return "".join(pieces)
########################################
# 3) show_stats() with the changed report title
########################################
def show_stats():
    """
    Build the "EarnBOT" analysis report for every entry in KOREAN_COMPANIES.

    For each keyword that has at least one stored row, the most recent row
    is read and the report lists the last update time, the number of stored
    articles and, when present, the stored sentiment analysis. The stored
    analysis is reused; nothing is re-analysed here.

    Sections appear in KOREAN_COMPANIES order. ``Executor.map`` is used
    instead of ``as_completed`` so the worker pool no longer shuffles the
    report between runs. The DB connection is closed even if a query raises.

    Returns:
        The report as a Markdown string.
    """
    latest_row_query = """
SELECT results, timestamp
FROM searches
WHERE keyword = ?
ORDER BY timestamp DESC
LIMIT 1
"""
    # Collect (company, timestamp, results_json) for every keyword with data.
    data_list = []
    conn = sqlite3.connect("search_results.db")
    try:
        c = conn.cursor()
        for company in KOREAN_COMPANIES:
            c.execute(latest_row_query, (company,))
            row = c.fetchone()
            if row:
                results_json, timestamp = row
                data_list.append((company, timestamp, results_json))
    finally:
        conn.close()

    def analyze_data(item):
        """Decode one stored row into (company, timestamp, article count, analysis)."""
        comp, tstamp, results_json = item
        data = json.loads(results_json)
        articles = data.get("articles", [])
        analysis = data.get("analysis", "")
        return (comp, tstamp, len(articles), analysis)

    with ThreadPoolExecutor(max_workers=5) as executor:
        # map() yields results in submission order.
        results_list = list(executor.map(analyze_data, data_list))

    parts = ["## EarnBOT ๋ถ„์„ ๋ฆฌํฌํŠธ\n\n"]
    for comp, tstamp, count, analysis in results_list:
        seoul_time = convert_to_seoul_time(tstamp)
        parts.append(f"### {comp}\n")
        parts.append(f"- ๋งˆ์ง€๋ง‰ ์—…๋ฐ์ดํŠธ: {seoul_time}\n")
        parts.append(f"- ์ €์žฅ๋œ ๊ธฐ์‚ฌ ์ˆ˜: {count}๊ฑด\n\n")
        if analysis:
            parts.append("#### ๋‰ด์Šค ๊ฐ์„ฑ ๋ถ„์„\n")
            parts.append(f"{analysis}\n\n")
        parts.append("---\n\n")
    return "".join(parts)
def search_all_companies():
    """
    Run ``search_company`` for every entry in KOREAN_COMPANIES on a pool of
    five worker threads and return the combined Markdown.

    Each search also performs the analysis and the DB save (see
    ``search_company``). Sections are emitted in KOREAN_COMPANIES order:
    ``Executor.map`` returns results in submission order, so the output no
    longer depends on which request happens to finish first and matches the
    ordering used by ``load_all_companies``.
    """
    with ThreadPoolExecutor(max_workers=5) as executor:
        result_texts = list(executor.map(search_company, KOREAN_COMPANIES))

    parts = ["# [์ „์ฒด ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ]\n\n"]
    for comp, res_text in zip(KOREAN_COMPANIES, result_texts):
        parts.append(f"## {comp}\n")
        parts.append(res_text + "\n\n")
    return "".join(parts)
def load_all_companies():
    """
    Load the stored result of every entry in KOREAN_COMPANIES from the DB
    and return them as one Markdown document (a ``##`` heading per entry,
    followed by that entry's ``load_company`` output).
    """
    sections = ["# [์ „์ฒด ์ถœ๋ ฅ ๊ฒฐ๊ณผ]\n\n"]
    for name in KOREAN_COMPANIES:
        sections.append(f"## {name}\n" + load_company(name) + "\n")
    return "".join(sections)
def full_summary_report():
    """
    Produce the complete report by running, in order:
    (1) the parallel search of every keyword, (2) the load of every stored
    result from the DB and (3) the sentiment statistics, then concatenating
    the three outputs under a summary heading.
    """
    # 1) Search everything (articles + analysis are saved to the DB).
    search_result_text = search_all_companies()
    # 2) Read everything back from the DB.
    load_result_text = load_all_companies()
    # 3) Statistics section (titled "EarnBOT" report).
    stats_text = show_stats()

    report_parts = [
        "# ์ „์ฒด ๋ถ„์„ ๋ณด๊ณ  ์š”์•ฝ\n\n",
        "์•„๋ž˜ ์ˆœ์„œ๋กœ ์‹คํ–‰๋˜์—ˆ์Šต๋‹ˆ๋‹ค:\n",
        "1. ๋ชจ๋“  ์ข…๋ชฉ ๊ฒ€์ƒ‰(๋ณ‘๋ ฌ) + ๋ถ„์„ => 2. ๋ชจ๋“  ์ข…๋ชฉ DB ๊ฒฐ๊ณผ ์ถœ๋ ฅ => 3. ์ „์ฒด ๊ฐ์„ฑ ๋ถ„์„ ํ†ต๊ณ„\n\n",
        f"{search_result_text}\n\n",
        f"{load_result_text}\n\n",
        "## [์ „์ฒด ๊ฐ์„ฑ ๋ถ„์„ ํ†ต๊ณ„]\n\n",
        f"{stats_text}",
    ]
    return "".join(report_parts)
########################################
# Free-form user search (additional feature)
########################################
def search_custom(query, country):
    """
    Search, analyse and display results for a user-supplied (query, country).

    Steps: run the search; on an error message or an empty article list
    return the corresponding message. Otherwise request the sentiment
    analysis, save articles + analysis to the DB, load the record back and
    render keyword, country, stored time, article list and analysis as
    Markdown.
    """
    error_message, articles = serphouse_search(query, country)
    if error_message:
        return f"์˜ค๋ฅ˜ ๋ฐœ์ƒ: {error_message}"
    if not articles:
        return "๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค."

    # Analyse, then persist both pieces as one record.
    analysis = analyze_sentiment_batch(articles, client)
    save_to_db(query, country, {"articles": articles, "analysis": analysis})

    # Read the record back so the displayed data is what was stored.
    loaded_data, timestamp = load_from_db(query, country)
    if not loaded_data:
        return "DB์—์„œ ๋กœ๋“œ ์‹คํŒจ"

    stored_articles = loaded_data.get("articles", [])
    stored_analysis = loaded_data.get("analysis", "")
    segments = [
        "## [์‚ฌ์šฉ์ž ์ž„์˜ ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ]\n\n",
        f"**ํ‚ค์›Œ๋“œ**: {query}\n\n",
        f"**๊ตญ๊ฐ€**: {country}\n\n",
        f"**์ €์žฅ ์‹œ๊ฐ„**: {timestamp}\n\n",
        display_results(stored_articles),
        f"### ๋‰ด์Šค ๊ฐ์„ฑ ๋ถ„์„\n{stored_analysis}\n",
    ]
    return "".join(segments)
########################################
# API credentials
########################################
# Hugging Face token: required, the module refuses to load without it.
ACCESS_TOKEN = os.environ.get("HF_TOKEN")
if not ACCESS_TOKEN:
    raise ValueError("HF_TOKEN environment variable is not set")

# OpenAI-compatible client pointed at the Hugging Face inference endpoint.
client = OpenAI(
    api_key=ACCESS_TOKEN,
    base_url="https://api-inference.huggingface.co/v1/",
)

# SerpHouse key: read here, sent as the bearer token by search_serphouse.
API_KEY = os.environ.get("SERPHOUSE_API_KEY")
########################################
# Per-country settings
########################################
# Country name -> language code. Used as the translation target language in
# translate_query and as the "lang" field of the SerpHouse request.
# The last four entries previously held the country name instead of a
# language code, which is not a valid translation target.
COUNTRY_LANGUAGES = {
    "United States": "en",
    "KOREA": "ko",
    "United Kingdom": "en",
    "Taiwan": "zh-TW",
    "Canada": "en",
    "Australia": "en",
    "Germany": "de",
    "France": "fr",
    "Japan": "ja",
    "China": "zh",
    "India": "hi",
    "Brazil": "pt",
    "Mexico": "es",
    "Russia": "ru",
    "Italy": "it",
    "Spain": "es",
    "Netherlands": "nl",
    "Singapore": "en",
    "Hong Kong": "zh-HK",
    "Indonesia": "id",
    "Malaysia": "ms",
    "Philippines": "tl",
    "Thailand": "th",
    "Vietnam": "vi",
    "Belgium": "nl",
    "Denmark": "da",
    "Finland": "fi",
    "Ireland": "en",
    "Norway": "no",
    "Poland": "pl",
    "Sweden": "sv",
    "Switzerland": "de",
    "Austria": "de",
    "Czech Republic": "cs",
    "Greece": "el",
    "Hungary": "hu",
    "Portugal": "pt",
    "Romania": "ro",
    "Turkey": "tr",
    "Israel": "he",
    "Saudi Arabia": "ar",
    "United Arab Emirates": "ar",
    "South Africa": "en",
    "Argentina": "es",
    "Chile": "es",
    "Colombia": "es",
    "Peru": "es",
    "Venezuela": "es",
    "New Zealand": "en",
    "Bangladesh": "bn",
    "Pakistan": "ur",
    "Egypt": "ar",
    "Morocco": "ar",
    "Nigeria": "en",
    "Kenya": "sw",
    "Ukraine": "uk",
    "Croatia": "hr",
    "Slovakia": "sk",
    "Bulgaria": "bg",
    "Serbia": "sr",
    "Estonia": "et",
    "Latvia": "lv",
    "Lithuania": "lt",
    "Slovenia": "sl",
    "Luxembourg": "fr",
    "Malta": "mt",
    "Cyprus": "el",
    "Iceland": "is",
}
# Country name -> value sent as the "loc" field of the SerpHouse request.
# The keys also populate the country dropdown in the UI.
# Estonia, Latvia, Lithuania and Slovenia previously carried their language
# codes ("et", "lv", "lt", "sl") here; they now use the location name like
# every other entry.
# NOTE(review): "KOREA" maps to "kr", which is not a location name like the
# other values - confirm the expected value against the SerpHouse API.
COUNTRY_LOCATIONS = {
    "United States": "United States",
    "KOREA": "kr",
    "United Kingdom": "United Kingdom",
    "Taiwan": "Taiwan",
    "Canada": "Canada",
    "Australia": "Australia",
    "Germany": "Germany",
    "France": "France",
    "Japan": "Japan",
    "China": "China",
    "India": "India",
    "Brazil": "Brazil",
    "Mexico": "Mexico",
    "Russia": "Russia",
    "Italy": "Italy",
    "Spain": "Spain",
    "Netherlands": "Netherlands",
    "Singapore": "Singapore",
    "Hong Kong": "Hong Kong",
    "Indonesia": "Indonesia",
    "Malaysia": "Malaysia",
    "Philippines": "Philippines",
    "Thailand": "Thailand",
    "Vietnam": "Vietnam",
    "Belgium": "Belgium",
    "Denmark": "Denmark",
    "Finland": "Finland",
    "Ireland": "Ireland",
    "Norway": "Norway",
    "Poland": "Poland",
    "Sweden": "Sweden",
    "Switzerland": "Switzerland",
    "Austria": "Austria",
    "Czech Republic": "Czech Republic",
    "Greece": "Greece",
    "Hungary": "Hungary",
    "Portugal": "Portugal",
    "Romania": "Romania",
    "Turkey": "Turkey",
    "Israel": "Israel",
    "Saudi Arabia": "Saudi Arabia",
    "United Arab Emirates": "United Arab Emirates",
    "South Africa": "South Africa",
    "Argentina": "Argentina",
    "Chile": "Chile",
    "Colombia": "Colombia",
    "Peru": "Peru",
    "Venezuela": "Venezuela",
    "New Zealand": "New Zealand",
    "Bangladesh": "Bangladesh",
    "Pakistan": "Pakistan",
    "Egypt": "Egypt",
    "Morocco": "Morocco",
    "Nigeria": "Nigeria",
    "Kenya": "Kenya",
    "Ukraine": "Ukraine",
    "Croatia": "Croatia",
    "Slovakia": "Slovakia",
    "Bulgaria": "Bulgaria",
    "Serbia": "Serbia",
    "Estonia": "Estonia",
    "Latvia": "Latvia",
    "Lithuania": "Lithuania",
    "Slovenia": "Slovenia",
    "Luxembourg": "Luxembourg",
    "Malta": "Malta",
    "Cyprus": "Cyprus",
    "Iceland": "Iceland",
}
@lru_cache(maxsize=100)
def translate_query(query, country):
    """
    Translate a search query into the language configured for ``country``
    using the unofficial Google Translate endpoint.

    The query is returned unchanged when it is ASCII-only (``is_english``),
    when the country is Korea, or when the country has no entry in
    COUNTRY_LANGUAGES. Results are memoised per (query, country), and that
    includes the fallback value returned after a failure.

    Changes from the previous version:
    - The Korea shortcut compared against "South Korea", a key that does not
      exist in COUNTRY_LANGUAGES (the key is "KOREA"), so it could never
      trigger. Both spellings are accepted now.
    - The endpoint returns the translation as a list of segments; all
      segments are joined instead of keeping only the first one, so longer
      queries are no longer truncated.
    - HTTP error statuses are detected via ``raise_for_status`` and the
      session is closed after use.

    Args:
        query: Search text entered by the user.
        country: Country name, a key of COUNTRY_LANGUAGES.

    Returns:
        The translated text, or ``query`` itself when no translation is
        needed or the translation attempt fails (the error is printed).
    """
    try:
        if is_english(query):
            return query
        if country in ("KOREA", "South Korea") or country not in COUNTRY_LANGUAGES:
            return query

        target_lang = COUNTRY_LANGUAGES[country]
        url = "https://translate.googleapis.com/translate_a/single"
        params = {
            "client": "gtx",
            "sl": "auto",
            "tl": target_lang,
            "dt": "t",
            "q": query
        }
        with requests.Session() as session:
            retries = Retry(total=3, backoff_factor=0.5)
            session.mount('https://', HTTPAdapter(max_retries=retries))
            response = session.get(url, params=params, timeout=(5, 10))
            response.raise_for_status()
            segments = response.json()[0]

        # Each segment is a list whose first item is the translated text;
        # entries without text in that slot are skipped.
        translated_text = "".join(seg[0] for seg in segments if seg and seg[0])
        return translated_text or query
    except Exception as e:
        # Deliberately broad: a translation problem must never block the
        # search itself, so fall back to the untranslated query.
        print(f"๋ฒˆ์—ญ ์˜ค๋ฅ˜: {str(e)}")
        return query
def is_english(text):
    """Return True when ``text`` consists solely of ASCII characters.

    Spaces, hyphens and underscores are themselves ASCII, so removing them
    before the check cannot change the outcome; the empty string counts as
    ASCII. Note that this is an ASCII test, not real language detection.
    """
    return text.isascii()
def search_serphouse(query, country, page=1, num_result=10):
    """
    Send a live news search (sorted by date, last day only) to the SerpHouse
    API for ``query`` in the given country.

    The query is first passed through ``translate_query``. Location and
    language come from COUNTRY_LOCATIONS / COUNTRY_LANGUAGES, falling back
    to "United States" / "en" for unknown countries.

    Changes from the previous version: the HTTP session is closed after the
    request, and the deprecated ``datetime.utcnow()`` is replaced by an
    aware UTC ``datetime.now`` (the formatted dates are the same).

    Args:
        query: Search text.
        country: Country name used to look up location and language.
        page: Result page number to request.
        num_result: Accepted for compatibility but not used; the request
            always asks for 100 results.

    Returns:
        ``{"results": <decoded JSON>, "translated_query": <str>}`` on
        success, or ``{"error": <message>, "translated_query": query}``
        when the request times out or fails.
    """
    # Local import: the module-level import only brings in datetime/timedelta.
    from datetime import timezone

    url = "https://api.serphouse.com/serp/live"
    now = datetime.now(timezone.utc)
    yesterday = now - timedelta(days=1)
    date_range = f"{yesterday.strftime('%Y-%m-%d')},{now.strftime('%Y-%m-%d')}"
    translated_query = translate_query(query, country)
    payload = {
        "data": {
            "q": translated_query,
            "domain": "google.com",
            "loc": COUNTRY_LOCATIONS.get(country, "United States"),
            "lang": COUNTRY_LANGUAGES.get(country, "en"),
            "device": "desktop",
            "serp_type": "news",
            "page": str(page),
            "num": "100",
            "date_range": date_range,
            "sort_by": "date"
        }
    }
    headers = {
        "accept": "application/json",
        "content-type": "application/json",
        "authorization": f"Bearer {API_KEY}"
    }
    try:
        with requests.Session() as session:
            # POST is retried on the listed server-side and rate-limit statuses.
            retries = Retry(
                total=5,
                backoff_factor=1,
                status_forcelist=[500, 502, 503, 504, 429],
                allowed_methods=["POST"]
            )
            adapter = HTTPAdapter(max_retries=retries)
            session.mount('http://', adapter)
            session.mount('https://', adapter)
            response = session.post(
                url,
                json=payload,
                headers=headers,
                timeout=(30, 30)
            )
            response.raise_for_status()
            return {"results": response.json(), "translated_query": translated_query}
    except requests.exceptions.Timeout:
        return {
            "error": "๊ฒ€์ƒ‰ ์‹œ๊ฐ„์ด ์ดˆ๊ณผ๋˜์—ˆ์Šต๋‹ˆ๋‹ค. ์ž ์‹œ ํ›„ ๋‹ค์‹œ ์‹œ๋„ํ•ด์ฃผ์„ธ์š”.",
            "translated_query": query
        }
    except requests.exceptions.RequestException as e:
        return {
            "error": f"๊ฒ€์ƒ‰ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}",
            "translated_query": query
        }
    except Exception as e:
        return {
            "error": f"์˜ˆ๊ธฐ์น˜ ์•Š์€ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}",
            "translated_query": query
        }
def format_results_from_raw(response_data):
    """
    Convert a ``search_serphouse`` response into ``(error_message, articles)``.

    Articles whose URL or channel contains a Korean-domain marker, or whose
    title contains a Korea-related keyword, are dropped. Matching is
    case-insensitive.

    Changes from the previous version:
    - The stored ``link`` keeps the URL exactly as returned by the API.
      Previously the lowercased copy used for matching was stored, which
      breaks URLs with case-sensitive paths or query parameters.
    - ``None`` values for url/title/channel are treated as empty text during
      matching instead of aborting the whole batch.

    Args:
        response_data: Dict with either an ``"error"`` key, or ``"results"``
            (decoded API JSON) and ``"translated_query"``.

    Returns:
        ``("", [article, ...])`` on success, where ``index`` is the 1-based
        position in the unfiltered result list; otherwise a non-empty
        message and an empty list.
    """
    if "error" in response_data:
        return "Error: " + response_data["error"], []
    try:
        results = response_data["results"]
        translated_query = response_data["translated_query"]
        # The news items sit three levels deep in the API payload.
        news_results = results.get('results', {}).get('results', {}).get('news', [])
        if not news_results:
            return "๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.", []

        # Markers identifying Korean outlets (URL/channel) and Korea-related
        # headlines (title); matching articles are excluded.
        korean_domains = (
            '.kr', 'korea', 'korean', 'yonhap', 'hankyung', 'chosun',
            'donga', 'joins', 'hani', 'koreatimes', 'koreaherald'
        )
        korean_keywords = (
            'korea', 'korean', 'seoul', 'busan', 'incheon', 'daegu',
            'gwangju', 'daejeon', 'ulsan', 'sejong'
        )

        filtered_articles = []
        for idx, result in enumerate(news_results, 1):
            raw_url = result.get("url") or result.get("link") or ""
            # Lowercased copies are used for matching only.
            url_lc = raw_url.lower()
            title_lc = (result.get("title") or "").lower()
            channel_lc = (result.get("channel") or result.get("source") or "").lower()

            is_korean_content = (
                any(domain in url_lc or domain in channel_lc for domain in korean_domains) or
                any(keyword in title_lc for keyword in korean_keywords)
            )
            if is_korean_content:
                continue

            filtered_articles.append({
                "index": idx,
                "title": result.get("title", "์ œ๋ชฉ ์—†์Œ"),
                "link": raw_url,
                "snippet": result.get("snippet", "๋‚ด์šฉ ์—†์Œ"),
                "channel": result.get("channel", result.get("source", "์•Œ ์ˆ˜ ์—†์Œ")),
                "time": result.get("time", result.get("date", "์•Œ ์ˆ˜ ์—†๋Š” ์‹œ๊ฐ„")),
                "image_url": result.get("img", result.get("thumbnail", "")),
                "translated_query": translated_query
            })
        return "", filtered_articles
    except Exception as e:
        # Unexpected payload shapes are reported as an error message.
        return f"๊ฒฐ๊ณผ ์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}", []
def serphouse_search(query, country):
    """
    Run the search and format its response in one step.

    Returns the ``(error_message, articles)`` pair produced by
    ``format_results_from_raw`` for the ``search_serphouse`` response.
    """
    return format_results_from_raw(search_serphouse(query, country))
# Custom CSS for the UI; passed to gr.Blocks through its css argument.
css = """
/* ์ „์—ญ ์Šคํƒ€์ผ */
footer {visibility: hidden;}
/* ๋ ˆ์ด์•„์›ƒ ์ปจํ…Œ์ด๋„ˆ */
#status_area {
background: rgba(255, 255, 255, 0.9);
padding: 15px;
border-bottom: 1px solid #ddd;
margin-bottom: 20px;
box-shadow: 0 2px 5px rgba(0,0,0,0.1);
}
#results_area {
padding: 10px;
margin-top: 10px;
}
/* ํƒญ ์Šคํƒ€์ผ */
.tabs {
border-bottom: 2px solid #ddd !important;
margin-bottom: 20px !important;
}
.tab-nav {
border-bottom: none !important;
margin-bottom: 0 !important;
}
.tab-nav button {
font-weight: bold !important;
padding: 10px 20px !important;
}
.tab-nav button.selected {
border-bottom: 2px solid #1f77b4 !important;
color: #1f77b4 !important;
}
/* ์ƒํƒœ ๋ฉ”์‹œ์ง€ */
#status_area .markdown-text {
font-size: 1.1em;
color: #2c3e50;
padding: 10px 0;
}
/* ๊ธฐ๋ณธ ์ปจํ…Œ์ด๋„ˆ */
.group {
border: 1px solid #eee;
padding: 15px;
margin-bottom: 15px;
border-radius: 5px;
background: white;
}
/* ๋ฒ„ํŠผ ์Šคํƒ€์ผ */
.primary-btn {
background: #1f77b4 !important;
border: none !important;
}
/* ์ž…๋ ฅ ํ•„๋“œ */
.textbox {
border: 1px solid #ddd !important;
border-radius: 4px !important;
}
/* ํ”„๋กœ๊ทธ๋ ˆ์Šค๋ฐ” ์ปจํ…Œ์ด๋„ˆ */
.progress-container {
position: fixed;
top: 0;
left: 0;
width: 100%;
height: 6px;
background: #e0e0e0;
z-index: 1000;
}
/* ํ”„๋กœ๊ทธ๋ ˆ์Šคbar */
.progress-bar {
height: 100%;
background: linear-gradient(90deg, #2196F3, #00BCD4);
box-shadow: 0 0 10px rgba(33, 150, 243, 0.5);
transition: width 0.3s ease;
animation: progress-glow 1.5s ease-in-out infinite;
}
/* ํ”„๋กœ๊ทธ๋ ˆ์Šค ํ…์ŠคํŠธ */
.progress-text {
position: fixed;
top: 8px;
left: 50%;
transform: translateX(-50%);
background: #333;
color: white;
padding: 4px 12px;
border-radius: 15px;
font-size: 14px;
z-index: 1001;
box-shadow: 0 2px 5px rgba(0,0,0,0.2);
}
/* ํ”„๋กœ๊ทธ๋ ˆ์Šค๋ฐ” ์• ๋‹ˆ๋ฉ”์ด์…˜ */
@keyframes progress-glow {
0% {
box-shadow: 0 0 5px rgba(33, 150, 243, 0.5);
}
50% {
box-shadow: 0 0 20px rgba(33, 150, 243, 0.8);
}
100% {
box-shadow: 0 0 5px rgba(33, 150, 243, 0.5);
}
}
/* ๋ฐ˜์‘ํ˜• ๋””์ž์ธ */
@media (max-width: 768px) {
.group {
padding: 10px;
margin-bottom: 15px;
}
.progress-text {
font-size: 12px;
padding: 3px 10px;
}
}
/* ๋กœ๋”ฉ ์ƒํƒœ ํ‘œ์‹œ ๊ฐœ์„  */
.loading {
opacity: 0.7;
pointer-events: none;
transition: opacity 0.3s ease;
}
/* ๊ฒฐ๊ณผ ์ปจํ…Œ์ด๋„ˆ ์• ๋‹ˆ๋ฉ”์ด์…˜ */
.group {
transition: all 0.3s ease;
opacity: 0;
transform: translateY(20px);
}
.group.visible {
opacity: 1;
transform: translateY(0);
}
/* Examples ์Šคํƒ€์ผ๋ง */
.examples-table {
margin-top: 10px !important;
margin-bottom: 20px !important;
}
.examples-table button {
background-color: #f0f0f0 !important;
border: 1px solid #ddd !important;
border-radius: 4px !important;
padding: 5px 10px !important;
margin: 2px !important;
transition: all 0.3s ease !important;
}
.examples-table button:hover {
background-color: #e0e0e0 !important;
transform: translateY(-1px) !important;
box-shadow: 0 2px 5px rgba(0,0,0,0.1) !important;
}
.examples-table .label {
font-weight: bold !important;
color: #444 !important;
margin-bottom: 5px !important;
}
"""
import gradio as gr
with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css, title="NewsAI ์„œ๋น„์Šค") as iface:
    # Make sure the searches table exists before any handler can run.
    init_db()
    with gr.Tabs():
        # First tab
        with gr.Tab("Earnbot"):
            gr.Markdown("## EarnBot: ๊ธ€๋กœ๋ฒŒ ๋น…ํ…Œํฌ ๊ธฐ์—… ๋ฐ ํˆฌ์ž ์ „๋ง AI ์ž๋™ ๋ถ„์„")
            gr.Markdown(" * '์ „์ฒด ๋ถ„์„ ๋ณด๊ณ  ์š”์•ฝ' ํด๋ฆญ ์‹œ ์ „์ฒด ์ž๋™ ๋ณด๊ณ  ์ƒ์„ฑ.\n * ์•„๋ž˜ ๊ฐœ๋ณ„ ์ข…๋ชฉ์˜ '๊ฒ€์ƒ‰(DB ์ž๋™ ์ €์žฅ)'๊ณผ '์ถœ๋ ฅ(DB ์ž๋™ ํ˜ธ์ถœ)'๋„ ๊ฐ€๋Šฅ.\n * ์ถ”๊ฐ€๋กœ, ์›ํ•˜๋Š” ์ž„์˜ ํ‚ค์›Œ๋“œ ๋ฐ ๊ตญ๊ฐ€๋กœ ๊ฒ€์ƒ‰/๋ถ„์„ํ•  ์ˆ˜๋„ ์žˆ์Šต๋‹ˆ๋‹ค.")

            # Free-form search: keyword box, country dropdown, run button.
            with gr.Group():
                gr.Markdown("### ์‚ฌ์šฉ์ž ์ž„์˜ ๊ฒ€์ƒ‰")
                with gr.Row():
                    with gr.Column():
                        user_input = gr.Textbox(
                            label="๊ฒ€์ƒ‰์–ด ์ž…๋ ฅ",
                            placeholder="์˜ˆ) Apple, Samsung ๋“ฑ ์ž์œ ๋กญ๊ฒŒ"
                        )
                    with gr.Column():
                        country_selection = gr.Dropdown(
                            choices=list(COUNTRY_LOCATIONS),
                            value="United States",
                            label="๊ตญ๊ฐ€ ์„ ํƒ"
                        )
                    with gr.Column():
                        custom_search_btn = gr.Button("์‹คํ–‰", variant="primary")
                custom_search_output = gr.Markdown()
            custom_search_btn.click(
                fn=search_custom,
                inputs=[user_input, country_selection],
                outputs=custom_search_output
            )

            # Full summary report button and its output area.
            with gr.Row():
                full_report_btn = gr.Button("์ „์ฒด ๋ถ„์„ ๋ณด๊ณ  ์š”์•ฝ", variant="primary")
            full_report_display = gr.Markdown()
            full_report_btn.click(
                fn=full_summary_report,
                outputs=full_report_display
            )

            # One search/load panel per watch-list entry, two panels per row
            # (left column first, then the right column if an entry remains).
            with gr.Column():
                for i in range(0, len(KOREAN_COMPANIES), 2):
                    with gr.Row():
                        for company in KOREAN_COMPANIES[i:i + 2]:
                            with gr.Column():
                                with gr.Group():
                                    gr.Markdown(f"### {company}")
                                    with gr.Row():
                                        search_btn = gr.Button("๊ฒ€์ƒ‰", variant="primary")
                                        load_btn = gr.Button("์ถœ๋ ฅ", variant="secondary")
                                    result_display = gr.Markdown()
                                    # The default argument pins the current
                                    # company for each handler.
                                    search_btn.click(
                                        fn=lambda c=company: search_company(c),
                                        outputs=result_display
                                    )
                                    load_btn.click(
                                        fn=lambda c=company: load_company(c),
                                        outputs=result_display
                                    )

# Serve on all interfaces, port 7860, with a public share link requested.
iface.launch(
    server_name="0.0.0.0",
    server_port=7860,
    share=True,
    ssl_verify=False,
    show_error=True
)