Spaces:
Building
Building
import gradio as gr | |
import requests | |
import json | |
import os | |
from datetime import datetime, timedelta | |
from concurrent.futures import ThreadPoolExecutor, as_completed | |
from functools import lru_cache | |
from requests.adapters import HTTPAdapter | |
from requests.packages.urllib3.util.retry import Retry | |
from openai import OpenAI | |
from bs4 import BeautifulSoup | |
import re | |
import pathlib | |
import sqlite3 | |
import pytz | |
# Search terms tracked by the app: company names plus general market keywords.
# Iterated by the bulk search/load/report functions and by the per-item UI panels.
# NOTE(review): despite the name, the entries are not limited to Korean companies.
KOREAN_COMPANIES = [
    "NVIDIA",
    "ALPHABET",
    "APPLE",
    "TESLA",
    "AMAZON",
    "MICROSOFT",
    "META",
    "INTEL",
    "SAMSUNG",
    "HYNIX",
    "BITCOIN",
    "crypto",
    "stock",
    "Economics",
    "Finance",
    "investing"
]
def convert_to_seoul_time(timestamp_str):
    """Label a stored ``YYYY-MM-DD HH:MM:SS`` timestamp as Korea Standard Time.

    The naive timestamp is interpreted as Asia/Seoul wall-clock time (it is
    localized, not shifted) and returned with a ``KST`` suffix.  On any
    failure the error is printed and the input is returned unchanged.
    """
    stamp_format = '%Y-%m-%d %H:%M:%S'
    try:
        naive = datetime.strptime(timestamp_str, stamp_format)
        localized = pytz.timezone('Asia/Seoul').localize(naive)
        return localized.strftime(stamp_format + ' KST')
    except Exception as e:
        print(f"์๊ฐ ๋ณํ ์ค๋ฅ: {str(e)}")
        return timestamp_str
def analyze_sentiment_batch(articles, client):
    """Ask the chat model for one overall sentiment analysis of a batch of articles.

    Args:
        articles: Iterable of dicts; ``title`` and ``snippet`` are read with
            ``.get`` and default to an empty string.
        client: Object exposing ``chat.completions.create``.

    Returns:
        The content of the first completion choice, or a failure message
        embedding the exception text if anything raises.
    """
    try:
        # Flatten every article into a "title / body" paragraph.
        article_blocks = []
        for entry in articles:
            headline = entry.get('title', '')
            summary = entry.get('snippet', '')
            article_blocks.append(f"์ ๋ชฉ: {headline}\n๋ด์ฉ: {summary}")
        combined_text = "\n\n".join(article_blocks)

        prompt = f"""๋ค์ ๋ด์ค ๋ชจ์์ ๋ํด ์ ๋ฐ์ ์ธ ๊ฐ์ฑ ๋ถ์์ ์ํํ์ธ์:
๋ด์ค ๋ด์ฉ:
{combined_text}
๋ค์ ํ์์ผ๋ก ๋ถ์ํด์ฃผ์ธ์:
1. ์ ๋ฐ์ ๊ฐ์ฑ: [๊ธ์ /๋ถ์ /์ค๋ฆฝ]
2. ์ฃผ์ ๊ธ์ ์ ์์:
- [ํญ๋ชฉ1]
- [ํญ๋ชฉ2]
3. ์ฃผ์ ๋ถ์ ์ ์์:
- [ํญ๋ชฉ1]
- [ํญ๋ชฉ2]
4. ์ข ํฉ ํ๊ฐ: [์์ธ ์ค๋ช ]
"""
        request_messages = [{"role": "user", "content": prompt}]
        completion = client.chat.completions.create(
            model="CohereForAI/c4ai-command-r-plus-08-2024",
            messages=request_messages,
            temperature=0.3,
            max_tokens=1000
        )
        first_choice = completion.choices[0]
        return first_choice.message.content
    except Exception as e:
        return f"๊ฐ์ฑ ๋ถ์ ์คํจ: {str(e)}"
# DB initialization function
def init_db():
    """Create the ``searches`` table in ``search_results.db`` if it is missing.

    The database file is resolved relative to the current working directory.
    Calling this repeatedly is safe because of ``CREATE TABLE IF NOT EXISTS``.
    """
    db_path = pathlib.Path("search_results.db")
    conn = sqlite3.connect(db_path)
    try:
        conn.execute('''CREATE TABLE IF NOT EXISTS searches
                        (id INTEGER PRIMARY KEY AUTOINCREMENT,
                         keyword TEXT,
                         country TEXT,
                         results TEXT,
                         timestamp DATETIME DEFAULT CURRENT_TIMESTAMP)''')
        conn.commit()
    finally:
        # Release the file handle even when the DDL or the commit fails.
        conn.close()
def save_to_db(keyword, country, results):
    """Insert one search result row for the given (keyword, country) pair.

    Args:
        keyword: Search term stored in the ``keyword`` column.
        country: Country label stored in the ``country`` column.
        results: JSON-serialisable object; stored as a JSON string.

    The ``timestamp`` column is written explicitly as the current Asia/Seoul
    wall-clock time formatted ``YYYY-MM-DD HH:MM:SS``.
    """
    seoul_tz = pytz.timezone('Asia/Seoul')
    timestamp = datetime.now(seoul_tz).strftime('%Y-%m-%d %H:%M:%S')
    # Serialise before opening the connection so a bad payload never leaves one open.
    payload = json.dumps(results)

    conn = sqlite3.connect("search_results.db")
    try:
        conn.execute("""INSERT INTO searches
                        (keyword, country, results, timestamp)
                        VALUES (?, ?, ?, ?)""",
                     (keyword, country, payload, timestamp))
        conn.commit()
    finally:
        # Close even if the insert or the commit raises.
        conn.close()
def load_from_db(keyword, country):
    """Load the most recent stored search result for a (keyword, country) pair.

    Returns:
        ``(results, timestamp)`` where ``results`` is the decoded JSON payload
        and ``timestamp`` is the stored time passed through
        ``convert_to_seoul_time``; ``(None, None)`` when no row matches.
    """
    conn = sqlite3.connect("search_results.db")
    try:
        # Timestamps have one-second resolution, so the autoincrement id breaks
        # ties between rows saved within the same second.
        row = conn.execute(
            "SELECT results, timestamp FROM searches "
            "WHERE keyword=? AND country=? "
            "ORDER BY timestamp DESC, id DESC LIMIT 1",
            (keyword, country),
        ).fetchone()
    finally:
        # Close even if the query raises.
        conn.close()

    if row:
        return json.loads(row[0]), convert_to_seoul_time(row[1])
    return None, None
def display_results(articles):
    """Render a list of article dicts as a Markdown string.

    Each article must provide the keys ``title``, ``channel``, ``time``,
    ``link`` and ``snippet``; entries are numbered from 1.  An empty list
    yields an empty string.
    """
    sections = []
    for position, item in enumerate(articles, start=1):
        sections.append(
            f"### {position}. {item['title']}\n"
            f"์ถ์ฒ: {item['channel']}\n"
            f"์๊ฐ: {item['time']}\n"
            f"๋งํฌ: {item['link']}\n"
            f"์์ฝ: {item['snippet']}\n\n"
        )
    return "".join(sections)
def search_company(company):
    """Search United States news for one term, store the hits, and return Markdown.

    When the search reports an error or yields no articles, nothing is saved
    and a "no results" message for the term is returned instead.
    """
    region = "United States"
    error_message, articles = serphouse_search(company, region)
    if error_message or not articles:
        return f"{company}์ ๋ํ ๊ฒ์ ๊ฒฐ๊ณผ๊ฐ ์์ต๋๋ค."
    save_to_db(company, region, articles)
    return display_results(articles)
def load_company(company):
    """Return Markdown for the latest stored United States results of one term.

    The output starts with a heading and the stored time, followed by the
    rendered article list; a "nothing stored" message is returned when the
    database has no matching row (or the stored list is empty).
    """
    stored, saved_at = load_from_db(company, "United States")
    if not stored:
        return f"{company}์ ๋ํ ์ ์ฅ๋ ๊ฒฐ๊ณผ๊ฐ ์์ต๋๋ค."
    header = f"### {company} ๊ฒ์ ๊ฒฐ๊ณผ\n์ ์ฅ ์๊ฐ: {saved_at}\n\n"
    return header + display_results(stored)
def show_stats():
    """Build a Markdown report with the latest stored data for every tracked term.

    For each entry of ``KOREAN_COMPANIES`` that has a stored row, the report
    lists the stored time, the number of articles and a sentiment analysis of
    those articles.  Rows are matched on ``keyword`` only, regardless of the
    country they were saved under.  Terms without a stored row are omitted.

    Sentiment analysis runs concurrently, and the sections are emitted in
    ``KOREAN_COMPANIES`` order so the report is stable between runs.
    """
    # Collect (company, timestamp, articles) for every term that has data.
    data_list = []
    conn = sqlite3.connect("search_results.db")
    try:
        c = conn.cursor()
        for company in KOREAN_COMPANIES:
            c.execute("""
                SELECT results, timestamp
                FROM searches
                WHERE keyword = ?
                ORDER BY timestamp DESC
                LIMIT 1
            """, (company,))
            row = c.fetchone()
            if row:
                results_json, timestamp = row
                articles = json.loads(results_json)
                seoul_time = convert_to_seoul_time(timestamp)
                data_list.append((company, seoul_time, articles))
    finally:
        # Close even if a query or JSON decoding fails.
        conn.close()

    def analyze_data(item):
        """Run sentiment analysis for one (company, timestamp, articles) tuple."""
        comp, tstamp, arts = item
        sentiment = ""
        if arts:
            sentiment = analyze_sentiment_batch(arts, client)
        return (comp, tstamp, len(arts), sentiment)

    with ThreadPoolExecutor(max_workers=5) as executor:
        # map() yields results in input order, unlike as_completed().
        results_list = list(executor.map(analyze_data, data_list))

    parts = ["## ํ๊ตญ ๊ธฐ์ ๋ด์ค ๋ถ์ ๋ฆฌํฌํธ\n\n"]
    for comp, tstamp, count, sentiment in results_list:
        parts.append(f"### {comp}\n")
        parts.append(f"- ๋ง์ง๋ง ์ ๋ฐ์ดํธ: {tstamp}\n")
        parts.append(f"- ์ ์ฅ๋ ๊ธฐ์ฌ ์: {count}๊ฑด\n\n")
        if sentiment:
            parts.append("#### ๋ด์ค ๊ฐ์ฑ ๋ถ์\n")
            parts.append(f"{sentiment}\n\n")
        parts.append("---\n\n")
    return "".join(parts)
### (1) Search everything: runs on multiple threads
def search_all_companies():
    """Search every entry of ``KOREAN_COMPANIES`` concurrently and merge the Markdown.

    Searches run on a pool of five threads.  Sections are emitted in
    ``KOREAN_COMPANIES`` order so the combined report is stable between runs.
    """
    def do_search(comp):
        return comp, search_company(comp)

    with ThreadPoolExecutor(max_workers=5) as executor:
        # map() preserves input order while the calls still overlap.
        searched = list(executor.map(do_search, KOREAN_COMPANIES))

    sections = ["# [์ ์ฒด ๊ฒ์ ๊ฒฐ๊ณผ]\n\n"]
    for comp, res_text in searched:
        sections.append(f"## {comp}\n")
        sections.append(res_text + "\n\n")
    return "".join(sections)
def load_all_companies():
    """Load the stored results of every ``KOREAN_COMPANIES`` entry, in list order.

    Returns one Markdown document: a top-level heading followed by a section
    per term containing whatever ``load_company`` returns for it.
    """
    pieces = ["# [์ ์ฒด ์ถ๋ ฅ ๊ฒฐ๊ณผ]\n\n"]
    for name in KOREAN_COMPANIES:
        pieces.extend((f"## {name}\n", load_company(name), "\n"))
    return "".join(pieces)
def full_summary_report():
    """Run search, load and statistics in that order and merge the three reports.

    The steps run sequentially: ``search_all_companies`` first, then
    ``load_all_companies``, then ``show_stats``.
    """
    # The order matters: the later steps read what the search step stored.
    search_result_text = search_all_companies()
    load_result_text = load_all_companies()
    stats_text = show_stats()

    report_parts = [
        "# ์ ์ฒด ๋ถ์ ๋ณด๊ณ ์์ฝ\n\n",
        "์๋ ์์๋ก ์คํ๋์์ต๋๋ค:\n",
        "1. ๋ชจ๋ ์ข ๋ชฉ ๊ฒ์(๋ณ๋ ฌ) โ 2. ๋ชจ๋ ์ข ๋ชฉ DB ๊ฒฐ๊ณผ ์ถ๋ ฅ โ 3. ์ ์ฒด ๊ฐ์ฑ ๋ถ์ ํต๊ณ\n\n",
        f"{search_result_text}\n\n",
        f"{load_result_text}\n\n",
        "## [์ ์ฒด ๊ฐ์ฑ ๋ถ์ ํต๊ณ]\n\n",
        f"{stats_text}",
    ]
    return "".join(report_parts)
### (2) Custom user search with country selection
def search_custom(query, country):
    """Search, store, reload and analyse a user-supplied (query, country) pair.

    Steps: call the search API, save the articles, read the latest stored row
    back, run sentiment analysis on it, and return a Markdown report with the
    article list followed by the analysis.  A short message is returned
    instead when the search fails, finds nothing, or the reload is empty.
    """
    # 1) Search
    error_message, articles = serphouse_search(query, country)
    if error_message:
        return f"์ค๋ฅ ๋ฐ์: {error_message}"
    if not articles:
        return "๊ฒ์ ๊ฒฐ๊ณผ๊ฐ ์์ต๋๋ค."

    # 2) Store, then 3) read back the row that was just written
    save_to_db(query, country, articles)
    results, timestamp = load_from_db(query, country)
    if not results:
        return "DB ๋ก๋ ์คํจ: ์ ์ฅ๋ ๊ฒฐ๊ณผ๊ฐ ์์ต๋๋ค."

    # 4) Sentiment analysis over the reloaded articles
    sentiment_analysis = analyze_sentiment_batch(results, client)

    # 5) Final report: metadata, article list, sentiment analysis
    report = [
        "## [์ฌ์ฉ์ ์์ ๊ฒ์ ๊ฒฐ๊ณผ]\n\n",
        f"**ํค์๋**: {query}\n\n",
        f"**๊ตญ๊ฐ**: {country}\n\n",
        f"**์ ์ฅ ์๊ฐ**: {timestamp}\n\n",
        display_results(results),
        "### ๋ด์ค ๊ฐ์ฑ ๋ถ์\n",
        f"{sentiment_analysis}\n",
    ]
    return "".join(report)
### (Required) API authentication
# Token read from the HF_TOKEN environment variable; the module refuses to
# load without it.
ACCESS_TOKEN = os.getenv("HF_TOKEN")
if not ACCESS_TOKEN:
    raise ValueError("HF_TOKEN environment variable is not set")
# OpenAI SDK client pointed at the Hugging Face inference endpoint; used by the
# sentiment-analysis calls in this file.
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)
# SerpHouse key sent as the Bearer token by search_serphouse().
# NOTE(review): unlike HF_TOKEN this is not validated, so it may be None.
API_KEY = os.getenv("SERPHOUSE_API_KEY")
### Per-country settings
# Country label -> language code.  The value is used both as the translation
# target language and as the ``lang`` field of the search request.
# Luxembourg, Malta, Cyprus and Iceland previously mapped to their own country
# names, which are not language codes.
COUNTRY_LANGUAGES = {
    "United States": "en",
    "KOREA": "ko",
    "United Kingdom": "en",
    "Taiwan": "zh-TW",
    "Canada": "en",
    "Australia": "en",
    "Germany": "de",
    "France": "fr",
    "Japan": "ja",
    "China": "zh",
    "India": "hi",
    "Brazil": "pt",
    "Mexico": "es",
    "Russia": "ru",
    "Italy": "it",
    "Spain": "es",
    "Netherlands": "nl",
    "Singapore": "en",
    "Hong Kong": "zh-HK",
    "Indonesia": "id",
    "Malaysia": "ms",
    "Philippines": "tl",
    "Thailand": "th",
    "Vietnam": "vi",
    "Belgium": "nl",
    "Denmark": "da",
    "Finland": "fi",
    "Ireland": "en",
    "Norway": "no",
    "Poland": "pl",
    "Sweden": "sv",
    "Switzerland": "de",
    "Austria": "de",
    "Czech Republic": "cs",
    "Greece": "el",
    "Hungary": "hu",
    "Portugal": "pt",
    "Romania": "ro",
    "Turkey": "tr",
    "Israel": "he",
    "Saudi Arabia": "ar",
    "United Arab Emirates": "ar",
    "South Africa": "en",
    "Argentina": "es",
    "Chile": "es",
    "Colombia": "es",
    "Peru": "es",
    "Venezuela": "es",
    "New Zealand": "en",
    "Bangladesh": "bn",
    "Pakistan": "ur",
    "Egypt": "ar",
    "Morocco": "ar",
    "Nigeria": "en",
    "Kenya": "sw",
    "Ukraine": "uk",
    "Croatia": "hr",
    "Slovakia": "sk",
    "Bulgaria": "bg",
    "Serbia": "sr",
    "Estonia": "et",
    "Latvia": "lv",
    "Lithuania": "lt",
    "Slovenia": "sl",
    "Luxembourg": "fr",
    "Malta": "mt",
    "Cyprus": "el",
    "Iceland": "is",
}
# Country label -> value sent as the ``loc`` field of the search request.
# Estonia, Latvia, Lithuania and Slovenia previously held language codes
# ("et", "lv", "lt", "sl") copied from the language table; they now use the
# country name like every other entry.
# NOTE(review): "KOREA" still maps to "kr" rather than a country name —
# confirm the expected location value before changing it.
COUNTRY_LOCATIONS = {
    "United States": "United States",
    "KOREA": "kr",
    "United Kingdom": "United Kingdom",
    "Taiwan": "Taiwan",
    "Canada": "Canada",
    "Australia": "Australia",
    "Germany": "Germany",
    "France": "France",
    "Japan": "Japan",
    "China": "China",
    "India": "India",
    "Brazil": "Brazil",
    "Mexico": "Mexico",
    "Russia": "Russia",
    "Italy": "Italy",
    "Spain": "Spain",
    "Netherlands": "Netherlands",
    "Singapore": "Singapore",
    "Hong Kong": "Hong Kong",
    "Indonesia": "Indonesia",
    "Malaysia": "Malaysia",
    "Philippines": "Philippines",
    "Thailand": "Thailand",
    "Vietnam": "Vietnam",
    "Belgium": "Belgium",
    "Denmark": "Denmark",
    "Finland": "Finland",
    "Ireland": "Ireland",
    "Norway": "Norway",
    "Poland": "Poland",
    "Sweden": "Sweden",
    "Switzerland": "Switzerland",
    "Austria": "Austria",
    "Czech Republic": "Czech Republic",
    "Greece": "Greece",
    "Hungary": "Hungary",
    "Portugal": "Portugal",
    "Romania": "Romania",
    "Turkey": "Turkey",
    "Israel": "Israel",
    "Saudi Arabia": "Saudi Arabia",
    "United Arab Emirates": "United Arab Emirates",
    "South Africa": "South Africa",
    "Argentina": "Argentina",
    "Chile": "Chile",
    "Colombia": "Colombia",
    "Peru": "Peru",
    "Venezuela": "Venezuela",
    "New Zealand": "New Zealand",
    "Bangladesh": "Bangladesh",
    "Pakistan": "Pakistan",
    "Egypt": "Egypt",
    "Morocco": "Morocco",
    "Nigeria": "Nigeria",
    "Kenya": "Kenya",
    "Ukraine": "Ukraine",
    "Croatia": "Croatia",
    "Slovakia": "Slovakia",
    "Bulgaria": "Bulgaria",
    "Serbia": "Serbia",
    "Estonia": "Estonia",
    "Latvia": "Latvia",
    "Lithuania": "Lithuania",
    "Slovenia": "Slovenia",
    "Luxembourg": "Luxembourg",
    "Malta": "Malta",
    "Cyprus": "Cyprus",
    "Iceland": "Iceland",
}
def translate_query(query, country):
    """Translate a search query into the language configured for ``country``.

    Uses the unofficial Google Translate endpoint.  The query is returned
    unchanged when it is ASCII-only, when the country is Korea, when the
    country has no entry in ``COUNTRY_LANGUAGES``, or when translation fails
    for any reason (the error is printed).

    Args:
        query: Search text entered by the user.
        country: Country label, a key of ``COUNTRY_LANGUAGES``.

    Returns:
        The translated text, or ``query`` itself as a fallback.
    """
    try:
        if is_english(query):
            return query
        # The country tables use the key "KOREA"; the original check only
        # compared against "South Korea" and therefore never matched.
        if country in ("KOREA", "South Korea"):
            return query
        target_lang = COUNTRY_LANGUAGES.get(country)
        if target_lang is None:
            return query

        url = "https://translate.googleapis.com/translate_a/single"
        params = {
            "client": "gtx",
            "sl": "auto",
            "tl": target_lang,
            "dt": "t",
            "q": query
        }
        # The context manager closes the session's connection pool.
        with requests.Session() as session:
            retries = Retry(total=3, backoff_factor=0.5)
            session.mount('https://', HTTPAdapter(max_retries=retries))
            response = session.get(url, params=params, timeout=(5, 10))
            response.raise_for_status()
            segments = response.json()[0]

        # Longer inputs come back split into several segments; join them all
        # instead of keeping only the first one.
        translated_text = "".join(
            segment[0] for segment in segments if segment and segment[0]
        )
        return translated_text or query
    except Exception as e:
        print(f"๋ฒ์ญ ์ค๋ฅ: {str(e)}")
        return query
def is_english(text):
    """Return True when ``text`` contains only ASCII characters.

    This is a character-set check rather than real language detection; the
    empty string counts as ASCII.
    """
    # Space, hyphen and underscore are ASCII themselves, so a plain ASCII test
    # gives the same answer as stripping them first.
    return text.isascii()
def search_serphouse(query, country, page=1, num_result=10):
    """Send a live news search to the SerpHouse API for the last day, sorted by date.

    Args:
        query: Search text; it is passed through ``translate_query`` first.
        country: Country label used to look up the ``loc`` and ``lang`` values
            (defaults: "United States" / "en" for unknown labels).
        page: Result page number, sent as a string.
        num_result: Accepted for compatibility but currently unused; the
            request always asks for 100 results.

    Returns:
        ``{"results": <decoded JSON>, "translated_query": ...}`` on success, or
        ``{"error": <message>, "translated_query": ...}`` on failure.
    """
    url = "https://api.serphouse.com/serp/live"
    now = datetime.utcnow()
    yesterday = now - timedelta(days=1)
    date_range = f"{yesterday.strftime('%Y-%m-%d')},{now.strftime('%Y-%m-%d')}"
    translated_query = translate_query(query, country)
    payload = {
        "data": {
            "q": translated_query,
            "domain": "google.com",
            "loc": COUNTRY_LOCATIONS.get(country, "United States"),
            "lang": COUNTRY_LANGUAGES.get(country, "en"),
            "device": "desktop",
            "serp_type": "news",
            "page": str(page),
            "num": "100",
            "date_range": date_range,
            "sort_by": "date"
        }
    }
    headers = {
        "accept": "application/json",
        "content-type": "application/json",
        "authorization": f"Bearer {API_KEY}"
    }
    try:
        # The context manager closes the session's connection pool on every path.
        with requests.Session() as session:
            # POST is not retried by default, so it is allowed explicitly.
            retries = Retry(
                total=5,
                backoff_factor=1,
                status_forcelist=[500, 502, 503, 504, 429],
                allowed_methods=["POST"]
            )
            adapter = HTTPAdapter(max_retries=retries)
            session.mount('http://', adapter)
            session.mount('https://', adapter)
            response = session.post(
                url,
                json=payload,
                headers=headers,
                timeout=(30, 30)
            )
            response.raise_for_status()
            return {"results": response.json(), "translated_query": translated_query}
    except requests.exceptions.Timeout:
        return {
            "error": "๊ฒ์ ์๊ฐ์ด ์ด๊ณผ๋์์ต๋๋ค. ์ ์ ํ ๋ค์ ์๋ํด์ฃผ์ธ์.",
            "translated_query": translated_query
        }
    except requests.exceptions.RequestException as e:
        return {
            "error": f"๊ฒ์ ์ค ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {str(e)}",
            "translated_query": translated_query
        }
    except Exception as e:
        return {
            "error": f"์๊ธฐ์น ์์ ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {str(e)}",
            "translated_query": translated_query
        }
def format_results_from_raw(response_data):
    """Convert a ``search_serphouse`` response into ``(error_message, articles)``.

    Args:
        response_data: Dict with either an ``error`` key, or ``results`` (the
            decoded API JSON) and ``translated_query``.

    Returns:
        ``("", articles)`` on success, where each article is a dict with the
        keys ``index``, ``title``, ``link``, ``snippet``, ``channel``,
        ``time``, ``image_url`` and ``translated_query``; otherwise
        ``(message, [])``.  ``index`` is the 1-based position in the raw
        result list, so filtered items leave gaps.

    Items whose URL or channel contains a Korean domain marker, or whose
    title contains a Korea-related keyword, are dropped.
    """
    if "error" in response_data:
        return "Error: " + response_data["error"], []
    try:
        results = response_data["results"]
        translated_query = response_data["translated_query"]
        news_results = results.get('results', {}).get('results', {}).get('news', [])
        if not news_results:
            return "๊ฒ์ ๊ฒฐ๊ณผ๊ฐ ์์ต๋๋ค.", []

        korean_domains = (
            '.kr', 'korea', 'korean', 'yonhap', 'hankyung', 'chosun',
            'donga', 'joins', 'hani', 'koreatimes', 'koreaherald'
        )
        korean_keywords = (
            'korea', 'korean', 'seoul', 'busan', 'incheon', 'daegu',
            'gwangju', 'daejeon', 'ulsan', 'sejong'
        )

        filtered_articles = []
        for idx, result in enumerate(news_results, 1):
            # Keep the URL as delivered for the article link; lowercase copies
            # are used for matching only, because URL paths are case-sensitive.
            # "or" also guards against fields that are present but None.
            raw_url = result.get("url") or result.get("link") or ""
            url_lc = raw_url.lower()
            title_lc = (result.get("title") or "").lower()
            channel_lc = (result.get("channel") or result.get("source") or "").lower()

            is_korean_content = (
                any(domain in url_lc or domain in channel_lc for domain in korean_domains) or
                any(keyword in title_lc for keyword in korean_keywords)
            )
            if is_korean_content:
                continue

            filtered_articles.append({
                "index": idx,
                "title": result.get("title", "์ ๋ชฉ ์์"),
                "link": raw_url,
                "snippet": result.get("snippet", "๋ด์ฉ ์์"),
                "channel": result.get("channel", result.get("source", "์ ์ ์์")),
                "time": result.get("time", result.get("date", "์ ์ ์๋ ์๊ฐ")),
                "image_url": result.get("img", result.get("thumbnail", "")),
                "translated_query": translated_query
            })
        return "", filtered_articles
    except Exception as e:
        return f"๊ฒฐ๊ณผ ์ฒ๋ฆฌ ์ค ์ค๋ฅ ๋ฐ์: {str(e)}", []
def serphouse_search(query, country):
    """Run a SerpHouse search and format it in one call.

    Returns whatever ``format_results_from_raw`` produces for the raw
    response: an ``(error_message, articles)`` pair.
    """
    return format_results_from_raw(search_serphouse(query, country))
# CSS (UI customization), passed to gr.Blocks(css=...) below.
# NOTE(review): the later `.group` rule sets `opacity: 0` and only
# `.group.visible` restores it; nothing in this file adds a `visible` class,
# so any element matching `.group` would stay transparent — confirm whether
# Gradio containers actually carry that class.
css = """
/* ์ ์ญ ์คํ์ผ */
footer {visibility: hidden;}
/* ๋ ์ด์์ ์ปจํ ์ด๋ */
#status_area {
background: rgba(255, 255, 255, 0.9);
padding: 15px;
border-bottom: 1px solid #ddd;
margin-bottom: 20px;
box-shadow: 0 2px 5px rgba(0,0,0,0.1);
}
#results_area {
padding: 10px;
margin-top: 10px;
}
/* ํญ ์คํ์ผ */
.tabs {
border-bottom: 2px solid #ddd !important;
margin-bottom: 20px !important;
}
.tab-nav {
border-bottom: none !important;
margin-bottom: 0 !important;
}
.tab-nav button {
font-weight: bold !important;
padding: 10px 20px !important;
}
.tab-nav button.selected {
border-bottom: 2px solid #1f77b4 !important;
color: #1f77b4 !important;
}
/* ์ํ ๋ฉ์์ง */
#status_area .markdown-text {
font-size: 1.1em;
color: #2c3e50;
padding: 10px 0;
}
/* ๊ธฐ๋ณธ ์ปจํ ์ด๋ */
.group {
border: 1px solid #eee;
padding: 15px;
margin-bottom: 15px;
border-radius: 5px;
background: white;
}
/* ๋ฒํผ ์คํ์ผ */
.primary-btn {
background: #1f77b4 !important;
border: none !important;
}
/* ์ ๋ ฅ ํ๋ */
.textbox {
border: 1px solid #ddd !important;
border-radius: 4px !important;
}
/* ํ๋ก๊ทธ๋ ์ค๋ฐ ์ปจํ ์ด๋ */
.progress-container {
position: fixed;
top: 0;
left: 0;
width: 100%;
height: 6px;
background: #e0e0e0;
z-index: 1000;
}
/* ํ๋ก๊ทธ๋ ์คbar */
.progress-bar {
height: 100%;
background: linear-gradient(90deg, #2196F3, #00BCD4);
box-shadow: 0 0 10px rgba(33, 150, 243, 0.5);
transition: width 0.3s ease;
animation: progress-glow 1.5s ease-in-out infinite;
}
/* ํ๋ก๊ทธ๋ ์ค ํ ์คํธ */
.progress-text {
position: fixed;
top: 8px;
left: 50%;
transform: translateX(-50%);
background: #333;
color: white;
padding: 4px 12px;
border-radius: 15px;
font-size: 14px;
z-index: 1001;
box-shadow: 0 2px 5px rgba(0,0,0,0.2);
}
/* ํ๋ก๊ทธ๋ ์ค๋ฐ ์ ๋๋ฉ์ด์ */
@keyframes progress-glow {
0% {
box-shadow: 0 0 5px rgba(33, 150, 243, 0.5);
}
50% {
box-shadow: 0 0 20px rgba(33, 150, 243, 0.8);
}
100% {
box-shadow: 0 0 5px rgba(33, 150, 243, 0.5);
}
}
/* ๋ฐ์ํ ๋์์ธ */
@media (max-width: 768px) {
.group {
padding: 10px;
margin-bottom: 15px;
}
.progress-text {
font-size: 12px;
padding: 3px 10px;
}
}
/* ๋ก๋ฉ ์ํ ํ์ ๊ฐ์ */
.loading {
opacity: 0.7;
pointer-events: none;
transition: opacity 0.3s ease;
}
/* ๊ฒฐ๊ณผ ์ปจํ ์ด๋ ์ ๋๋ฉ์ด์ */
.group {
transition: all 0.3s ease;
opacity: 0;
transform: translateY(20px);
}
.group.visible {
opacity: 1;
transform: translateY(0);
}
/* Examples ์คํ์ผ๋ง */
.examples-table {
margin-top: 10px !important;
margin-bottom: 20px !important;
}
.examples-table button {
background-color: #f0f0f0 !important;
border: 1px solid #ddd !important;
border-radius: 4px !important;
padding: 5px 10px !important;
margin: 2px !important;
transition: all 0.3s ease !important;
}
.examples-table button:hover {
background-color: #e0e0e0 !important;
transform: translateY(-1px) !important;
box-shadow: 0 2px 5px rgba(0,0,0,0.1) !important;
}
.examples-table .label {
font-weight: bold !important;
color: #444 !important;
margin-bottom: 5px !important;
}
"""
import gradio as gr | |
def _build_company_panel(company):
    """Add one search/load panel for ``company`` to the enclosing Gradio layout.

    The panel holds a heading, a search button, a load button and a Markdown
    area that both buttons write to.
    """
    with gr.Group():
        gr.Markdown(f"### {company}")
        with gr.Row():
            search_btn = gr.Button("๊ฒ์", variant="primary")
            load_btn = gr.Button("์ถ๋ ฅ", variant="secondary")
        result_display = gr.Markdown()
        # Bind the term as a default argument so each button keeps its own value.
        search_btn.click(
            fn=lambda c=company: search_company(c),
            outputs=result_display
        )
        load_btn.click(
            fn=lambda c=company: load_company(c),
            outputs=result_display
        )


# NOTE(review): the source lost its indentation, so the container nesting
# below is reconstructed from statement order — confirm against the deployed UI.
with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css, title="NewsAI ์๋น์ค") as iface:
    init_db()
    with gr.Tabs():
        # First tab
        with gr.Tab("Earnbot"):
            gr.Markdown("## EarnBot: ๊ธ๋ก๋ฒ ๋น ํ ํฌ ๊ธฐ์ ๋ฐ ํฌ์ ์ ๋ง AI ์๋ ๋ถ์")
            gr.Markdown(" * '์ ์ฒด ๋ถ์ ๋ณด๊ณ ์์ฝ' ํด๋ฆญ ์ ์ ์ฒด ์๋ ๋ณด๊ณ ์์ฑ.\n * ์๋ ๊ฐ๋ณ ์ข ๋ชฉ์ '๊ฒ์(DB ์๋ ์ ์ฅ)'๊ณผ '์ถ๋ ฅ(DB ์๋ ํธ์ถ)'๋ ๊ฐ๋ฅ.\n * ์ถ๊ฐ๋ก, ์ํ๋ ์์ ํค์๋ ๋ฐ ๊ตญ๊ฐ๋ก ๊ฒ์/๋ถ์ํ ์๋ ์์ต๋๋ค.")

            # (2) Custom user search section
            with gr.Group():
                gr.Markdown("### ์ฌ์ฉ์ ์์ ๊ฒ์")
                with gr.Row():
                    with gr.Column():
                        user_input = gr.Textbox(
                            label="๊ฒ์์ด ์ ๋ ฅ",
                            placeholder="์) Apple, Samsung ๋ฑ ์์ ๋กญ๊ฒ"
                        )
                    with gr.Column():
                        country_selection = gr.Dropdown(
                            choices=list(COUNTRY_LOCATIONS.keys()),
                            value="United States",
                            label="๊ตญ๊ฐ ์ ํ"
                        )
                    with gr.Column():
                        custom_search_btn = gr.Button("์คํ", variant="primary")
                custom_search_output = gr.Markdown()
                custom_search_btn.click(
                    fn=search_custom,
                    inputs=[user_input, country_selection],
                    outputs=custom_search_output
                )

            # Full summary report button
            with gr.Row():
                full_report_btn = gr.Button("์ ์ฒด ๋ถ์ ๋ณด๊ณ ์์ฝ", variant="primary")
            full_report_display = gr.Markdown()
            full_report_btn.click(
                fn=full_summary_report,
                outputs=full_report_display
            )

            # Per-term search/load panels, two per row (left column, then right).
            with gr.Column():
                for row_start in range(0, len(KOREAN_COMPANIES), 2):
                    with gr.Row():
                        for company in KOREAN_COMPANIES[row_start:row_start + 2]:
                            with gr.Column():
                                _build_company_panel(company)

launch_options = {
    "server_name": "0.0.0.0",
    "server_port": 7860,
    "share": True,
    "ssl_verify": False,
    "show_error": True,
}
iface.launch(**launch_options)