MoneyRadar / app.py
seawolf2357's picture
Update app.py
1be9f59 verified
raw
history blame
9.94 kB
import gradio as gr
import requests
import json
import os
from datetime import datetime, timedelta
from huggingface_hub import InferenceClient # LLM ์‚ฌ์šฉ์„ ์œ„ํ•ด ํ•„์š”
# ํ™˜๊ฒฝ ๋ณ€์ˆ˜์—์„œ API ํ‚ค ๊ฐ€์ ธ์˜ค๊ธฐ (API ํ‚ค๋Š” ์•ˆ์ „ํ•˜๊ฒŒ ๊ด€๋ฆฌ๋˜์–ด์•ผ ํ•ฉ๋‹ˆ๋‹ค)
API_KEY = os.getenv("SERPHOUSE_API_KEY") # ๋ณธ์ธ์˜ SerpHouse API ํ‚ค๋ฅผ ํ™˜๊ฒฝ ๋ณ€์ˆ˜๋กœ ์„ค์ •ํ•˜์„ธ์š”.
HF_TOKEN = os.getenv("HF_TOKEN") # Hugging Face API ํ† ํฐ์„ ํ™˜๊ฒฝ ๋ณ€์ˆ˜๋กœ ์„ค์ •ํ•˜์„ธ์š”.
# ๊ตญ๊ฐ€ ์ด๋ฆ„๊ณผ Google ๊ฒ€์ƒ‰์—์„œ ์‚ฌ์šฉํ•˜๋Š” ๊ตญ๊ฐ€ ์ฝ”๋“œ๋ฅผ ๋งคํ•‘
COUNTRY_CODE_MAPPING = {
"United States": "us",
"United Kingdom": "uk",
"Canada": "ca",
"Australia": "au",
"Germany": "de",
"France": "fr",
"Japan": "jp",
"South Korea": "kr",
"China": "cn",
"India": "in",
"Brazil": "br",
"Mexico": "mx",
"Russia": "ru",
"Italy": "it",
"Spain": "es",
"Netherlands": "nl",
"Sweden": "se",
"Switzerland": "ch",
"Norway": "no",
"Denmark": "dk",
"Finland": "fi",
"Belgium": "be",
"Austria": "at",
"New Zealand": "nz",
"Ireland": "ie",
"Singapore": "sg",
"Hong Kong": "hk",
"Israel": "il",
"United Arab Emirates": "ae",
"Saudi Arabia": "sa",
"South Africa": "za",
"Turkey": "tr",
"Egypt": "eg",
"Poland": "pl",
"Czech Republic": "cz",
"Hungary": "hu",
"Greece": "gr",
"Portugal": "pt",
"Argentina": "ar",
"Chile": "cl",
"Colombia": "co",
"Peru": "pe",
"Venezuela": "ve",
"Thailand": "th",
"Malaysia": "my",
"Indonesia": "id",
"Philippines": "ph",
"Vietnam": "vn",
"Pakistan": "pk",
"Bangladesh": "bd"
}
# Dropdown choices. This is exactly the key set of COUNTRY_CODE_MAPPING, in
# the same insertion order, so every selectable country is guaranteed to have
# a country code — derive it instead of repeating the 50 names by hand.
MAJOR_COUNTRIES = list(COUNTRY_CODE_MAPPING)
def search_serphouse(query, country, page=1, num_result=100):
    """Query SerpHouse's live SERP endpoint for news from the last 24 hours.

    Args:
        query: Search phrase.
        country: Human-readable country name; mapped through
            COUNTRY_CODE_MAPPING (falls back to "us" when unknown).
        page: 1-based results page.
        num_result: Maximum number of results requested per page.

    Returns:
        The parsed JSON response dict on success, or ``{"error": ...}`` on
        failure — the shape format_results_from_raw() understands.
    """
    url = "https://api.serphouse.com/serp/live"

    # Restrict results to the last 24 hours (yesterday..today, UTC).
    now = datetime.utcnow()
    yesterday = now - timedelta(days=1)
    date_range = f"{yesterday.strftime('%Y-%m-%d')},{now.strftime('%Y-%m-%d')}"

    payload = {
        "data": {
            "q": query,
            "domain": "google.com",
            # NOTE(review): the original author flagged this line as "the
            # source of the problem" — SerpHouse's "loc" may expect a full
            # location name rather than a two-letter code; verify against
            # the SerpHouse API documentation before changing it.
            "loc": COUNTRY_CODE_MAPPING.get(country, "us"),
            "lang": "en",
            "device": "desktop",
            "serp_type": "news",
            "page": str(page),
            "verbatim": "1",
            "num": str(num_result),
            "date_range": date_range,
        }
    }
    headers = {
        "accept": "application/json",
        "content-type": "application/json",
        "authorization": f"Bearer {API_KEY}",
    }

    try:
        # Bug fix: the original built the payload but never sent the request
        # and had no return statement, so it always returned None. Actually
        # perform the POST and hand back the parsed JSON.
        response = requests.post(url, json=payload, headers=headers, timeout=30)
        response.raise_for_status()
        return response.json()
    except Exception as e:
        # Boundary handler: surface any network/HTTP/JSON failure in the
        # error-dict shape the formatter expects instead of crashing the UI.
        return {"error": str(e)}
def format_results_from_raw(results):
    """Normalize a raw SerpHouse response into ``(error_message, articles)``.

    ``error_message`` is "" on success; ``articles`` is a list of dicts with
    index/title/link/snippet/channel/time/image_url keys. Any processing
    failure is reported as an "Error: ..." message rather than raised.
    """
    try:
        # An explicit error payload from the API layer is passed straight through.
        if isinstance(results, dict) and "error" in results:
            return "Error: " + results["error"], []
        if not isinstance(results, dict):
            raise ValueError("๊ฒฐ๊ณผ๊ฐ€ ์‚ฌ์ „ ํ˜•์‹์ด ์•„๋‹™๋‹ˆ๋‹ค.")

        # SerpHouse nests the payload as results -> results -> news.
        news_results = []
        if "results" in results and "results" in results["results"]:
            inner = results["results"]["results"]
            if "news" in inner:
                news_results = inner["news"]

        if not news_results:
            return "๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.", []

        # Flatten each raw hit into the fixed schema the UI table expects,
        # tolerating the alternate key names SerpHouse sometimes uses.
        articles = [
            {
                "index": pos,
                "title": item.get("title", "์ œ๋ชฉ ์—†์Œ"),
                "link": item.get("url", item.get("link", "#")),
                "snippet": item.get("snippet", "๋‚ด์šฉ ์—†์Œ"),
                "channel": item.get("channel", item.get("source", "์•Œ ์ˆ˜ ์—†์Œ")),
                "time": item.get("time", item.get("date", "์•Œ ์ˆ˜ ์—†๋Š” ์‹œ๊ฐ„")),
                "image_url": item.get("img", item.get("thumbnail", "")),
            }
            for pos, item in enumerate(news_results, 1)
        ]
        return "", articles
    except Exception as exc:
        return f"Error: ๊ฒฐ๊ณผ ์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(exc)}", []
def serphouse_search(query, country):
    """Run one news search and return ``(error_message, articles)``.

    Convenience wrapper: fixes the page to 1 and requests the maximum of
    100 results, then normalizes the raw API response.
    """
    raw = search_serphouse(query, country, page=1, num_result=100)
    return format_results_from_raw(raw)
# LLM client used for article summarization (Cohere Command-R+ served via
# the Hugging Face Inference API; authenticated with HF_TOKEN).
hf_client = InferenceClient("CohereForAI/c4ai-command-r-plus-08-2024", token=HF_TOKEN)
def summarize_article(title, snippet):
    """Ask the LLM for a three-sentence Korean summary of one article.

    Returns the generated text, or an error string if the call fails
    (the caller displays whatever comes back, so we never raise).
    """
    try:
        prompt = (
            "๋‹ค์Œ ๋‰ด์Šค ์ œ๋ชฉ๊ณผ ์š”์•ฝ์„ ๋ฐ”ํƒ•์œผ๋กœ ํ•œ๊ตญ์–ด๋กœ 3๋ฌธ์žฅ์œผ๋กœ ์š”์•ฝํ•˜์„ธ์š”:\n"
            f"์ œ๋ชฉ: {title}\n"
            f"์š”์•ฝ: {snippet}"
        )
        return hf_client.text_generation(prompt, max_new_tokens=500)
    except Exception as exc:
        return f"์š”์•ฝ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(exc)}"
css = """
footer {
visibility: hidden;
}
/* ๋ถ„์„ ๋ฒ„ํŠผ ์Šคํƒ€์ผ ๊ฐœ์„  */
.analyze-button {
background-color: #4CAF50; /* Green */
border: none;
color: white;
padding: 6px 12px;
text-align: center;
text-decoration: none;
font-size: 14px;
margin: 2px;
cursor: pointer;
border-radius: 4px;
}
.analyze-button:hover {
background-color: #45a049;
}
"""
# Gradio ์ธํ„ฐํŽ˜์ด์Šค ๊ตฌ์„ฑ
with gr.Blocks(css=css, title="NewsAI ์„œ๋น„์Šค") as iface:
gr.Markdown("๊ฒ€์ƒ‰์–ด๋ฅผ ์ž…๋ ฅํ•˜๊ณ  ์›ํ•˜๋Š” ๊ตญ๊ฐ€๋ฅผ ์„ ํƒํ•˜๋ฉด, ๊ฒ€์ƒ‰์–ด์™€ ์ผ์น˜ํ•˜๋Š” 24์‹œ๊ฐ„ ์ด๋‚ด ๋‰ด์Šค๋ฅผ ์ตœ๋Œ€ 100๊ฐœ ์ถœ๋ ฅํ•ฉ๋‹ˆ๋‹ค.")
with gr.Column():
with gr.Row():
query = gr.Textbox(label="๊ฒ€์ƒ‰์–ด")
country = gr.Dropdown(MAJOR_COUNTRIES, label="๊ตญ๊ฐ€", value="South Korea")
search_button = gr.Button("๊ฒ€์ƒ‰")
output_table = gr.HTML()
summary_output = gr.Markdown(visible=False)
def search_and_display(query, country):
error_message, articles = serphouse_search(query, country)
if error_message:
return f"<p>{error_message}</p>", gr.update(visible=False)
else:
# ๊ธฐ์‚ฌ ๋ชฉ๋ก์„ HTML ํ…Œ์ด๋ธ”๋กœ ์ƒ์„ฑ
table_html = """
<table border='1' style='width:100%; text-align:left;'>
<tr>
<th>๋ฒˆํ˜ธ</th>
<th>์ œ๋ชฉ</th>
<th>์ถœ์ฒ˜</th>
<th>์‹œ๊ฐ„</th>
<th>๋ถ„์„</th>
</tr>
"""
for article in articles:
# ๊ฐ ๊ธฐ์‚ฌ์— ๋Œ€ํ•ด ๋ฒ„ํŠผ์— ํ•ด๋‹นํ•˜๋Š” JavaScript ์ฝ”๋“œ๋ฅผ ์‚ฝ์ž…
analyze_button = f"""<button class="analyze-button" onclick="analyzeArticle('{article['index']}')">๋ถ„์„</button>"""
row = f"""
<tr>
<td>{article['index']}</td>
<td><a href="{article['link']}" target="_blank">{article['title']}</a></td>
<td>{article['channel']}</td>
<td>{article['time']}</td>
<td>{analyze_button}</td>
</tr>
"""
table_html += row
table_html += "</table>"
# JavaScript ํ•จ์ˆ˜ ์ •์˜
js_code = """
<script>
function analyzeArticle(index) {
// Gradio์˜ handleFunction์„ ์‚ฌ์šฉํ•˜์—ฌ Python ํ•จ์ˆ˜ ํ˜ธ์ถœ
const articleData = JSON.parse(document.getElementById('articles_data').textContent);
const selectedArticle = articleData.find(article => article.index == index);
if (selectedArticle) {
gradioApp().querySelector('#article_title textarea').value = selectedArticle.title;
gradioApp().querySelector('#article_snippet textarea').value = selectedArticle.snippet;
gradioApp().querySelector('#analyze_button').click();
}
}
</script>
"""
# ๊ธฐ์‚ฌ ๋ฐ์ดํ„ฐ๋ฅผ JSON์œผ๋กœ ์ €์žฅํ•˜์—ฌ JavaScript์—์„œ ์ ‘๊ทผ ๊ฐ€๋Šฅํ•˜๋„๋ก ํ•จ
articles_json = json.dumps(articles)
full_html = f"""
<div id="articles_data" style="display:none;">{articles_json}</div>
{table_html}
{js_code}
"""
return full_html, gr.update(visible=True, value="") # summary_output ์ดˆ๊ธฐํ™”
def analyze_article(title, snippet):
summary = summarize_article(title, snippet)
return summary
article_title = gr.Textbox(visible=False, elem_id="article_title")
article_snippet = gr.Textbox(visible=False, elem_id="article_snippet")
analyze_button = gr.Button("๋ถ„์„", visible=False, elem_id="analyze_button")
search_button.click(
search_and_display,
inputs=[query, country],
outputs=[output_table, summary_output]
)
analyze_button.click(
analyze_article,
inputs=[article_title, article_snippet],
outputs=[summary_output]
)
iface.launch(auth=("gini", "pick"))