# MoneyRadar / app-backup.py
import gradio as gr
import requests
import json
import os
from datetime import datetime, timedelta
from bs4 import BeautifulSoup  # Used to extract text from web pages
from huggingface_hub import InferenceClient  # Needed to call the LLM

# Install the required packages (uncomment and run if needed)
# !pip install gradio requests beautifulsoup4 huggingface_hub

# Read the API keys from environment variables (API keys must be kept secure)
API_KEY = os.getenv("SERPHOUSE_API_KEY")  # Set your SerpHouse API key as an environment variable.
HF_TOKEN = os.getenv("HF_TOKEN")  # Set your Hugging Face API token as an environment variable.
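
# Optional startup check (assumes both keys are required at runtime): warn early
# instead of failing inside the first API call.
if not API_KEY or not HF_TOKEN:
    print("Warning: SERPHOUSE_API_KEY and/or HF_TOKEN is not set; API calls will fail.")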
MAJOR_COUNTRIES = [
"United States", "United Kingdom", "Canada", "Australia", "Germany",
"France", "Japan", "South Korea", "China", "India",
"Brazil", "Mexico", "Russia", "Italy", "Spain",
"Netherlands", "Sweden", "Switzerland", "Norway", "Denmark",
"Finland", "Belgium", "Austria", "New Zealand", "Ireland",
"Singapore", "Hong Kong", "Israel", "United Arab Emirates", "Saudi Arabia",
"South Africa", "Turkey", "Egypt", "Poland", "Czech Republic",
"Hungary", "Greece", "Portugal", "Argentina", "Chile",
"Colombia", "Peru", "Venezuela", "Thailand", "Malaysia",
"Indonesia", "Philippines", "Vietnam", "Pakistan", "Bangladesh"
]
def search_serphouse(query, country, page=1, num_result=10):
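    """Call the SerpHouse live SERP endpoint for Google News results.

    Results are limited to roughly the last 24 hours via date_range.
    Returns the parsed JSON response, or a dict with an "error" key on failure.
    """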
url = "https://api.serphouse.com/serp/live"
now = datetime.utcnow()
yesterday = now - timedelta(days=1)
date_range = f"{yesterday.strftime('%Y-%m-%d')},{now.strftime('%Y-%m-%d')}"
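    # date_range takes the form "YYYY-MM-DD,YYYY-MM-DD", e.g. "2024-05-01,2024-05-02"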
payload = {
"data": {
"q": query,
"domain": "google.com",
"loc": country,
"lang": "en",
"device": "desktop",
"serp_type": "news",
"page": str(page),
"verbatim": "1",
"num": str(num_result),
"date_range": date_range
}
}
headers = {
"accept": "application/json",
"content-type": "application/json",
"authorization": f"Bearer {API_KEY}"
}
    response = None
    try:
        response = requests.post(url, json=payload, headers=headers, timeout=30)
        response.raise_for_status()
        return response.json()
    except requests.RequestException as e:
        error_msg = f"Error: {str(e)}"
        # response may be None if the request never completed (e.g. a connection error)
        if response is not None and response.text:
            error_msg += f"\nResponse content: {response.text}"
        return {"error": error_msg}
def format_results_from_raw(results):
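    """Flatten a SerpHouse response into a list of article dicts.

    Returns a (error_message, articles) tuple: error_message is empty on
    success, and each article dict has title, link, snippet, channel, time
    and image_url keys.
    """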
try:
if isinstance(results, dict) and "error" in results:
return "Error: " + results["error"], []
if not isinstance(results, dict):
raise ValueError("κ²°κ³Όκ°€ 사전 ν˜•μ‹μ΄ μ•„λ‹™λ‹ˆλ‹€.")
        # Inspect the structure under the 'results' key (handle the nested 'results')
if 'results' in results:
results_content = results['results']
if 'results' in results_content:
results_content = results_content['results']
                # Check for the 'news' key
if 'news' in results_content:
news_results = results_content['news']
else:
news_results = []
else:
news_results = []
else:
news_results = []
if not news_results:
return "검색 κ²°κ³Όκ°€ μ—†μŠ΅λ‹ˆλ‹€.", []
articles = []
for idx, result in enumerate(news_results, 1):
title = result.get("title", "제λͺ© μ—†μŒ")
link = result.get("url", result.get("link", "#"))
snippet = result.get("snippet", "λ‚΄μš© μ—†μŒ")
channel = result.get("channel", result.get("source", "μ•Œ 수 μ—†μŒ"))
time = result.get("time", result.get("date", "μ•Œ 수 μ—†λŠ” μ‹œκ°„"))
image_url = result.get("img", result.get("thumbnail", ""))
articles.append({
"title": title,
"link": link,
"snippet": snippet,
"channel": channel,
"time": time,
"image_url": image_url
})
return "", articles
except Exception as e:
error_message = f"κ²°κ³Ό 처리 쀑 였λ₯˜ λ°œμƒ: {str(e)}"
return "Error: " + error_message, []
def serphouse_search(query, country):
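    """Run a news search for the query/country pair and return (error_message, articles)."""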
# νŽ˜μ΄μ§€μ™€ κ²°κ³Ό 수의 기본값을 μ„€μ •ν•©λ‹ˆλ‹€.
page = 1
num_result = 10
results = search_serphouse(query, country, page, num_result)
error_message, articles = format_results_from_raw(results)
return error_message, articles
# LLM setup
hf_client = InferenceClient("CohereForAI/c4ai-command-r-plus-08-2024", token=HF_TOKEN)
def summarize_article(url):
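    """Fetch an article page, pull the paragraph text, and ask the LLM for a Korean summary."""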
try:
        # Extract text from the web page
        response = requests.get(url, timeout=10)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')
        # Extract all paragraph text (a simple approach)
text = ' '.join([p.get_text() for p in soup.find_all('p')])
if not text.strip():
return "기사 λ‚΄μš©μ„ κ°€μ Έμ˜¬ 수 μ—†μŠ΅λ‹ˆλ‹€."
        # Generate the summary
prompt = f"λ‹€μŒ μ˜μ–΄ 기사λ₯Ό ν•œκ΅­μ–΄λ‘œ 3λ¬Έμž₯으둜 μš”μ•½ν•˜μ„Έμš”:\n{text}"
summary = hf_client.text_generation(prompt, max_new_tokens=500)
return summary
except Exception as e:
return f"μš”μ•½ 쀑 였λ₯˜ λ°œμƒ: {str(e)}"
css = """
footer {
visibility: hidden;
}
"""
# Build the Gradio interface
with gr.Blocks(css=css, title="NewsAI μ„œλΉ„μŠ€") as iface:
gr.Markdown("검색어λ₯Ό μž…λ ₯ν•˜κ³  μ›ν•˜λŠ” κ΅­κ°€λ₯Ό μ„ νƒν•˜λ©΄, 검색어와 μΌμΉ˜ν•˜λŠ” 24μ‹œκ°„ 이내 λ‰΄μŠ€λ₯Ό μ΅œλŒ€ 10개 좜λ ₯ν•©λ‹ˆλ‹€.")
with gr.Column():
with gr.Row():
query = gr.Textbox(label="검색어")
country = gr.Dropdown(MAJOR_COUNTRIES, label="κ΅­κ°€", value="South Korea")
search_button = gr.Button("검색")
    # Dynamically rendered search results. Components created inside an ordinary
    # event handler are never shown by Gradio, so the results are kept in State
    # components and drawn with gr.render (requires a recent Gradio 4.x release),
    # which re-runs whenever those states change.
    error_state = gr.State("")
    articles_state = gr.State([])

    def search_and_display(query, country):
        error_message, articles = serphouse_search(query, country)
        return error_message, articles

    @gr.render(inputs=[error_state, articles_state])
    def render_results(error_message, articles):
        if error_message:
            gr.Markdown(error_message)
            return
        # Render one block per article
        for article in articles:
            with gr.Column():
                gr.Markdown(f"### [{article['title']}]({article['link']})")
                if article['image_url']:
                    gr.Image(value=article['image_url'], height=150)
                gr.Markdown(f"**μš”μ•½:** {article['snippet']}")
                gr.Markdown(f"**좜처:** {article['channel']} | **μ‹œκ°„:** {article['time']}")
                analyze_button = gr.Button("뢄석")
                summary_output = gr.Markdown(visible=False)
                # Bind each button to its own article link via a default argument.
                analyze_button.click(
                    lambda link=article['link']: gr.update(value=summarize_article(link), visible=True),
                    inputs=None,
                    outputs=summary_output,
                )

    search_button.click(
        search_and_display,
        inputs=[query, country],
        outputs=[error_state, articles_state],
    )
iface.launch(auth=("gini", "pick"))