MoneyRadar

Building

App Files Files Community

ginipick committed on Jan 22

Commit

c6beee2

verified ·

1 Parent(s): a1e836a

Update app.py

Browse files

Files changed (1) hide show

app.py +187 -28

app.py CHANGED Viewed

@@ -3,7 +3,7 @@ import requests
 import json
 import os
 from datetime import datetime, timedelta
-from concurrent.futures import ThreadPoolExecutor
 from functools import lru_cache
 from requests.adapters import HTTPAdapter
 from requests.packages.urllib3.util.retry import Retry
@@ -45,6 +45,9 @@ def convert_to_seoul_time(timestamp_str):
         return timestamp_str
 def analyze_sentiment_batch(articles, client):
     try:
         # 모든 기사의 제목과 내용을 하나의 텍스트로 결합
         combined_text = "\n\n".join([
@@ -52,7 +55,6 @@ def analyze_sentiment_batch(articles, client):
             for article in articles
         ])
-        # f""" ... """ 형태로 여러 줄 문자열을 정확히 사용
         prompt = f"""다음 뉴스 모음에 대해 전반적인 감성 분석을 수행하세요:
 뉴스 내용:
@@ -96,6 +98,9 @@ def init_db():
     conn.close()
 def save_to_db(keyword, country, results):
     conn = sqlite3.connect("search_results.db")
     c = conn.cursor()
     seoul_tz = pytz.timezone('Asia/Seoul')
@@ -110,6 +115,9 @@ def save_to_db(keyword, country, results):
     conn.close()
 def load_from_db(keyword, country):
     conn = sqlite3.connect("search_results.db")
     c = conn.cursor()
     c.execute("SELECT results, timestamp FROM searches WHERE keyword=? AND country=? ORDER BY timestamp DESC LIMIT 1",
@@ -121,6 +129,9 @@ def load_from_db(keyword, country):
     return None, None
 def display_results(articles):
     output = ""
     for idx, article in enumerate(articles, 1):
         output += f"### {idx}. {article['title']}\n"
@@ -131,6 +142,9 @@ def display_results(articles):
     return output
 def search_company(company):
     error_message, articles = serphouse_search(company, "United States")
     if not error_message and articles:
         save_to_db(company, "United States", articles)
@@ -138,17 +152,29 @@ def search_company(company):
     return f"{company}에 대한 검색 결과가 없습니다."
 def load_company(company):
     """Return the stored "United States" results for ``company`` as Markdown.

     The row is fetched with ``load_from_db``. When nothing usable comes
     back, a "no saved results" message is returned instead of a listing.
     """
     articles, saved_at = load_from_db(company, "United States")
     if not articles:
         return f"{company}에 대한 저장된 결과가 없습니다."
     heading = f"### {company} 검색 결과\n저장 시간: {saved_at}\n\n"
     return heading + display_results(articles)
 def show_stats():
     conn = sqlite3.connect("search_results.db")
     c = conn.cursor()
     output = "## 한국 기업 뉴스 분석 리포트\n\n"
     for company in KOREAN_COMPANIES:
         c.execute("""
             SELECT results, timestamp
@@ -158,36 +184,70 @@ def show_stats():
             LIMIT 1
         """, (company,))
-        result = c.fetchone()
-        if result:
-            results_json, timestamp = result
             articles = json.loads(results_json)
             seoul_time = convert_to_seoul_time(timestamp)
-            output += f"### {company}\n"
-            output += f"- 마지막 업데이트: {seoul_time}\n"
-            output += f"- 저장된 기사 수: {len(articles)}건\n\n"
-            if articles:
-                sentiment_analysis = analyze_sentiment_batch(articles, client)
-                output += "#### 뉴스 감성 분석\n"
-                output += f"{sentiment_analysis}\n\n"
-            output += "---\n\n"
     conn.close()
     return output
 def search_all_companies():
     """Search every entry of ``KOREAN_COMPANIES`` in turn and merge the output.

     Each company contributes a ``## <name>`` heading followed by whatever
     ``search_company`` returns for it, in list order.
     """
     pieces = ["# [전체 검색 결과]\n\n"]
     for name in KOREAN_COMPANIES:
         pieces.extend((f"## {name}\n", search_company(name), "\n"))
     return "".join(pieces)
 def load_all_companies():
     overall_result = "# [전체 출력 결과]\n\n"
     for comp in KOREAN_COMPANIES:
         overall_result += f"## {comp}\n"
         overall_result += load_company(comp)
@@ -195,17 +255,23 @@ def load_all_companies():
     return overall_result
 def full_summary_report():
-    # 1) 전체 검색
     search_result_text = search_all_companies()
-    # 2) 전체 출력
     load_result_text = load_all_companies()
-    # 3) 전체 통계
     stats_text = show_stats()
     combined_report = (
         "# 전체 분석 보고 요약\n\n"
         "아래 순서로 실행되었습니다:\n"
-        "1. 모든 종목 검색 → 2. 모든 종목 DB 결과 출력 → 3. 전체 감성 분석 통계\n\n"
         f"{search_result_text}\n\n"
         f"{load_result_text}\n\n"
         "## [전체 감성 분석 통계]\n\n"
@@ -213,6 +279,47 @@ def full_summary_report():
     )
     return combined_report
 ACCESS_TOKEN = os.getenv("HF_TOKEN")
 if not ACCESS_TOKEN:
     raise ValueError("HF_TOKEN environment variable is not set")
@@ -224,6 +331,8 @@ client = OpenAI(
 API_KEY = os.getenv("SERPHOUSE_API_KEY")
 COUNTRY_LANGUAGES = {
     "United States": "en",
     "KOREA": "ko",
@@ -366,8 +475,12 @@ COUNTRY_LOCATIONS = {
     "Iceland": "Iceland"
 }
 @lru_cache(maxsize=100)
 def translate_query(query, country):
     try:
         if is_english(query):
             return query
@@ -404,6 +517,10 @@ def is_english(text):
     return all(ord(char) < 128 for char in text.replace(' ', '').replace('-', '').replace('_', ''))
 def search_serphouse(query, country, page=1, num_result=10):
     url = "https://api.serphouse.com/serp/live"
     now = datetime.utcnow()
@@ -420,7 +537,7 @@ def search_serphouse(query, country, page=1, num_result=10):
             "lang": COUNTRY_LANGUAGES.get(country, "en"),
             "device": "desktop",
             "serp_type": "news",
-            "page": "1",
             "num": "100",
             "date_range": date_range,
             "sort_by": "date"
@@ -474,6 +591,10 @@ def search_serphouse(query, country, page=1, num_result=10):
         }
 def format_results_from_raw(response_data):
     if "error" in response_data:
         return "Error: " + response_data["error"], []
@@ -481,10 +602,12 @@ def format_results_from_raw(response_data):
         results = response_data["results"]
         translated_query = response_data["translated_query"]
         news_results = results.get('results', {}).get('results', {}).get('news', [])
         if not news_results:
             return "검색 결과가 없습니다.", []
         korean_domains = [
             '.kr', 'korea', 'korean', 'yonhap', 'hankyung', 'chosun',
             'donga', 'joins', 'hani', 'koreatimes', 'koreaherald'
@@ -502,9 +625,10 @@ def format_results_from_raw(response_data):
             is_korean_content = (
                 any(domain in url or domain in channel for domain in korean_domains) or
-                any(keyword in title.lower() for keyword in korean_keywords)
             )
             if not is_korean_content:
                 filtered_articles.append({
                     "index": idx,
@@ -522,9 +646,14 @@ def format_results_from_raw(response_data):
         return f"결과 처리 중 오류 발생: {str(e)}", []
 def serphouse_search(query, country):
     """Run a SerpHouse search and hand the raw response to the formatter.

     Returns whatever ``format_results_from_raw`` produces for the response
     of ``search_serphouse(query, country)``.
     """
     return format_results_from_raw(search_serphouse(query, country))
 css = """
 /* 전역 스타일 */
 footer {visibility: hidden;}
@@ -700,15 +829,44 @@ footer {visibility: hidden;}
 }
 """
 with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css, title="NewsAI 서비스") as iface:
     init_db()
     with gr.Tabs():
-        # 첫 번째 탭 (DB 검색)
         with gr.Tab("Earnbot"):
             gr.Markdown("## EarnBot: 글로벌 빅테크 기업 및 투자 전망 AI 자동 분석")
-            gr.Markdown(" '전체 분석 보고 요약' 클릭시 전체 자동 보고 생성, 개별은 '검색(DB 자동 저장)'과 '출력(DB자동 호출)' 사용")
             with gr.Row():
                 full_report_btn = gr.Button("전체 분석 보고 요약", variant="primary")
                 full_report_display = gr.Markdown()
@@ -718,6 +876,7 @@ with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css, title="NewsAI 서비
                 outputs=full_report_display
             )
             with gr.Column():
                 for i in range(0, len(KOREAN_COMPANIES), 2):
                     with gr.Row():

 import json
 import os
 from datetime import datetime, timedelta
+from concurrent.futures import ThreadPoolExecutor, as_completed
 from functools import lru_cache
 from requests.adapters import HTTPAdapter
 from requests.packages.urllib3.util.retry import Retry
         return timestamp_str
 def analyze_sentiment_batch(articles, client):
+    """
+    OpenAI API를 통해 뉴스 기사들의 종합 감성 분석을 수행
+    """
     try:
         # 모든 기사의 제목과 내용을 하나의 텍스트로 결합
         combined_text = "\n\n".join([
             for article in articles
         ])
         prompt = f"""다음 뉴스 모음에 대해 전반적인 감성 분석을 수행하세요:
 뉴스 내용:
     conn.close()
 def save_to_db(keyword, country, results):
+    """
+    특정 (keyword, country) 조합에 대한 검색 결과를 DB에 저장
+    """
     conn = sqlite3.connect("search_results.db")
     c = conn.cursor()
     seoul_tz = pytz.timezone('Asia/Seoul')
     conn.close()
 def load_from_db(keyword, country):
+    """
+    특정 (keyword, country) 조합에 대한 가장 최근 검색 결과를 DB에서 불러오기
+    """
     conn = sqlite3.connect("search_results.db")
     c = conn.cursor()
     c.execute("SELECT results, timestamp FROM searches WHERE keyword=? AND country=? ORDER BY timestamp DESC LIMIT 1",
     return None, None
 def display_results(articles):
+    """
+    뉴스 기사 목록을 Markdown 문자열로 변환하여 반환
+    """
     output = ""
     for idx, article in enumerate(articles, 1):
         output += f"### {idx}. {article['title']}\n"
     return output
 def search_company(company):
+    """
+    단일 기업(또는 키워드)에 대해 미국 뉴스 검색, DB 저장 후 결과 Markdown 반환
+    """
     error_message, articles = serphouse_search(company, "United States")
     if not error_message and articles:
         save_to_db(company, "United States", articles)
     return f"{company}에 대한 검색 결과가 없습니다."
 def load_company(company):
     """Return the stored "United States" results for ``company`` as Markdown.

     The most recent row is fetched with ``load_from_db``. When nothing
     usable comes back, a "no saved results" message is returned instead of
     an article listing.
     """
     articles, saved_at = load_from_db(company, "United States")
     if not articles:
         return f"{company}에 대한 저장된 결과가 없습니다."
     heading = f"### {company} 검색 결과\n저장 시간: {saved_at}\n\n"
     return heading + display_results(articles)
 def show_stats():
+    """
+    KOREAN_COMPANIES 목록 내 모든 기업에 대해:
+      - 가장 최근 DB 저장 일자
+      - 기사 수
+      - 감성 분석 결과
+    를 순차(또는 병렬)로 조회하여 보고서 형태로 반환
+    """
     conn = sqlite3.connect("search_results.db")
     c = conn.cursor()
     output = "## 한국 기업 뉴스 분석 리포트\n\n"
+    # 모든 기업에 대해 DB에서 읽어올 (company, timestamp, articles) 목록 수집
+    data_list = []
     for company in KOREAN_COMPANIES:
         c.execute("""
             SELECT results, timestamp
             LIMIT 1
         """, (company,))
+        row = c.fetchone()
+        if row:
+            results_json, timestamp = row
             articles = json.loads(results_json)
             seoul_time = convert_to_seoul_time(timestamp)
+            data_list.append((company, seoul_time, articles))
     conn.close()
+    # (옵션) 각 기업 감성 분석을 병렬 처리
+    def analyze_data(item):
+        comp, tstamp, arts = item
+        sentiment = ""
+        if arts:
+            sentiment = analyze_sentiment_batch(arts, client)
+        return (comp, tstamp, len(arts), sentiment)
+    # ThreadPoolExecutor로 병렬 감성 분석
+    results_list = []
+    with ThreadPoolExecutor(max_workers=5) as executor:
+        futures = [executor.submit(analyze_data, dl) for dl in data_list]
+        for future in as_completed(futures):
+            results_list.append(future.result())
+    # 결과 정렬(원하는 순서대로) - 여기서는 기업명 기준 or 그냥 순서 없음
+    for comp, tstamp, count, sentiment in results_list:
+        output += f"### {comp}\n"
+        output += f"- 마지막 업데이트: {tstamp}\n"
+        output += f"- 저장된 기사 수: {count}건\n\n"
+        if sentiment:
+            output += "#### 뉴스 감성 분석\n"
+            output += f"{sentiment}\n\n"
+        output += "---\n\n"
     return output
+### (1) 전체 검색: 멀티스레드 적용
 def search_all_companies():
     """Search every entry of ``KOREAN_COMPANIES`` concurrently and merge the output.

     Each company is passed to ``search_company`` on a pool of up to five
     worker threads. The combined Markdown lists one ``## <name>`` section
     per company, always in ``KOREAN_COMPANIES`` order.

     Returns:
         The merged Markdown report as a single string.

     Raises:
         Whatever ``search_company`` raises; the first failure (in list
         order) propagates to the caller.
     """
     with ThreadPoolExecutor(max_workers=5) as executor:
         # executor.map yields results in input order regardless of which
         # search finishes first, so the report layout is stable between
         # runs (as_completed would order sections by completion time).
         search_texts = list(executor.map(search_company, KOREAN_COMPANIES))

     sections = [
         f"## {comp}\n" + text + "\n\n"
         for comp, text in zip(KOREAN_COMPANIES, search_texts)
     ]
     return "# [전체 검색 결과]\n\n" + "".join(sections)
 def load_all_companies():
+    """
+    KOREAN_COMPANIES 리스트 내 모든 기업에 대해,
+    DB에서 불러온 결과를 순차(또는 병렬)로 합쳐서 Markdown 형태로 반환
+    """
     overall_result = "# [전체 출력 결과]\n\n"
     for comp in KOREAN_COMPANIES:
         overall_result += f"## {comp}\n"
         overall_result += load_company(comp)
     return overall_result
 def full_summary_report():
+    """
+    (1) 모든 기업 검색 -> (2) DB에서 모든 기업 불러오기 -> (3) 감성 분석 통계
+    순서대로 실행하여, 전체 리포트를 합쳐 반환
+    """
+    # 1) 전체 검색(병렬)
     search_result_text = search_all_companies()
+    # 2) 전체 출력(순차)
     load_result_text = load_all_companies()
+    # 3) 전체 통계(감성 분석)
     stats_text = show_stats()
     combined_report = (
         "# 전체 분석 보고 요약\n\n"
         "아래 순서로 실행되었습니다:\n"
+        "1. 모든 종목 검색(병렬) → 2. 모든 종목 DB 결과 출력 → 3. 전체 감성 분석 통계\n\n"
         f"{search_result_text}\n\n"
         f"{load_result_text}\n\n"
         "## [전체 감성 분석 통계]\n\n"
     )
     return combined_report
+### (2) 사용자 임의 검색 + 국가 선택 기능
def search_custom(query, country):
    """Search news for a user-supplied keyword and country and report on it.

    Steps: run the SerpHouse search, save the articles to the DB, read the
    saved row back (this is where the displayed timestamp comes from), run
    the batch sentiment analysis, and assemble a Markdown report.

    Args:
        query: Free-text search keyword. Surrounding whitespace is ignored;
            ``None`` is treated like an empty string.
        country: Country name, passed through to the search and used
            together with the keyword as the DB lookup key.

    Returns:
        A Markdown string: the full report on success, otherwise a short
        message for empty input, a search error, an empty result set, or a
        failed DB read-back.
    """
    # An empty textbox would otherwise go straight to the search API, so
    # stop before any network or DB work is done.
    query = (query or "").strip()
    if not query:
        return "검색어를 입력하세요."

    # 1) Search
    error_message, articles = serphouse_search(query, country)
    if error_message:
        return f"오류 발생: {error_message}"
    if not articles:
        return "검색 결과가 없습니다."

    # 2) Persist, then 3) read back so the report shows the stored timestamp
    save_to_db(query, country, articles)
    results, timestamp = load_from_db(query, country)
    if not results:
        return "DB 로드 실패: 저장된 결과가 없습니다."

    # 4) Sentiment analysis over the stored articles
    sentiment_analysis = analyze_sentiment_batch(results, client)

    # 5) Final report: metadata header, article list, sentiment section
    return "".join([
        "## [사용자 임의 검색 결과]\n\n",
        f"**키워드**: {query}\n\n",
        f"**국가**: {country}\n\n",
        f"**저장 시간**: {timestamp}\n\n",
        display_results(results),
        "### 뉴스 감성 분석\n",
        f"{sentiment_analysis}\n",
    ])
+### (필수) API 인증
 ACCESS_TOKEN = os.getenv("HF_TOKEN")
 if not ACCESS_TOKEN:
     raise ValueError("HF_TOKEN environment variable is not set")
 API_KEY = os.getenv("SERPHOUSE_API_KEY")
+### 국가별 설정
 COUNTRY_LANGUAGES = {
     "United States": "en",
     "KOREA": "ko",
     "Iceland": "Iceland"
 }
 @lru_cache(maxsize=100)
 def translate_query(query, country):
+    """
+    Google Translation API(비공식) 사용하여 검색어를 해당 국가 언어로 번역
+    """
     try:
         if is_english(query):
             return query
     return all(ord(char) < 128 for char in text.replace(' ', '').replace('-', '').replace('_', ''))
 def search_serphouse(query, country, page=1, num_result=10):
+    """
+    SerpHouse API에 실시간 검색 요청을 보내어,
+    '뉴스' 탭 (sort_by=date)에서 해당 query에 대한 기사 목록을 가져온다.
+    """
     url = "https://api.serphouse.com/serp/live"
     now = datetime.utcnow()
             "lang": COUNTRY_LANGUAGES.get(country, "en"),
             "device": "desktop",
             "serp_type": "news",
+            "page": str(page),
             "num": "100",
             "date_range": date_range,
             "sort_by": "date"
         }
 def format_results_from_raw(response_data):
+    """
+    SerpHouse API의 응답 데이터를 가공하여,
+    (에러메시지, 기사리스트) 형태로 반환.
+    """
     if "error" in response_data:
         return "Error: " + response_data["error"], []
         results = response_data["results"]
         translated_query = response_data["translated_query"]
+        # 실제 뉴스 결과
         news_results = results.get('results', {}).get('results', {}).get('news', [])
         if not news_results:
             return "검색 결과가 없습니다.", []
+        # 한국 도메인 및 한국 관련 키워드 포함 기사 제외
         korean_domains = [
             '.kr', 'korea', 'korean', 'yonhap', 'hankyung', 'chosun',
             'donga', 'joins', 'hani', 'koreatimes', 'koreaherald'
             is_korean_content = (
                 any(domain in url or domain in channel for domain in korean_domains) or
+                any(keyword in title for keyword in korean_keywords)
             )
+            # 한국어 뉴스(또는 한국 도메인) 제외
             if not is_korean_content:
                 filtered_articles.append({
                     "index": idx,
         return f"결과 처리 중 오류 발생: {str(e)}", []
 def serphouse_search(query, country):
     """Run a SerpHouse search and format the response in one call.

     Returns whatever ``format_results_from_raw`` produces for the response
     of ``search_serphouse(query, country)``, i.e. an
     (error message, article list) pair.
     """
     return format_results_from_raw(search_serphouse(query, country))
+# CSS (UI 커스터마이징)
 css = """
 /* 전역 스타일 */
 footer {visibility: hidden;}
 }
 """
+import gradio as gr
 with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css, title="NewsAI 서비스") as iface:
     init_db()
     with gr.Tabs():
+        # 첫 번째 탭
         with gr.Tab("Earnbot"):
             gr.Markdown("## EarnBot: 글로벌 빅테크 기업 및 투자 전망 AI 자동 분석")
+            gr.Markdown(" * '전체 분석 보고 요약' 클릭 시 전체 자동 보고 생성.\n * 아래 개별 종목의 '검색(DB 자동 저장)'과 '출력(DB 자동 호출)'도 가능.\n * 추가로, 원하는 임의 키워드 및 국가로 검색/분석할 수도 있습니다.")
+            # (2) 사용자 임의 검색 섹션
+            with gr.Group():
+                gr.Markdown("### 사용자 임의 검색")
+                with gr.Row():
+                    with gr.Column():
+                        user_input = gr.Textbox(
+                            label="검색어 입력",
+                            placeholder="예) Apple, Samsung 등 자유롭게"
+                        )
+                    with gr.Column():
+                        country_selection = gr.Dropdown(
+                            choices=list(COUNTRY_LOCATIONS.keys()),
+                            value="United States",
+                            label="국가 선택"
+                        )
+                    with gr.Column():
+                        custom_search_btn = gr.Button("실행", variant="primary")
+                custom_search_output = gr.Markdown()
+                custom_search_btn.click(
+                    fn=search_custom,
+                    inputs=[user_input, country_selection],
+                    outputs=custom_search_output
+                )
+            # 전체 분석 보고 요약 버튼
             with gr.Row():
                 full_report_btn = gr.Button("전체 분석 보고 요약", variant="primary")
                 full_report_display = gr.Markdown()
                 outputs=full_report_display
             )
+            # 기존 개별 기업 검색/출력 영역
             with gr.Column():
                 for i in range(0, len(KOREAN_COMPANIES), 2):
                     with gr.Row():