Spaces:

aicodingfun
/

Crypto-Pulse

Sleeping

App Files Files Community

aicodingfun committed on Jul 7

Commit

b257d13

verified ·

1 Parent(s): 2d54fce

Create app.py

Browse files

Files changed (1) hide show

app.py +333 -0

app.py ADDED Viewed

	@@ -0,0 +1,333 @@

+import gradio as gr
+import pandas as pd
+import plotly.graph_objects as go
+from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
+import numpy as np
+from tqdm.auto import tqdm
+import warnings
+import os
+from datetime import datetime, timedelta
+from scipy.stats import pearsonr
+import ast
+# Suppress every FutureWarning for the whole process.
+warnings.simplefilter(action='ignore', category=FutureWarning)
+# Credit line interpolated into the dashboard header.
+DEVELOPER_NAME = "汪于捷、李哲弘、黃千宥、陳奕瑄、洪寓澤"
+# Raw inputs: news articles and BTC price history, read by preprocess_and_cache_data().
+NEWS_CSV_PATH = 'cryptonews.csv'
+BTC_CSV_PATH = 'BTC.csv'
+# Cache file written by preprocess_and_cache_data() and read back by load_data().
+PROCESSED_DATA_PATH = 'processed_btc_sentiment_data.csv'
+# Shared Hugging Face pipeline; stays None until initialize_pipeline() succeeds.
+SENTIMENT_PIPELINE = None
+def initialize_pipeline():
+    """載入情緒分析模型，只在需要時執行一次。"""
+    global SENTIMENT_PIPELINE
+    if SENTIMENT_PIPELINE is None:
+        try:
+            print("⏳ 正在載入情緒分析模型 (Hugging Face)...")
+            MODEL_NAME = "cardiffnlp/twitter-roberta-base-sentiment"
+            tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+            model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
+            SENTIMENT_PIPELINE = pipeline(
+                "sentiment-analysis", model=model, tokenizer=tokenizer, device=0
+            )
+            print("✅ 模型載入成功！")
+        except Exception as e:
+            print(f"❌ 載入模型時發生錯誤: {e}")
+            SENTIMENT_PIPELINE = None
+def safe_literal_eval(val):
+    """安全地解析字串，如果失敗則回傳空字典。"""
+    try:
+        return ast.literal_eval(val)
+    except (ValueError, SyntaxError):
+        return {}
+def preprocess_and_cache_data():
+    """
+    執行一次性的資料預處理，分析來源為新聞標題(title)與內文(text)的組合。
+    """
+    if not os.path.exists(NEWS_CSV_PATH) or not os.path.exists(BTC_CSV_PATH):
+        raise FileNotFoundError(f"請確認 '{NEWS_CSV_PATH}' 和 '{BTC_CSV_PATH}' 檔案存在。")
+    initialize_pipeline()
+    if SENTIMENT_PIPELINE is None:
+        raise RuntimeError("情緒分析模型未能成功初始化。")
+    print(f"⏳ 正在讀取原始資料: '{NEWS_CSV_PATH}'...")
+    news_df = pd.read_csv(NEWS_CSV_PATH)
+    news_df.dropna(subset=['title', 'text', 'sentiment'], inplace=True)
+    news_df['date'] = pd.to_datetime(news_df['date'], errors='coerce').dt.date
+    news_df.dropna(subset=['date'], inplace=True)
+    print("⏳ 正在合併新聞標題與內文...")
+    news_df['full_text'] = news_df['title'] + ". " + news_df['text']
+    print("⏳ 正在對新聞完整內容 (標題+內文) 進行模型情緒分析...")
+    texts_to_analyze = news_df['full_text'].tolist()
+    sentiments_model = SENTIMENT_PIPELINE(
+        texts_to_analyze,
+        batch_size=256,
+        truncation=True,
+        max_length=512
+    )
+    score_map_model = {'LABEL_2': 1, 'LABEL_1': 0, 'LABEL_0': -1}
+    news_df['model_sentiment_score'] = [score_map_model.get(s['label'], 0) for s in sentiments_model]
+    print("⏳ 正在解析預存的情緒欄位 (class, polarity, subjectivity)...")
+    sentiment_dicts = news_df['sentiment'].apply(safe_literal_eval)
+    class_score_map = {'positive': 1, 'neutral': 0, 'negative': -1}
+    news_df['class_sentiment_score'] = sentiment_dicts.apply(lambda x: class_score_map.get(x.get('class', 'neutral'), 0))
+    news_df['polarity'] = sentiment_dicts.apply(lambda x: x.get('polarity', 0.0))
+    news_df['subjectivity'] = sentiment_dicts.apply(lambda x: x.get('subjectivity', 0.0))
+    print("⏳ 正在計算每日平均情緒指標...")
+    daily_metrics = news_df.groupby('date').agg(
+        avg_model_sentiment=('model_sentiment_score', 'mean'),
+        avg_class_sentiment=('class_sentiment_score', 'mean'),
+        avg_polarity=('polarity', 'mean'),
+        avg_subjectivity=('subjectivity', 'mean')
+    ).reset_index()
+    print(f"⏳ 正在讀取比特幣價格資料: '{BTC_CSV_PATH}'...")
+    btc_df = pd.read_csv(BTC_CSV_PATH)
+    btc_df['date'] = pd.to_datetime(btc_df['date'], errors='coerce').dt.date
+    btc_df['price_change_pct'] = btc_df['close'].pct_change() * 100
+    print("⏳ 正在合併所有資料...")
+    daily_metrics['date'] = pd.to_datetime(daily_metrics['date'])
+    btc_df['date'] = pd.to_datetime(btc_df['date'])
+    merged_df = pd.merge(btc_df, daily_metrics, on='date', how='inner')
+    news_content_df = news_df.groupby('date').agg(
+        titles=('title', list),
+        texts=('text', list)
+    ).reset_index()
+    news_content_df['date'] = pd.to_datetime(news_content_df['date'])
+    final_df = pd.merge(merged_df, news_content_df, on='date', how='left')
+    print(f"✅ 資料預處理完成！正在將結果儲存至 '{PROCESSED_DATA_PATH}'...")
+    final_df.to_csv(PROCESSED_DATA_PATH, index=False)
+    return final_df
+def load_data():
+    """載入資料，若快取不存在則執行預處理。"""
+    if os.path.exists(PROCESSED_DATA_PATH):
+        print(f"✅ 發現已處理的資料快取，正在從 '{PROCESSED_DATA_PATH}' 載入...")
+        df = pd.read_csv(PROCESSED_DATA_PATH)
+        df['date'] = pd.to_datetime(df['date'])
+        df['titles'] = df['titles'].apply(ast.literal_eval)
+        df['texts'] = df['texts'].apply(ast.literal_eval)
+        return df
+    else:
+        print("⚠️ 未發現已處理的資料，將執行首次預處理...")
+        return preprocess_and_cache_data()
+df = load_data()
+# 確保資料按日期排序
+df.sort_values(by='date', inplace=True)
+df.set_index('date', inplace=True)
+def get_filtered_df(start_date, end_date):
+    """根據日期範圍篩選 DataFrame。"""
+    if start_date is None or end_date is None:
+        return pd.DataFrame()
+    return df[(df.index >= pd.to_datetime(start_date)) & (df.index <= pd.to_datetime(end_date))].copy()
+def plot_price_and_sentiment(filtered_df, sentiment_col, sentiment_name, color):
+    fig = go.Figure()
+    fig.add_trace(go.Scatter(x=filtered_df.index, y=filtered_df['close'], name='BTC 收盤價', line=dict(color='deepskyblue'), yaxis='y1'))
+    fig.add_trace(go.Scatter(x=filtered_df.index, y=filtered_df[sentiment_col], name=sentiment_name, line=dict(color=color, dash='dash'), yaxis='y2'))
+    fig.update_layout(
+        # title=f'📈 比特幣價格 vs. {sentiment_name}趨勢',
+        xaxis_title='日期',
+        yaxis=dict(title='價格 (USD)', color='deepskyblue'),
+        yaxis2=dict(title='情緒分數', overlaying='y', side='right', color=color, range=[-1, 1]),
+        legend=dict(x=0.01, y=0.99, orientation='h'),
+        template='plotly_dark', paper_bgcolor='rgba(0,0,0,0)', plot_bgcolor='rgba(0,0,0,0.2)'
+    )
+    return fig
+def plot_subjectivity_trend(filtered_df):
+    fig = go.Figure()
+    fig.add_trace(go.Scatter(x=filtered_df.index, y=filtered_df['avg_subjectivity'], name='每日新聞主觀性', line=dict(color='lightgreen')))
+    fig.update_layout(
+        # title='🧐 每日新聞主觀性趨勢',
+        xaxis_title='日期',
+        yaxis=dict(title='主觀性分數 (0=客觀, 1=主觀)', color='lightgreen', range=[0, 1]),
+        template='plotly_dark', paper_bgcolor='rgba(0,0,0,0)', plot_bgcolor='rgba(0,0,0,0.2)'
+    )
+    return fig
+def plot_correlation(filtered_df, sentiment_col, lag_days):
+    df_corr = filtered_df[[sentiment_col, 'price_change_pct']].copy()
+    df_corr['price_change_pct_lagged'] = df_corr['price_change_pct'].shift(-lag_days)
+    df_corr.dropna(inplace=True)
+    if df_corr.empty or len(df_corr) < 2:
+        correlation, p_value = 0, 1
+    else:
+        correlation, p_value = pearsonr(df_corr[sentiment_col], df_corr['price_change_pct_lagged'])
+    fig = go.Figure(data=go.Scatter(x=df_corr[sentiment_col], y=df_corr['price_change_pct_lagged'], mode='markers', marker=dict(color='mediumpurple', opacity=0.7)))
+    fig.update_layout(
+        title=f'🔗 情緒與 {lag_days} 天後價格變化的關聯性 (相關係數: {correlation:.3f})',
+        xaxis_title='每日平均情緒分數', yaxis_title=f'{lag_days} 天後價格變化 (%)',
+        template='plotly_dark', paper_bgcolor='rgba(0,0,0,0)', plot_bgcolor='rgba(0,0,0,0.2)'
+    )
+    return fig, correlation, p_value
+def get_top_bottom_news(date_obj):
+    """
+    獲取指定日期的最正面與最負面新聞。
+    """
+    date_ts = pd.to_datetime(date_obj)
+    if date_ts not in df.index:
+        return "<ul><li>無此日期資料</li></ul>", "<ul><li>無此日期資料</li></ul>"
+    day_data = df.loc[date_ts]
+    titles, texts = day_data.get('titles', []), day_data.get('texts', [])
+    initialize_pipeline()
+    if SENTIMENT_PIPELINE is None or not isinstance(titles, list) or not isinstance(texts, list) or len(titles) != len(texts):
+        return "<ul><li>模型未載入或新聞資料格式錯誤</li></ul>", "<ul><li>模型未載入或新聞資料格式錯誤</li></ul>"
+    full_texts_for_day = [f"{title}. {text}" for title, text in zip(titles, texts)]
+    if not full_texts_for_day:
+        return "<ul><li>當日無新聞</li></ul>", "<ul><li>當日無新聞</li></ul>"
+    sentiments = SENTIMENT_PIPELINE(full_texts_for_day, batch_size=8, truncation=True, max_length=512)
+    score_map = {'LABEL_2': 1, 'LABEL_1': 0, 'LABEL_0': -1}
+    scored_titles = []
+    for i, sentiment in enumerate(sentiments):
+        directional_score = score_map.get(sentiment['label'], 0) * sentiment['score']
+        scored_titles.append((titles[i], directional_score))
+    positive_news = sorted([item for item in scored_titles if item[1] > 0], key=lambda x: x[1], reverse=True)
+    negative_news = sorted([item for item in scored_titles if item[1] < 0], key=lambda x: x[1], reverse=False)
+    if positive_news:
+        top_news_html = "".join([f"<li>{title}</li>" for title, score in positive_news[:3]])
+    else:
+        top_news_html = "<li>當日無正面情緒新聞</li>"
+    if negative_news:
+        bottom_news_html = "".join([f"<li>{title}</li>" for title, score in negative_news[:3]])
+    else:
+        bottom_news_html = "<li>當日無負面情緒新聞</li>"
+    return f"<ul>{top_news_html}</ul>", f"<ul>{bottom_news_html}</ul>"
+with gr.Blocks(theme=gr.themes.Soft(primary_hue="sky", secondary_hue="orange"), css=".gradio-container {background: #0B0F19}") as app:
+    gr.Markdown(f"""<div style='text-align: center; padding: 20px; color: white;'><h1 style='font-size: 3em; color: #00BFFF;'>📈 Crypto Pulse</h1><p style='font-size: 1.2em; color: #A9A9A9;'>比特幣新聞情緒與價格分析儀表板</p><p style='font-size: 0.9em; color: #888;'>Designed by: {DEVELOPER_NAME}</p></div>""")
+    max_date_dt = df.index.max()
+    # 確保資料數足夠
+    if len(df) > 360:
+        min_date_dt = df.index[-360]
+    else:
+        min_date_dt = df.index.min()
+    with gr.Row():
+        start_date_input = gr.DateTime(label="📅 開始日期", type="datetime", value=min_date_dt)
+        end_date_input = gr.DateTime(label="📅 結束日期", type="datetime", value=max_date_dt)
+    with gr.Tabs() as tabs:
+        with gr.TabItem("📊 模型情緒總覽", id=0):
+            plot_overview = gr.Plot(label="模型情緒 vs. 價格趨勢圖")
+            gr.Markdown("此圖展示了由 `twitter-roberta-base-sentiment` 模型分析出的**新聞內容（標題+內文）**情緒分數（右軸）與比特幣價格（左軸）的對比。")
+        with gr.TabItem("🔬 多維度情緒分析", id=1):
+            gr.Markdown("""
+            ### 指標說明
+            此處的情緒指標來自資料集 `cryptonews.csv` 中預先計算好的 `sentiment` 欄位。
+            * **資料集預設情緒分類**: 將資料集內建的 `positive`, `neutral`, `negative` 類別轉換為 `1, 0, -1` 的數值分數。
+            * **情感極性 (Polarity)**: 衡量文本的正面或負面程度。值域從 -1 (非常負面) 到 +1 (非常正面)。
+            * **主觀性 (Subjectivity)**: 衡量文本是偏向客觀事實還是主觀意見。值域從 0 (非常客觀) 到 1 (非常主觀)。
+            """)
+            plot_class_sentiment = gr.Plot(label="資料集預設情緒 vs. 價格趨勢圖")
+            plot_polarity = gr.Plot(label="情感極性 vs. 價格趨勢圖")
+            plot_subjectivity = gr.Plot(label="新聞主觀性趨勢圖")
+        with gr.TabItem("🔍 關聯性深掘", id=2):
+            with gr.Row():
+                with gr.Column(scale=1, min_width=200):
+                    sentiment_type_radio = gr.Radio(
+                        ["模型情緒分數", "資料集預設情緒分類", "情感極性 (Polarity)"],
+                        label="選擇分析的情緒指標", value="模型情緒分數"
+                    )
+                    lag_slider = gr.Slider(minimum=0, maximum=14, value=1, step=1, label="🕒 情緒延遲天數 (Lag Days)")
+                    correlation_output = gr.Textbox(label="Pearson 相關係數", interactive=False)
+                    p_value_output = gr.Textbox(label="P-Value", interactive=False)
+                with gr.Column(scale=3):
+                    plot_corr = gr.Plot(label="情緒 vs. 價格變化 散點圖")
+        with gr.TabItem("📰 新聞瀏覽器", id=3):
+            gr.Markdown("在此處選擇特定日期，即可查看當天的熱點新聞。")
+            news_date_input = gr.DateTime(label="🗓️ 選擇查詢日期", type="datetime", value=max_date_dt)
+            with gr.Row():
+                gr.Markdown("### 👍 當日最正面新聞 Top 3"); gr.Markdown("### 👎 當日最負面新聞 Top 3")
+            with gr.Row():
+                top_news_output = gr.HTML(); bottom_news_output = gr.HTML()
+    def update_all(start_date, end_date, lag_days, sentiment_type):
+        if start_date is None or end_date is None or start_date > end_date:
+            gr.Warning("請選擇有效的開始與結束日期。")
+            empty_fig = go.Figure()
+            return empty_fig, empty_fig, empty_fig, empty_fig, empty_fig, "N/A", "N/A"
+        start_date, end_date = pd.to_datetime(start_date), pd.to_datetime(end_date)
+        filtered_df = get_filtered_df(start_date, end_date)
+        if filtered_df.empty:
+            gr.Warning("此日期範圍內無資料，請擴大範圍。")
+            empty_fig = go.Figure()
+            return empty_fig, empty_fig, empty_fig, empty_fig, empty_fig, "N/A", "N/A"
+        overview_fig = plot_price_and_sentiment(filtered_df, 'avg_model_sentiment', '模型情緒分數', 'crimson')
+        class_sentiment_fig = plot_price_and_sentiment(filtered_df, 'avg_class_sentiment', '資料集預設情緒分類', 'yellow')
+        polarity_fig = plot_price_and_sentiment(filtered_df, 'avg_polarity', '情感極性 (Polarity)', 'orange')
+        subjectivity_fig = plot_subjectivity_trend(filtered_df)
+        if sentiment_type == "模型情緒分數":
+            sentiment_col = 'avg_model_sentiment'
+        elif sentiment_type == "資料集預設情緒分類":
+            sentiment_col = 'avg_class_sentiment'
+        else: # Polarity
+            sentiment_col = 'avg_polarity'
+        corr_fig, corr_val, p_val = plot_correlation(filtered_df, sentiment_col, lag_days)
+        return overview_fig, class_sentiment_fig, polarity_fig, subjectivity_fig, corr_fig, f"{corr_val:.4f}", f"{p_val:.4f}"
+    def update_news_browser(date_obj):
+        if date_obj is None:
+            return "請選擇日期", "無"
+        top_news, bottom_news = get_top_bottom_news(date_obj)
+        return top_news, bottom_news
+    inputs_for_main_update = [start_date_input, end_date_input, lag_slider, sentiment_type_radio]
+    outputs_for_main_update = [plot_overview, plot_class_sentiment, plot_polarity, plot_subjectivity, plot_corr, correlation_output, p_value_output]
+    for component in [start_date_input, end_date_input, lag_slider, sentiment_type_radio]:
+        component.change(fn=update_all, inputs=inputs_for_main_update, outputs=outputs_for_main_update)
+    news_date_input.change(
+        fn=update_news_browser,
+        inputs=[news_date_input],
+        outputs=[top_news_output, bottom_news_output]
+    )
+    def load_app():
+        main_outputs = update_all(min_date_dt, max_date_dt, 1, "模型情緒分數")
+        news_outputs = update_news_browser(max_date_dt)
+        return main_outputs + news_outputs
+    app.load(
+        fn=load_app,
+        inputs=None,
+        outputs=outputs_for_main_update + [top_news_output, bottom_news_output]
+    )
+app.launch(debug=False, share=True, show_error=True, show_api=False)