#!/usr/bin/env python3
# Student ID: R09942097; Name: 陳建成; Streamlit Cloud link: https://jeffeuxmartin-assignment-1-jeffeuxmartin-twnlp-appsrcapp-8mil4y.streamlitapp.com/
"""Streamlit app that searches corpus article titles and analyses the matches.

The titles matching the user's queries are listed as links and then handed to
the display pipeline (CKIP or CWN) selected in the sidebar.
"""
import html
import re
import time
from pathlib import Path

import pandas as pd
import streamlit as st

from views.components.spinner import dowload_ckip_package, download_cwn_drivers

# Leading "R:" marker (plus trailing spaces) at the start of a title.
_R_PREFIX = re.compile(r"^R: *")
# Leading "[...]" tag (plus trailing spaces) at the start of a title.
_BRACKET_TAG = re.compile(r"^\[[^]]*\] *")


def load_corpus(path):
    """Load the corpus JSON and return ``(df, full_df)``.

    Args:
        path: Location of the JSON file, passed straight to ``pd.read_json``.

    Returns:
        A 2-tuple ``(df, full_df)``: ``full_df`` is the whole corpus sorted by
        its ``index`` column in descending order, and ``df`` is the
        ``title`` / ``web_url`` projection of it (same row order).
    """
    full_df = pd.read_json(path)
    # sort_values returns a new frame; the result has to be kept, otherwise
    # the sort is silently discarded.
    full_df = full_df.sort_values('index', ascending=False)
    df = full_df[['title', 'web_url']]
    return df, full_df


def make_clickable(url, text):
    """Return an HTML anchor showing ``text`` that opens ``url`` in a new tab.

    Both values are HTML-escaped so that quotes or angle brackets in them
    cannot break out of the generated markup.

    Ref.: https://discuss.streamlit.io/t/display-urls-in-dataframe-column-as-a-clickable-hyperlink/743/7
    """
    safe_url = html.escape(str(url), quote=True)
    safe_text = html.escape(str(text))
    return f'<a target="_blank" href="{safe_url}">{safe_text}</a>'


def _clean_titles(titles):
    """Strip the leading ``R:`` marker and ``[...]`` tag from each title.

    Duplicates (after stripping) are dropped; the order of first occurrence
    is preserved.
    """
    stripped = (
        _BRACKET_TAG.sub('', _R_PREFIX.sub('', title)) for title in titles
    )
    # dict keys keep insertion order, giving an order-preserving de-duplication.
    return list(dict.fromkeys(stripped))


def run_app(path, ckip_nlp_models, cwn_upgrade) -> None:
    """Render the search page and run the selected pipeline on matching titles.

    Args:
        path: Corpus JSON location, forwarded to ``load_corpus``.
        ckip_nlp_models: Model names forwarded to ``dowload_ckip_package`` and
            ``visualize_side_bar``.
        cwn_upgrade: Flag forwarded to ``download_cwn_drivers``.
    """
    # need to download first because CWN packages will first check whether
    # there is .cwn_graph folder in the root directory.
    download_cwn_drivers(cwn_upgrade)
    dowload_ckip_package(ckip_nlp_models)

    # Imported only after the downloads above have run (see comment above).
    from views.components.sidebar import visualize_side_bar
    from views.containers import display_cwn, display_ckip, display_data_form

    st.title("PTT 語料庫搜尋分析工具 (ver. 0.1)")
    # Called for its rendering side effect; the queries themselves are read
    # from st.session_state["input_data"] below.
    display_data_form()
    max_articles = st.slider(
        '最多標題數:', min_value=0, max_value=30, step=1, value=3)
    model, pipeline, active_visualizers = visualize_side_bar(ckip_nlp_models)

    display_factories = {
        "CKIP": display_ckip,
        "CWN": display_cwn,
    }

    df, _ = load_corpus(path)

    # NOTE(review): the original file's indentation was lost; the search and
    # display steps are assumed to run only once "input_data" is present in
    # the session state - confirm against the original layout.
    if "input_data" not in st.session_state:
        return

    # Keep only the titles that match every query. Each query is interpreted
    # as a regular expression (pandas' default for str.contains).
    for query in st.session_state["input_data"]:
        df = df[df["title"].str.contains(query)]
    df = df.iloc[:max_articles]

    if df.empty:
        st.markdown("## No results match! Q_Q... ##")
        return

    st.markdown("#### 搜尋文章標題結果 ####")
    st.markdown('\n'.join(
        f"1. [{it.title}]({it.web_url})" for it in df.itertuples()))

    display_factories[pipeline](
        model,
        active_visualizers,
        _clean_titles(df['title']),
    )


if __name__ == "__main__":
    ckip_nlp_models = ["bert-base", "albert-tiny", "bert-tiny", "albert-base"]
    run_app(
        str(Path(__file__).parent.resolve() / 'data/corpus.json'),
        ckip_nlp_models,
        cwn_upgrade=False)