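#!/usr/bin/env python3
# Student ID: R09942097 / Name: 陳建成
# Streamlit Cloud link: https://jeffeuxmartin-assignment-1-jeffeuxmartin-twnlp-appsrcapp-8mil4y.streamlitapp.com/
# NOTE(review): the stray text "Spaces: / Runtime error / Runtime error" preceded
# this file; it is not Python source and is kept here only as a comment.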
from pathlib import Path | |
import streamlit as st, pandas as pd, re, time | |
from views.components.spinner import dowload_ckip_package, download_cwn_drivers | |
def load_corpus(path):
    """Load the corpus JSON file and return it sorted by ``index``, newest first.

    Args:
        path: Path (or any source accepted by ``pandas.read_json``) of the
            corpus file. The data must contain the columns ``index``,
            ``title`` and ``web_url``.

    Returns:
        A ``(df, full_df)`` tuple: ``full_df`` is the whole corpus sorted by
        the ``index`` column in descending order, and ``df`` is its
        ``title`` / ``web_url`` sub-frame in the same row order.

    Raises:
        KeyError: If one of the required columns is missing.
    """
    # Echo the path being loaded to stdout.
    print(path)
    full_df = pd.read_json(path)
    # ``sort_values`` returns a new frame unless ``inplace=True``; the result
    # has to be re-bound, otherwise the sort is silently discarded.
    full_df = full_df.sort_values('index', ascending=False)
    df = full_df[['title', 'web_url']]
    return df, full_df
def make_clickable(url, text):
    """Return an HTML anchor string that links to ``url`` and displays ``text``.

    The anchor carries ``target="_blank"``. Both values are interpolated
    as-is; no HTML escaping is applied.
    """
    # Ref.: https://discuss.streamlit.io/t/display-urls-in-dataframe-column-as-a-clickable-hyperlink/743/7
    anchor_template = '<a target="_blank" href="{url}">{text}</a>'
    return anchor_template.format(url=url, text=text)
def run_app(path, ckip_nlp_models, cwn_upgrade) -> None:
    """Render the corpus search page and run the selected analysis pipeline.

    The page shows a data form, a slider for the maximum number of titles and
    a sidebar. When ``st.session_state`` holds ``"input_data"``, each entry is
    applied in turn as a title filter, the first ``max_articles`` matches are
    listed as links, and the de-duplicated, prefix-stripped titles are handed
    to the display function chosen in the sidebar.

    Args:
        path: Location of the corpus JSON file, passed to ``load_corpus``.
        ckip_nlp_models: CKIP model names, passed to ``dowload_ckip_package``
            and ``visualize_side_bar``.
        cwn_upgrade: Passed through to ``download_cwn_drivers``.
    """
    # need to download first because CWN packages will first check whether
    # there is .cwn_graph folder in the root directory.
    download_cwn_drivers(cwn_upgrade)
    dowload_ckip_package(ckip_nlp_models)

    # NOTE(review): presumably imported here, not at module level, so that the
    # downloads above run first -- confirm against the views package.
    from views.components.sidebar import visualize_side_bar
    from views.containers import display_cwn, display_ckip, display_data_form

    st.title("PTT 語料庫搜尋分析工具 (ver. 0.1)")
    # Called for its rendering side effect; the submitted value is read back
    # from ``st.session_state`` below.
    display_data_form()
    max_articles = st.slider('最多標題數:', min_value=0, max_value=30, step=1, value=3)
    model, pipeline, active_visualizers = visualize_side_bar(ckip_nlp_models)
    display_factories = {
        "CKIP": display_ckip,
        "CWN": display_cwn,
    }

    df, _ = load_corpus(path)
    if "input_data" in st.session_state:
        queries = st.session_state["input_data"]
        for query in queries:
            # ``na=False`` treats a missing title as "no match"; without it the
            # mask contains NaN and boolean indexing raises.
            # NOTE(review): ``query`` is interpreted as a regular expression
            # (pandas default); confirm that is intended for user input.
            df = df[df["title"].str.contains(query, na=False)]
        df = df.iloc[:max_articles]

        if not df.empty:
            st.markdown("#### 搜尋文章標題結果 ####")
            st.markdown('\n'.join(
                f"1. [{it.title}]({it.web_url})"
                for it in df.itertuples())
            )
            # Strip a leading "R:" marker, then a leading "[...]" tag.
            # Raw strings keep the exact same patterns while avoiding the
            # invalid-escape warnings of the non-raw spelling.
            # NOTE(review): the first pattern matches "R:", not "Re:" --
            # verify against the prefixes actually present in the corpus.
            stripped_titles = (
                re.sub(r'^\[[^]]*\] *', '',
                       re.sub(r'^R\: *', '', title))
                for title in df['title'])
            # Order-preserving de-duplication (first occurrence wins).
            cleaned_titles = list(dict.fromkeys(stripped_titles))
            display_factories[pipeline](
                model, active_visualizers,
                cleaned_titles,
            )
        else:
            st.markdown("## No results match! Q_Q... ##")
if __name__ == "__main__":
    # Script entry point: the corpus is read from data/corpus.json next to
    # this file, and the CWN upgrade flag is passed as False.
    ckip_nlp_models = ["bert-base", "albert-tiny", "bert-tiny", "albert-base"]
    corpus_path = Path(__file__).parent.resolve() / 'data/corpus.json'
    run_app(str(corpus_path), ckip_nlp_models, cwn_upgrade=False)