# Spaces: Build error
# R11142005 紀柔安
import streamlit as st | |
from twNLP-app.src.views.components.spinner import dowload_ckip_package, download_cwn_drivers | |
import pandas as pd | |
import requests | |
import bs4 | |
from snownlp import SnowNLP | |
_PTT_BASE_URL = "https://www.ptt.cc"
_PTT_BOARDS = ["HatePolitics", "Gossiping", "Military", "Stock"]
# NOTE(review): machine-specific absolute path kept from the original;
# TODO replace with a path relative to the repository.
_LOGO_PATH = "/Users/joannechi/nlpWeb/myApp/nlpweb/nlp_assignment_1/img/Mo-PTT-Logo.png"


def _fetch_board_posts(board, timeout=10):
    """Return the posts listed on a PTT board's index page.

    Args:
        board: Board name as it appears in the PTT URL (e.g. ``"Stock"``).
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A list of ``{"title": ..., "url": ...}`` dicts, one per title entry
        that carries a link.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-success HTTP status.
    """
    response = requests.get(
        f"{_PTT_BASE_URL}/bbs/{board}/index.html",
        # same cookie the original request sent; presumably it bypasses
        # PTT's age-confirmation page -- confirm
        headers={"cookie": "over18=1;"},
        timeout=timeout,
    )
    response.raise_for_status()

    soup = bs4.BeautifulSoup(response.text, "html.parser")
    posts = []
    for title_div in soup.find_all("div", "title"):
        link = title_div.find("a")
        if link is None or not link.get("href"):
            # A title entry without a link (e.g. a removed post) has no
            # article URL to build; skip it instead of raising.
            continue
        posts.append(
            {
                "title": title_div.text.strip(),
                "url": _PTT_BASE_URL + link.get("href"),
            }
        )
    return posts


def _sentiment_markdown(score):
    """Map a SnowNLP sentiment score to the markdown line shown to the user.

    SnowNLP's ``sentiments`` value is the probability that the text is
    positive, so it lies in [0, 1] and 0.5 is the neutral midpoint.

    Args:
        score: Value of ``SnowNLP(...).sentiments``.

    Returns:
        The markdown string (with emoji shortcode) describing the sentiment.
    """
    if score > 0.5:
        return "Sentiment:: Positive :smiley: "
    if score < 0.5:
        return "Sentiment:: Negative :angry: "
    return "Sentiment:: Neutral :neutral_face: "


def run_app(ckip_nlp_models, cwn_upgrade) -> None:
    """Render the Streamlit page: PTT crawler, sentiment analysis, NLP pipeline.

    Args:
        ckip_nlp_models: CKIP model names; passed to ``dowload_ckip_package``
            and to ``visualize_side_bar``.
        cwn_upgrade: Passed through to ``download_cwn_drivers``.
    """
    from pathlib import Path

    # need to download first because CWN packages will first check whether
    # there is .cwn_graph folder in the root directory.
    download_cwn_drivers(cwn_upgrade)
    dowload_ckip_package(ckip_nlp_models)

    # Imported only after the downloads above have run.
    from views.components.sidebar import visualize_side_bar
    from views.containers import display_cwn, display_ckip, display_data_form

    st.title("NLP app for PTT")
    st.write("這是一個針對PTT語料的 情緒分析|中文NLP管線處理🔎")
    # Show the logo only when the file is present so a missing image does
    # not take the whole page down.
    if Path(_LOGO_PATH).is_file():
        st.image(_LOGO_PATH, width=200)

    # --- PTT crawler ---
    st.subheader("PTT Crawler 🐛")
    st.text('目前看板有:HatePolitics|Gossiping|Military|Stock')
    selected = st.selectbox('請選擇看板:', _PTT_BOARDS)
    try:
        posts = _fetch_board_posts(selected)
    except requests.RequestException as err:
        # Report the failure in the page instead of crashing the app.
        st.error(f"無法取得 PTT 看板資料:{err}")
    else:
        st.write(pd.DataFrame(posts, columns=["title", "url"]))

    # --- sentiment analysis ---
    st.subheader("情緒分析")
    with st.form(key="nlpForm"):
        raw_text = st.text_area("請輸入句子✏️")
        submit_button = st.form_submit_button(label="確定")
    if submit_button:
        if not raw_text.strip():
            # Nothing to score; ask for input rather than showing a
            # meaningless number.
            st.warning("請先輸入句子")
        else:
            st.info("sentiment")
            # Convert to Simplified Chinese (``.han``) before scoring, as
            # the original code did.
            score = SnowNLP(SnowNLP(raw_text).han).sentiments
            st.write(score)
            st.markdown(_sentiment_markdown(score))

    # --- Chinese NLP pipeline ---
    st.subheader("中文 NLP 管線處理")
    # Return value is unused; the submitted text is read back from
    # st.session_state below (presumably stored there by the form -- confirm).
    display_data_form()
    model, pipeline, active_visualizers = visualize_side_bar(ckip_nlp_models)
    display_factories = {"CKIP": display_ckip, "CWN": display_cwn}
    if "input_data" in st.session_state:
        display_factories[pipeline](
            model, active_visualizers, st.session_state["input_data"]
        )
if __name__ == "__main__":
    # Script entry point: start the app with the CKIP model names to offer
    # and with the CWN upgrade flag turned off.
    run_app(
        ["bert-base", "albert-tiny", "bert-tiny", "albert-base"],
        cwn_upgrade=False,
    )