lowannann committed
Commit b43a54e · 1 Parent(s): 20319f5

Add application file

Files changed (1)
  1. app.py +111 -0
app.py ADDED
@@ -0,0 +1,111 @@
+ #R11142005 紀柔安
+ import streamlit as st
+ from views.components.spinner import dowload_ckip_package, download_cwn_drivers
+ import pandas as pd
+ import requests
+ import bs4
+ from snownlp import SnowNLP
+
+
+ def run_app(ckip_nlp_models, cwn_upgrade) -> None:
+     # Download these first: the CWN package checks for a .cwn_graph folder
+     # in the root directory before it can be used.
+     download_cwn_drivers(cwn_upgrade)
+     dowload_ckip_package(ckip_nlp_models)
+
+     from views.components.sidebar import visualize_side_bar
+     from views.containers import display_cwn, display_ckip, display_data_form
+
+     st.title("NLP app for PTT")
+     st.write("這是一個針對PTT語料的 情緒分析|中文NLP管線處理🔎")
+     # Use a repository-relative path so the logo also loads when deployed.
+     st.image("img/Mo-PTT-Logo.png", width=200)
+
+     # menu = ["Text", "Sentences"]
+     # choice = st.sidebar.selectbox("Menu", menu)
+
+     # spectra = st.file_uploader("upload your file", type={"csv", "txt"})
+     # if spectra is not None:
+     #     spectra_df = pd.read_csv(spectra)  # read the csv
+     #     st.write(spectra_df)
+
+     # ~~ web crawler ~~
+     st.subheader("PTT Crawler 🐛")
+     st.text('目前看板有:HatePolitics|Gossiping|Military|Stock')
+     selected = st.selectbox('請選擇看板:',
+                             ['HatePolitics', 'Gossiping', 'Military', 'Stock'])
+     if selected == 'HatePolitics':
+         URL = "https://www.ptt.cc/bbs/HatePolitics/index.html"
+     elif selected == 'Gossiping':
+         URL = "https://www.ptt.cc/bbs/Gossiping/index.html"
+     elif selected == 'Military':
+         URL = "https://www.ptt.cc/bbs/Military/index.html"
+     else:
+         URL = "https://www.ptt.cc/bbs/Stock/index.html"
+
+     # The over18 cookie is needed to get past PTT's age-verification page.
+     my_headers = {'cookie': 'over18=1;'}
+     response = requests.get(URL, headers=my_headers)
+     soup = bs4.BeautifulSoup(response.text, "html.parser")
+     list_results = []
+     for t in soup.find_all('div', 'title'):
+         find_a = t.find('a')
+         if find_a is None:  # deleted posts have no link
+             continue
+         find_href = "https://www.ptt.cc" + find_a.get("href")
+         title = t.text.strip()
+         results = {
+             "title": title,
+             "url": find_href
+         }
+         list_results.append(results)
+     my_df = pd.DataFrame(list_results)
+     print(my_df)
+     st.write(my_df)
+     # ~~ web crawler ~~
+
+     # ~~ sentiment analysis ~~
+     st.subheader("情緒分析")
+     with st.form(key="nlpForm"):
+         raw_text = st.text_area("請輸入句子✏️")
+         submit_button = st.form_submit_button(label="確定")
+
+     if submit_button:
+         st.info("sentiment")
+         # Convert traditional to simplified Chinese first, since SnowNLP's
+         # sentiment model works on simplified text.
+         sentiment = SnowNLP(SnowNLP(raw_text).han)
+         sentiment_han = sentiment.sentiments
+         st.write(sentiment_han)
+
+         # emoji: SnowNLP returns a probability of positive sentiment in [0, 1],
+         # so compare against 0.5 rather than 0.
+         if sentiment_han > 0.5:
+             st.markdown("Sentiment:: Positive :smiley: ")
+         elif sentiment_han < 0.5:
+             st.markdown("Sentiment:: Negative :angry: ")
+         else:
+             st.markdown("Sentiment:: Neutral :neutral_face: ")
+
+         # with col2:
+         #     st.info("category")
+         #     category = SnowNLP(SnowNLP(raw_text).han)  # convert to simplified
+         #     category_han = list(category.tags)
+         #     st.write(category_han)
+     # ~~ sentiment analysis ~~
+
+     st.subheader("中文 NLP 管線處理")
+
+     input_data = display_data_form()
+     model, pipeline, active_visualizers = visualize_side_bar(ckip_nlp_models)
+     # return model_options, pipeline_options, active_visualizers
+
+     display_factories = {"CKIP": display_ckip, "CWN": display_cwn}
+
+     if "input_data" in st.session_state:
+         display_factories[pipeline](
+             model, active_visualizers, st.session_state["input_data"]
+         )
+
+
+ if __name__ == "__main__":
+     ckip_nlp_models = ["bert-base", "albert-tiny", "bert-tiny", "albert-base"]
+     run_app(ckip_nlp_models, cwn_upgrade=False)
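
Note on the sentiment labels above: SnowNLP's sentiments attribute is a probability of positive sentiment in the range [0, 1], so the comparison is against 0.5 rather than 0. A minimal sketch to check this locally (standalone, not part of this commit; the sample sentences are illustrative only):

    from snownlp import SnowNLP

    for text in ["這部電影真的很好看", "這部電影糟透了"]:
        simplified = SnowNLP(text).han          # convert traditional to simplified Chinese
        score = SnowNLP(simplified).sentiments  # probability of positive sentiment, 0..1
        label = "positive" if score > 0.5 else "negative" if score < 0.5 else "neutral"
        print(f"{text!r}: {score:.3f} -> {label}")

The app itself is launched with "streamlit run app.py"; run_app downloads the CWN drivers and the selected CKIP model on first start before the rest of the UI is built.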