Spaces:
Running
Running
Update pages/1 Scattertext.py
Browse files - pages/1 Scattertext.py +7 -4
pages/1 Scattertext.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
import streamlit as st
|
|
|
2 |
import scattertext as stx
|
3 |
import pandas as pd
|
4 |
import re
|
@@ -117,7 +118,7 @@ def clean_csv(extype):
|
|
117 |
|
118 |
#===stopword removal===
|
119 |
stop = stopwords.words('english')
|
120 |
-
paper[ColCho].apply(lambda x: ' '.join([word for word in x.split() if word not in stop]))
|
121 |
|
122 |
#===lemmatize===
|
123 |
lemmatizer = WordNetLemmatizer()
|
@@ -125,7 +126,7 @@ def clean_csv(extype):
|
|
125 |
words = text.split()
|
126 |
words = [lemmatizer.lemmatize(word) for word in words]
|
127 |
return ' '.join(words)
|
128 |
-
paper[ColCho].apply(lemmatize_words)
|
129 |
|
130 |
words_rmv = [word.strip() for word in words_to_remove.split(";")]
|
131 |
remove_set = set(words_rmv)
|
@@ -178,7 +179,7 @@ def running_scattertext(cat_col, catname, noncatname):
|
|
178 |
st.toast('Process completed', icon='🎉')
|
179 |
time.sleep(1)
|
180 |
st.toast('Visualizing', icon='⏳')
|
181 |
-
|
182 |
|
183 |
except ValueError:
|
184 |
st.warning('Please decrease the Minimum term count in the advanced settings.', icon="⚠️")
|
@@ -191,11 +192,13 @@ def df_w2w(search_terms1, search_terms2):
|
|
191 |
for term in search_terms1:
|
192 |
dfs1 = pd.concat([dfs1, paper[paper[selected_col[0]].str.contains(r'\b' + term + r'\b', case=False, na=False)]], ignore_index=True)
|
193 |
dfs1['Topic'] = 'First Term'
|
|
|
194 |
|
195 |
dfs2 = pd.DataFrame()
|
196 |
for term in search_terms2:
|
197 |
dfs2 = pd.concat([dfs2, paper[paper[selected_col[0]].str.contains(r'\b' + term + r'\b', case=False, na=False)]], ignore_index=True)
|
198 |
dfs2['Topic'] = 'Second Term'
|
|
|
199 |
filtered_df = pd.concat([dfs1, dfs2], ignore_index=True)
|
200 |
|
201 |
return dfs1, dfs2, filtered_df
|
@@ -350,7 +353,7 @@ if uploaded_file is not None:
|
|
350 |
st.write('You only have data in ', (MAX))
|
351 |
|
352 |
with tab2:
|
353 |
-
st.markdown('**Kessler
|
354 |
|
355 |
with tab3:
|
356 |
st.markdown('**Marrone, M., & Linnenluecke, M.K. (2020). Interdisciplinary Research Maps: A new technique for visualizing research topics. PLoS ONE, 15.** https://doi.org/10.1371/journal.pone.0242283')
|
|
|
1 |
import streamlit as st
|
2 |
+
import streamlit.components.v1 as components
|
3 |
import scattertext as stx
|
4 |
import pandas as pd
|
5 |
import re
|
|
|
118 |
|
119 |
#===stopword removal===
|
120 |
stop = stopwords.words('english')
|
121 |
+
paper[ColCho] = paper[ColCho].apply(lambda x: ' '.join([word for word in x.split() if word not in stop]))
|
122 |
|
123 |
#===lemmatize===
|
124 |
lemmatizer = WordNetLemmatizer()
|
|
|
126 |
words = text.split()
|
127 |
words = [lemmatizer.lemmatize(word) for word in words]
|
128 |
return ' '.join(words)
|
129 |
+
paper[ColCho] = paper[ColCho].apply(lemmatize_words)
|
130 |
|
131 |
words_rmv = [word.strip() for word in words_to_remove.split(";")]
|
132 |
remove_set = set(words_rmv)
|
|
|
179 |
st.toast('Process completed', icon='🎉')
|
180 |
time.sleep(1)
|
181 |
st.toast('Visualizing', icon='⏳')
|
182 |
+
components.html(html, height = 1200, scrolling = True)
|
183 |
|
184 |
except ValueError:
|
185 |
st.warning('Please decrease the Minimum term count in the advanced settings.', icon="⚠️")
|
|
|
192 |
for term in search_terms1:
|
193 |
dfs1 = pd.concat([dfs1, paper[paper[selected_col[0]].str.contains(r'\b' + term + r'\b', case=False, na=False)]], ignore_index=True)
|
194 |
dfs1['Topic'] = 'First Term'
|
195 |
+
dfs1 = dfs1.drop_duplicates()
|
196 |
|
197 |
dfs2 = pd.DataFrame()
|
198 |
for term in search_terms2:
|
199 |
dfs2 = pd.concat([dfs2, paper[paper[selected_col[0]].str.contains(r'\b' + term + r'\b', case=False, na=False)]], ignore_index=True)
|
200 |
dfs2['Topic'] = 'Second Term'
|
201 |
+
dfs2 = dfs2.drop_duplicates()
|
202 |
filtered_df = pd.concat([dfs1, dfs2], ignore_index=True)
|
203 |
|
204 |
return dfs1, dfs2, filtered_df
|
|
|
353 |
st.write('You only have data in ', (MAX))
|
354 |
|
355 |
with tab2:
|
356 |
+
st.markdown('**Jason Kessler. 2017. Scattertext: a Browser-Based Tool for Visualizing how Corpora Differ. In Proceedings of ACL 2017, System Demonstrations, pages 85–90, Vancouver, Canada. Association for Computational Linguistics.** https://doi.org/10.48550/arXiv.1703.00565')
|
357 |
|
358 |
with tab3:
|
359 |
st.markdown('**Marrone, M., & Linnenluecke, M.K. (2020). Interdisciplinary Research Maps: A new technique for visualizing research topics. PLoS ONE, 15.** https://doi.org/10.1371/journal.pone.0242283')
|