Spaces:

faizhalas
/

coconut

Running

App Files Community

faizhalas committed on Jul 5, 2024

Commit

1e9dacc

verified ·

1 Parent(s): e90198a

Update pages/1 Scattertext.py

Browse files

Files changed (1) hide show

pages/1 Scattertext.py +7 -4

pages/1 Scattertext.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import streamlit as st
 import scattertext as stx
 import pandas as pd
 import re
@@ -117,7 +118,7 @@ def clean_csv(extype):
     #===stopword removal===
     stop = stopwords.words('english')
-    paper[ColCho].apply(lambda x: ' '.join([word for word in x.split() if word not in (stop)]))
     #===lemmatize===
     lemmatizer = WordNetLemmatizer()
@@ -125,7 +126,7 @@ def clean_csv(extype):
         words = text.split()
         words = [lemmatizer.lemmatize(word) for word in words]
         return ' '.join(words)
-    paper[ColCho].apply(lemmatize_words)
     words_rmv = [word.strip() for word in words_to_remove.split(";")]
     remove_set = set(words_rmv)
@@ -178,7 +179,7 @@ def running_scattertext(cat_col, catname, noncatname):
         st.toast('Process completed', icon='🎉')
         time.sleep(1)
         st.toast('Visualizing', icon='⏳')
-        st.components.v1.html(html, height = 1200, scrolling = True)
     except ValueError:
         st.warning('Please decrease the Minimum term count in the advanced settings.', icon="⚠️")
@@ -191,11 +192,13 @@ def df_w2w(search_terms1, search_terms2):
     for term in search_terms1:
         dfs1 = pd.concat([dfs1, paper[paper[selected_col[0]].str.contains(r'\b' + term + r'\b', case=False, na=False)]], ignore_index=True)
     dfs1['Topic'] = 'First Term'
     dfs2 = pd.DataFrame()
     for term in search_terms2:
         dfs2 = pd.concat([dfs2, paper[paper[selected_col[0]].str.contains(r'\b' + term + r'\b', case=False, na=False)]], ignore_index=True)
     dfs2['Topic'] = 'Second Term'
     filtered_df = pd.concat([dfs1, dfs2], ignore_index=True)
     return dfs1, dfs2, filtered_df
@@ -350,7 +353,7 @@ if uploaded_file is not None:
                 st.write('You only have data in ', (MAX))
     with tab2:
-        st.markdown('**Kessler, J.S. (2017). Scattertext: a Browser-Based Tool for Visualizing how Corpora Differ.** https://doi.org/10.48550/arXiv.1703.00565')
     with tab3:
         st.markdown('**Marrone, M., & Linnenluecke, M.K. (2020). Interdisciplinary Research Maps: A new technique for visualizing research topics. PLoS ONE, 15.** https://doi.org/10.1371/journal.pone.0242283')

 import streamlit as st
+import streamlit.components.v1 as components
 import scattertext as stx
 import pandas as pd
 import re
     #===stopword removal===
     stop = stopwords.words('english')
+    paper[ColCho] = paper[ColCho].apply(lambda x: ' '.join([word for word in x.split() if word not in stop]))
     #===lemmatize===
     lemmatizer = WordNetLemmatizer()
         words = text.split()
         words = [lemmatizer.lemmatize(word) for word in words]
         return ' '.join(words)
+    paper[ColCho] = paper[ColCho].apply(lemmatize_words)
     words_rmv = [word.strip() for word in words_to_remove.split(";")]
     remove_set = set(words_rmv)
         st.toast('Process completed', icon='🎉')
         time.sleep(1)
         st.toast('Visualizing', icon='⏳')
+        components.html(html, height = 1200, scrolling = True)
     except ValueError:
         st.warning('Please decrease the Minimum term count in the advanced settings.', icon="⚠️")
     for term in search_terms1:
         dfs1 = pd.concat([dfs1, paper[paper[selected_col[0]].str.contains(r'\b' + term + r'\b', case=False, na=False)]], ignore_index=True)
     dfs1['Topic'] = 'First Term'
+    dfs1 = dfs1.drop_duplicates()
     dfs2 = pd.DataFrame()
     for term in search_terms2:
         dfs2 = pd.concat([dfs2, paper[paper[selected_col[0]].str.contains(r'\b' + term + r'\b', case=False, na=False)]], ignore_index=True)
     dfs2['Topic'] = 'Second Term'
+    dfs2 = dfs2.drop_duplicates()
     filtered_df = pd.concat([dfs1, dfs2], ignore_index=True)
     return dfs1, dfs2, filtered_df
                 st.write('You only have data in ', (MAX))
     with tab2:
+        st.markdown('**Jason Kessler. 2017. Scattertext: a Browser-Based Tool for Visualizing how Corpora Differ. In Proceedings of ACL 2017, System Demonstrations, pages 85–90, Vancouver, Canada. Association for Computational Linguistics.** https://doi.org/10.48550/arXiv.1703.00565')
     with tab3:
         st.markdown('**Marrone, M., & Linnenluecke, M.K. (2020). Interdisciplinary Research Maps: A new technique for visualizing research topics. PLoS ONE, 15.** https://doi.org/10.1371/journal.pone.0242283')