Spaces:
Build error
Build error
meg-huggingface
committed on
Commit
·
cda45dd
1
Parent(s):
b256a5f
Handling for no words
Browse files
data_measurements/streamlit_utils.py
CHANGED
@@ -111,30 +111,33 @@ def expander_general_stats(dstats, column_id):
|
|
111 |
"Use this widget to check whether the terms you see most represented"
|
112 |
" in the dataset make sense for the goals of the dataset."
|
113 |
)
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
"
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
"[open class words](https://dictionary.apa.org/open-class-words) "
|
122 |
-
"and their counts are: "
|
123 |
-
)
|
124 |
-
st.dataframe(dstats.sorted_top_vocab_df)
|
125 |
-
st.markdown(
|
126 |
-
"There are {0} missing values in the dataset.".format(
|
127 |
-
str(dstats.text_nan_count)
|
128 |
)
|
129 |
-
)
|
130 |
-
if dstats.dedup_total > 0:
|
131 |
st.markdown(
|
132 |
-
"
|
133 |
-
"
|
134 |
-
"
|
135 |
)
|
136 |
-
|
137 |
-
st.markdown(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
138 |
|
139 |
|
140 |
### Show the label distribution from the datasets
|
|
|
111 |
"Use this widget to check whether the terms you see most represented"
|
112 |
" in the dataset make sense for the goals of the dataset."
|
113 |
)
|
114 |
+
if dstats.total_words == 0:
|
115 |
+
st.markdown("Eh oh...not finding the file I need. 😭 Probably it will be there soon. 🤞 Check back later!")
|
116 |
+
else:
|
117 |
+
st.markdown("There are {0} total words".format(str(dstats.total_words)))
|
118 |
+
st.markdown(
|
119 |
+
"There are {0} words after removing closed "
|
120 |
+
"class words".format(str(dstats.total_open_words))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
121 |
)
|
|
|
|
|
122 |
st.markdown(
|
123 |
+
"The most common "
|
124 |
+
"[open class words](https://dictionary.apa.org/open-class-words) "
|
125 |
+
"and their counts are: "
|
126 |
)
|
127 |
+
st.dataframe(dstats.sorted_top_vocab_df)
|
128 |
+
st.markdown(
|
129 |
+
"There are {0} missing values in the dataset.".format(
|
130 |
+
str(dstats.text_nan_count)
|
131 |
+
)
|
132 |
+
)
|
133 |
+
if dstats.dedup_total > 0:
|
134 |
+
st.markdown(
|
135 |
+
"There are {0} duplicate items in the dataset. "
|
136 |
+
"For more information about the duplicates, "
|
137 |
+
"click the 'Duplicates' tab below.".format(str(dstats.dedup_total))
|
138 |
+
)
|
139 |
+
else:
|
140 |
+
st.markdown("There are 0 duplicate items in the dataset. ")
|
141 |
|
142 |
|
143 |
### Show the label distribution from the datasets
|