Spaces:
Build error
Build error
Update Space (evaluate main: d781f85c)
Browse files- word_count.py +12 -7
word_count.py
CHANGED
@@ -12,10 +12,12 @@
|
|
12 |
# See the License for the specific language governing permissions and
|
13 |
# limitations under the License.
|
14 |
|
15 |
-
import evaluate
|
16 |
import datasets
|
17 |
from sklearn.feature_extraction.text import CountVectorizer
|
18 |
|
|
|
|
|
|
|
19 |
_DESCRIPTION = """
|
20 |
Returns the total number of words, and the number of unique words in the input data.
|
21 |
"""
|
@@ -38,24 +40,27 @@ Examples:
|
|
38 |
"""
|
39 |
_CITATION = ""
|
40 |
|
|
|
41 |
@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
|
42 |
class WordCount(evaluate.Measurement):
|
43 |
"""This measurement returns the total number of words and the number of unique words
|
44 |
-
|
45 |
|
46 |
def _info(self):
|
47 |
return evaluate.MeasurementInfo(
|
48 |
# This is the description that will appear on the modules page.
|
49 |
module_type="measurement",
|
50 |
description=_DESCRIPTION,
|
51 |
-
citation
|
52 |
inputs_description=_KWARGS_DESCRIPTION,
|
53 |
-
features=datasets.Features(
|
54 |
-
|
55 |
-
|
|
|
|
|
56 |
)
|
57 |
|
58 |
-
def _compute(self, data, max_vocab
|
59 |
"""Returns the number of unique words in the input data"""
|
60 |
count_vectorizer = CountVectorizer(max_features=max_vocab)
|
61 |
document_matrix = count_vectorizer.fit_transform(data)
|
|
|
12 |
# See the License for the specific language governing permissions and
|
13 |
# limitations under the License.
|
14 |
|
|
|
15 |
import datasets
|
16 |
from sklearn.feature_extraction.text import CountVectorizer
|
17 |
|
18 |
+
import evaluate
|
19 |
+
|
20 |
+
|
21 |
_DESCRIPTION = """
|
22 |
Returns the total number of words, and the number of unique words in the input data.
|
23 |
"""
|
|
|
40 |
"""
|
41 |
_CITATION = ""
|
42 |
|
43 |
+
|
44 |
@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
|
45 |
class WordCount(evaluate.Measurement):
|
46 |
"""This measurement returns the total number of words and the number of unique words
|
47 |
+
in the input string(s)."""
|
48 |
|
49 |
def _info(self):
|
50 |
return evaluate.MeasurementInfo(
|
51 |
# This is the description that will appear on the modules page.
|
52 |
module_type="measurement",
|
53 |
description=_DESCRIPTION,
|
54 |
+
citation=_CITATION,
|
55 |
inputs_description=_KWARGS_DESCRIPTION,
|
56 |
+
features=datasets.Features(
|
57 |
+
{
|
58 |
+
"data": datasets.Value("string"),
|
59 |
+
}
|
60 |
+
),
|
61 |
)
|
62 |
|
63 |
+
def _compute(self, data, max_vocab=None):
|
64 |
"""Returns the number of unique words in the input data"""
|
65 |
count_vectorizer = CountVectorizer(max_features=max_vocab)
|
66 |
document_matrix = count_vectorizer.fit_transform(data)
|