submit app
Browse files — distinct.py +3 -2
distinct.py
CHANGED
@@ -156,16 +156,17 @@ class distinct(evaluate.Measurement):
|
|
156 |
tokens = list(tokenizer.tokenize(prediction))
|
157 |
tokens_2grams = ngrams(list(tokenizer.tokenize(prediction)), 2, left_pad_symbol='<s>')
|
158 |
tokens_3grams = ngrams(list(tokenizer.tokenize(prediction)), 3, left_pad_symbol='<s>')
|
|
|
159 |
except Exception as e:
|
160 |
raise e
|
161 |
-
|
162 |
distinct_tokens = distinct_tokens | set(tokens)
|
163 |
distinct_tokens_2grams = distinct_tokens_2grams | set(tokens_2grams)
|
164 |
distinct_tokens_3grams = distinct_tokens_3grams | set(tokens_3grams)
|
165 |
total_tokens.extend(tokens)
|
166 |
total_tokens_2grams.extend(list(tokens_2grams))
|
167 |
total_tokens_3grams.extend(list(tokens_3grams))
|
168 |
-
|
169 |
Distinct_1 = len(distinct_tokens)/len(total_tokens)
|
170 |
Distinct_2 = len(distinct_tokens_2grams)/len(total_tokens_2grams)
|
171 |
Distinct_3 = len(distinct_tokens_3grams)/len(total_tokens_3grams)
|
|
|
156 |
tokens = list(tokenizer.tokenize(prediction))
|
157 |
tokens_2grams = ngrams(list(tokenizer.tokenize(prediction)), 2, left_pad_symbol='<s>')
|
158 |
tokens_3grams = ngrams(list(tokenizer.tokenize(prediction)), 3, left_pad_symbol='<s>')
|
159 |
+
|
160 |
except Exception as e:
|
161 |
raise e
|
162 |
+
print(tokens_2grams)
|
163 |
distinct_tokens = distinct_tokens | set(tokens)
|
164 |
distinct_tokens_2grams = distinct_tokens_2grams | set(tokens_2grams)
|
165 |
distinct_tokens_3grams = distinct_tokens_3grams | set(tokens_3grams)
|
166 |
total_tokens.extend(tokens)
|
167 |
total_tokens_2grams.extend(list(tokens_2grams))
|
168 |
total_tokens_3grams.extend(list(tokens_3grams))
|
169 |
+
print(distinct_tokens_2grams, total_tokens_2grams)
|
170 |
Distinct_1 = len(distinct_tokens)/len(total_tokens)
|
171 |
Distinct_2 = len(distinct_tokens_2grams)/len(total_tokens_2grams)
|
172 |
Distinct_3 = len(distinct_tokens_3grams)/len(total_tokens_3grams)
|