Spaces:
Running
Running
aliasgerovs
committed on
Commit
•
65029fb
1
Parent(s):
6af6f76
Update app.py
Browse files
app.py
CHANGED
@@ -238,9 +238,9 @@ def split_text_allow_complete_sentences_nltk(text, max_length=256, tolerance=10,
|
|
238 |
encoded_segments = []
|
239 |
for seg, _ in final_segments:
|
240 |
encoded_segment = tokenizer.encode(' '.join(seg), add_special_tokens=True, max_length=max_length+tolerance, truncation=True)
|
241 |
-
|
242 |
-
|
243 |
-
return
|
244 |
|
245 |
|
246 |
def predict_bc(model, tokenizer, text):
|
@@ -266,11 +266,12 @@ def ai_generated_test(ai_option, input):
|
|
266 |
|
267 |
bc_scores = []
|
268 |
mc_scores = []
|
269 |
-
cleaned_text = remove_special_characters(input)
|
270 |
samples_len = len(split_text_allow_complete_sentences_nltk(input))
|
271 |
-
|
|
|
272 |
for i in samples_len:
|
273 |
-
|
|
|
274 |
mc_score = predict_mc(text_mc_model, text_mc_tokenizer, cleaned_text)
|
275 |
bc_scores.append(bc_score)
|
276 |
mc_scores.append(mc_score)
|
|
|
238 |
encoded_segments = []
|
239 |
for seg, _ in final_segments:
|
240 |
encoded_segment = tokenizer.encode(' '.join(seg), add_special_tokens=True, max_length=max_length+tolerance, truncation=True)
|
241 |
+
decoded_segment = tokenizer.decode(encoded_segment)
|
242 |
+
decoded_segments.append(decoded_segment)
|
243 |
+
return decoded_segments
|
244 |
|
245 |
|
246 |
def predict_bc(model, tokenizer, text):
|
|
|
266 |
|
267 |
bc_scores = []
|
268 |
mc_scores = []
|
|
|
269 |
samples_len = len(split_text_allow_complete_sentences_nltk(input))
|
270 |
+
segments = split_text_allow_complete_sentences_nltk(input)
|
271 |
+
|
272 |
for i in samples_len:
|
273 |
+
cleaned_text = remove_special_characters(segments[i])
|
274 |
+
bc_score = predict_bc(text_bc_model, text_bc_tokenizer,cleaned_text )
|
275 |
mc_score = predict_mc(text_mc_model, text_mc_tokenizer, cleaned_text)
|
276 |
bc_scores.append(bc_score)
|
277 |
mc_scores.append(mc_score)
|