aliasgerovs committed on
Commit 65029fb
1 Parent(s): 6af6f76

Update app.py

Files changed (1)
  1. app.py +7 -6
app.py CHANGED
@@ -238,9 +238,9 @@ def split_text_allow_complete_sentences_nltk(text, max_length=256, tolerance=10,
     encoded_segments = []
     for seg, _ in final_segments:
         encoded_segment = tokenizer.encode(' '.join(seg), add_special_tokens=True, max_length=max_length+tolerance, truncation=True)
-        # decoded_segment = tokenizer.decode(encoded_segment)
-        encoded_segments.append(encoded_segment)
-        return encoded_segments
+        decoded_segment = tokenizer.decode(encoded_segment)
+        decoded_segments.append(decoded_segment)
+        return decoded_segments
 
 
 def predict_bc(model, tokenizer, text):
@@ -266,11 +266,12 @@ def ai_generated_test(ai_option, input):
 
     bc_scores = []
     mc_scores = []
-    cleaned_text = remove_special_characters(input)
     samples_len = len(split_text_allow_complete_sentences_nltk(input))
-
+    segments = split_text_allow_complete_sentences_nltk(input)
+
     for i in samples_len:
-        bc_score = predict_bc(text_bc_model, text_bc_tokenizer, cleaned_text)
+        cleaned_text = remove_special_characters(segments[i])
+        bc_score = predict_bc(text_bc_model, text_bc_tokenizer, cleaned_text)
         mc_score = predict_mc(text_mc_model, text_mc_tokenizer, cleaned_text)
         bc_scores.append(bc_score)
         mc_scores.append(mc_score)
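
Taken together, the two hunks change the pipeline so that split_text_allow_complete_sentences_nltk hands back decoded text segments rather than token-id lists, and ai_generated_test cleans and scores each segment individually instead of cleaning the whole input once. Below is a minimal, self-contained sketch of the per-segment scoring loop this commit moves toward; it is an illustration under stated assumptions, not the repository's code. The score_segments wrapper is hypothetical, the remove_special_characters, predict_bc and predict_mc bodies are stand-ins for the ones defined elsewhere in app.py, and the loop uses range(samples_len) where the diff writes "for i in samples_len:", which would raise a TypeError because an int is not iterable.

from typing import List, Tuple

def remove_special_characters(text: str) -> str:
    # Stand-in: app.py defines its own cleaning logic.
    return ''.join(ch for ch in text if ch.isalnum() or ch.isspace())

def predict_bc(model, tokenizer, text: str) -> float:
    # Stand-in for the binary (human vs. AI) classifier call.
    return 0.0

def predict_mc(model, tokenizer, text: str) -> float:
    # Stand-in for the multi-class model-attribution call.
    return 0.0

def score_segments(segments: List[str], bc_model, bc_tokenizer,
                   mc_model, mc_tokenizer) -> Tuple[List[float], List[float]]:
    # Mirrors the loop introduced in this commit: clean each decoded
    # segment, then score it with both models.
    bc_scores, mc_scores = [], []
    samples_len = len(segments)
    for i in range(samples_len):  # assumed fix; the diff iterates over the int itself
        cleaned_text = remove_special_characters(segments[i])
        bc_scores.append(predict_bc(bc_model, bc_tokenizer, cleaned_text))
        mc_scores.append(predict_mc(mc_model, mc_tokenizer, cleaned_text))
    return bc_scores, mc_scores

In app.py the segments would come from split_text_allow_complete_sentences_nltk(input), assuming its return statement sits after the decoding loop and that the decoded_segments list is initialized above the hunk shown here, and the model and tokenizer arguments would be the globals loaded elsewhere in the file.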