Spaces:

polygraf-ai
/

copyright_checker

Runtime error

Ali Asgarov committed on Feb 28, 2024

Commit

c10af95

1 Parent(s): 42fb43c

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -224,9 +224,11 @@ def split_text_allow_complete_sentences_nltk(text, max_length=256, tolerance=30,
     if type_det == 'bc':
         tokenizer = text_bc_tokenizer
     elif type_det == 'mc':
         tokenizer = text_mc_tokenizer
     for sentence in sentences:
         tokens = tokenizer.tokenize(sentence)
@@ -282,7 +284,7 @@ def predict_bc(model, tokenizer, text):
     with torch.no_grad():
         model.eval()
         tokens = text_bc_tokenizer(
-            text, padding='max_length', truncation=True, max_length=256, return_tensors="pt"
         ).to(device)
         output = model(**tokens)
         output_norm = softmax(output.logits.detach().cpu().numpy(), 1)[0]

     if type_det == 'bc':
         tokenizer = text_bc_tokenizer
+        max_length = 333
     elif type_det == 'mc':
         tokenizer = text_mc_tokenizer
+        max_length = 256
     for sentence in sentences:
         tokens = tokenizer.tokenize(sentence)
     with torch.no_grad():
         model.eval()
         tokens = text_bc_tokenizer(
+            text, padding='max_length', truncation=True, max_length=333, return_tensors="pt"
         ).to(device)
         output = model(**tokens)
         output_norm = softmax(output.logits.detach().cpu().numpy(), 1)[0]