Spaces:
Running
Running
aliasgerovs
committed on
Merge branch 'demo'
Browse files
app.py
CHANGED
@@ -213,7 +213,7 @@ def update_character_count(text):
|
|
213 |
return f"{len(text)} characters"
|
214 |
|
215 |
|
216 |
-
def split_text_allow_complete_sentences_nltk(text, max_length=256, tolerance=
|
217 |
sentences = nltk.sent_tokenize(text)
|
218 |
segments = []
|
219 |
current_segment = []
|
@@ -269,7 +269,7 @@ def split_text_allow_complete_sentences_nltk(text, max_length=256, tolerance=40,
|
|
269 |
|
270 |
def predict_bc(model, tokenizer, text):
|
271 |
tokens = text_bc_tokenizer(
|
272 |
-
text, padding='max_length', truncation=True, max_length=
|
273 |
).to(device)["input_ids"]
|
274 |
|
275 |
output = model(tokens)
|
@@ -279,7 +279,7 @@ def predict_bc(model, tokenizer, text):
|
|
279 |
|
280 |
def predict_mc(model, tokenizer, text):
|
281 |
tokens = text_mc_tokenizer(
|
282 |
-
text, padding='max_length', truncation=True, return_tensors="pt", max_length=
|
283 |
).to(device)["input_ids"]
|
284 |
output = model(tokens)
|
285 |
output_norm = softmax(output.logits.detach().cpu().numpy(), 1)[0]
|
|
|
213 |
return f"{len(text)} characters"
|
214 |
|
215 |
|
216 |
+
def split_text_allow_complete_sentences_nltk(text, max_length=256, tolerance=30, min_last_segment_length=100, type_det='bc'):
|
217 |
sentences = nltk.sent_tokenize(text)
|
218 |
segments = []
|
219 |
current_segment = []
|
|
|
269 |
|
270 |
def predict_bc(model, tokenizer, text):
|
271 |
tokens = text_bc_tokenizer(
|
272 |
+
text, padding='max_length', truncation=True, max_length=512, return_tensors="pt"
|
273 |
).to(device)["input_ids"]
|
274 |
|
275 |
output = model(tokens)
|
|
|
279 |
|
280 |
def predict_mc(model, tokenizer, text):
|
281 |
tokens = text_mc_tokenizer(
|
282 |
+
text, padding='max_length', truncation=True, return_tensors="pt", max_length=256
|
283 |
).to(device)["input_ids"]
|
284 |
output = model(tokens)
|
285 |
output_norm = softmax(output.logits.detach().cpu().numpy(), 1)[0]
|