Spaces:

numind
/

NuExtract-1.5

Runtime error

Alexandre-Numind committed on Mar 26

Commit

a408e76

verified ·

1 Parent(s): 1a7a373

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -15,8 +15,8 @@ from nuextract_logging import log_event
 MAX_INPUT_SIZE = 100_000
-MAX_NEW_TOKENS = 8_000
-MAX_WINDOW_SIZE = 1_000
 markdown_description = """
 <!DOCTYPE html>
@@ -74,7 +74,7 @@ def predict_chunk(text, template, current, model, tokenizer):
     input_llm =  f"<|input|>\n### Template:\n{template}\n### Current:\n{current}\n### Text:\n{text}\n\n<|output|>" + "{"
     input_ids = tokenizer(input_llm, return_tensors="pt", truncation=True, max_length=MAX_INPUT_SIZE).to("cuda")
     output = tokenizer.decode(model.generate(**input_ids, max_new_tokens=MAX_NEW_TOKENS, do_sample=False)[0], skip_special_tokens=True)
     return clean_json_text(output.split("<|output|>")[1])
 def sliding_window_prediction(template, text, model, tokenizer, window_size=4000, overlap=128):

 MAX_INPUT_SIZE = 100_000
+MAX_NEW_TOKENS = 4_000
+MAX_WINDOW_SIZE = 10_000
 markdown_description = """
 <!DOCTYPE html>
     input_llm =  f"<|input|>\n### Template:\n{template}\n### Current:\n{current}\n### Text:\n{text}\n\n<|output|>" + "{"
     input_ids = tokenizer(input_llm, return_tensors="pt", truncation=True, max_length=MAX_INPUT_SIZE).to("cuda")
     output = tokenizer.decode(model.generate(**input_ids, max_new_tokens=MAX_NEW_TOKENS, do_sample=False)[0], skip_special_tokens=True)
+    print(output)
     return clean_json_text(output.split("<|output|>")[1])
 def sliding_window_prediction(template, text, model, tokenizer, window_size=4000, overlap=128):