[email protected] committed on
Commit 75466a3 · 1 Parent(s): bb351ec

put timeout as a parameter

Files changed (1)
  1. app.py +4 -1
app.py CHANGED
@@ -21,6 +21,9 @@ from peft import AutoPeftModelForCausalLM
 import torch
 import os
 
+# Maximum execution time
+thread_timeout = 600
+
 if os.environ.get("HF_TOKEN") is None:
     raise ValueError(
         "You must set the HF_TOKEN environment variable to use this script, you also need to have access to the Llama 3.2 model family"
@@ -119,7 +122,7 @@ def infere(
     # Start generation on a separate thread, so that we don't block the UI. The text is pulled from the streamer
     # in the main thread. Adds timeout to the streamer to handle exceptions in the generation thread.
     streamer = TextIteratorStreamer(
-        tokenizer, timeout=600.0, skip_prompt=True, skip_special_tokens=True
+        tokenizer, timeout=thread_timeout, skip_prompt=True, skip_special_tokens=True
     )
     generate_kwargs = dict(
         input_ids=inputs,
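
For context, a minimal sketch of the pattern this diff touches: transformers' TextIteratorStreamer hands decoded text from a background generation thread to the main thread, and its timeout bounds how long the consumer waits for the next token, so an exception in the generation thread cannot hang the UI loop forever. The model name and max_new_tokens below are placeholder assumptions for a self-contained example; app.py itself loads a PEFT Llama 3.2 model.

from threading import Thread
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

# Placeholder model for illustration only; app.py loads a PEFT Llama 3.2 model instead.
model_name = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

inputs = tokenizer("Hello, world", return_tensors="pt").input_ids

# Seconds the consumer waits for the next token before the streamer raises
# queue.Empty; mirrors the thread_timeout parameter introduced by this commit.
thread_timeout = 600

streamer = TextIteratorStreamer(
    tokenizer, timeout=thread_timeout, skip_prompt=True, skip_special_tokens=True
)
generate_kwargs = dict(input_ids=inputs, streamer=streamer, max_new_tokens=32)

# Run generation on a background thread so the main (UI) thread is not blocked.
thread = Thread(target=model.generate, kwargs=generate_kwargs)
thread.start()

# Pull decoded text from the streamer as it arrives; if the generation thread
# dies or stalls, the timeout keeps this loop from blocking indefinitely.
for chunk in streamer:
    print(chunk, end="", flush=True)
thread.join()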