Trying to empty cache and load the model once
app.py
CHANGED
@@ -7,13 +7,15 @@ import torch
 # Use a pipeline as a high-level helper
 from transformers import pipeline
 
+torch.cuda.empty_cache()
 
+print("RUNNING PIPE")
+pipe = pipeline("text-generation", model="NousResearch/Hermes-3-Llama-3.1-8B", max_new_tokens=200, device=0)
+print("PIPE DONE")
 
 @spaces.GPU(duration=120)
 def llama3_1_8B(question):
-
-    pipe = pipeline("text-generation", model="NousResearch/Hermes-3-Llama-3.1-8B", max_new_tokens=200, device=0)
-    print("PIPE DONE")
+
 
     messages = [
         {"role": "user", "content": question},
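
The net effect of the hunk: the pipeline is now built at module scope, so the 8B model loads once when the Space starts instead of on every call into the @spaces.GPU-decorated function. For context, a minimal sketch of the resulting top of app.py; the function body below `messages` is not shown in the diff, so the pipeline call and return value are assumptions based on the transformers chat-style text-generation API:

import torch
import spaces
from transformers import pipeline

# Release any cached CUDA allocations before loading the model.
torch.cuda.empty_cache()

# Build the pipeline once at import time so the weights stay resident
# across requests instead of being reloaded per call.
print("RUNNING PIPE")
pipe = pipeline(
    "text-generation",
    model="NousResearch/Hermes-3-Llama-3.1-8B",
    max_new_tokens=200,
    device=0,
)
print("PIPE DONE")

@spaces.GPU(duration=120)
def llama3_1_8B(question):
    messages = [
        {"role": "user", "content": question},
    ]
    # Assumption: with chat-format input, the pipeline returns the whole
    # conversation, the model's reply appended as the last message.
    output = pipe(messages)
    return output[0]["generated_text"][-1]["content"]

This also matches the usual ZeroGPU pattern: do model setup at startup and keep the @spaces.GPU function for inference only.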