feat: decode model output
app.py
CHANGED
@@ -17,6 +17,9 @@ pipe = pipeline(
     torch_dtype=torch.bfloat16,
     device_map="auto",
 )
+pipe.model.generate = torch.compile(
+    pipe.model.generate, mode="reduce-overhead", fullgraph=True
+)
 
 
 class ChatState:
@@ -68,6 +71,7 @@ def invoke(history: HistoryType):
     response = pipe(input_text, do_sample=True, top_p=0.95, max_new_tokens=1024)[0][
         "generated_text"
     ]
+    response = response.split("<|im_start|>assistant\n")[-1].split("<|im_end|>")[0]
     return response
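For context on the first hunk: torch.compile can wrap any callable, including a bound method like pipe.model.generate, and the diff swaps the method for its compiled wrapper in place. A minimal sketch of the same wrapper on a toy function (the function itself is illustrative, not from the Space; whether fullgraph=True succeeds on a real generate call depends on the model and library versions):

import torch

def step(x: torch.Tensor) -> torch.Tensor:
    # Illustrative stand-in for a small, repeatedly launched decode step.
    return torch.relu(x) @ x.T

# mode="reduce-overhead" targets small graphs that are launched many times
# (it uses CUDA graphs on GPU); fullgraph=True makes compilation fail
# loudly on a graph break instead of silently splitting the graph.
compiled_step = torch.compile(step, mode="reduce-overhead", fullgraph=True)

x = torch.randn(4, 4)
print(compiled_step(x))  # first call compiles; later calls reuse the compiled graph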
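The second hunk is the change the commit title describes: the pipeline returns the full ChatML-formatted transcript, so the added line keeps only the last assistant turn. A standalone sketch of that extraction (the helper name and sample string are illustrative):

def extract_assistant_reply(generated_text: str) -> str:
    # Keep everything after the final assistant header, then drop the
    # end-of-turn marker (assumes ChatML <|im_start|>/<|im_end|> tags).
    reply = generated_text.split("<|im_start|>assistant\n")[-1]
    return reply.split("<|im_end|>")[0]

raw = (
    "<|im_start|>user\nWhat is 2+2?<|im_end|>\n"
    "<|im_start|>assistant\nThe answer is 4.<|im_end|>"
)
assert extract_assistant_reply(raw) == "The answer is 4."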