Commit · 8de17f5
Parent(s): a4e819a
inference mode go brr
- __pycache__/gpt.cpython-310.pyc +0 -0
- gpt.py +24 -23
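
The change wraps the whole generation loop of get_response in torch.inference_mode(), a stricter and slightly faster variant of torch.no_grad(): autograd records no graph, and tensors created inside are marked as inference tensors that cannot participate in later autograd. A minimal standalone sketch of that behavior (the tiny model here is illustrative, not from this repo):

import torch

model = torch.nn.Linear(4, 4)   # stand-in for any trained model
x = torch.randn(1, 4)

with torch.inference_mode():
    y = model(x)

print(y.requires_grad)          # False: no autograd graph was built
print(torch.is_inference(y))    # True: y is an inference-mode tensor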
__pycache__/gpt.cpython-310.pyc
CHANGED
Binary files a/__pycache__/gpt.cpython-310.pyc and b/__pycache__/gpt.cpython-310.pyc differ
gpt.py
CHANGED
@@ -137,26 +137,27 @@ my_GPT.eval()
 eot = enc._special_tokens['<|endoftext|>']
 
 def get_response(in_text, top_k=50, temperature=1):
-    … (old lines 140-162: the previous body of get_response; its text is stripped in this diff view apart from a fragment of line 145, "input_tokens")
+    with torch.inference_mode():
+        prompt = "USER: " + in_text + "\nASSISTANT: "
+        input_tokens = enc.encode(prompt)
+        output_tokens = enc.encode(prompt)
+        for x in range(block_size):
+            if len(input_tokens) > block_size:
+                input_tokens = input_tokens[1:]
+            context_tensor = torch.tensor(input_tokens).view(1, -1).to(device)
+
+            logits, loss = my_GPT(context_tensor)
+            logits = logits[:, -1, :] / temperature
+            if top_k > 0:
+                # Remove all tokens with a probability less than the last token of the top-k
+                indices_to_remove = logits < torch.topk(logits, top_k, dim=1)[0][..., -1, None]
+                logits[indices_to_remove] = float("-inf")
+            probs = F.softmax(logits, dim=-1)
+            result = torch.multinomial(probs, num_samples=1).item()
+            if result == eot:
+                break
+            input_tokens.append(result)
+            output_tokens.append(result)
+            yield enc.decode(output_tokens)
+
+        yield enc.decode(output_tokens)
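
The sampling step in the new get_response is standard top-k sampling with temperature: logits for the last position are divided by temperature, every logit below the k-th largest is masked to -inf, and the next token is drawn from the softmax of the survivors (the loop also slides the context window by dropping the oldest token once block_size is exceeded). A self-contained sketch of just that sampling step, with illustrative names and shapes:

import torch
import torch.nn.functional as F

def sample_top_k(logits, top_k=50, temperature=1.0):
    # logits: (batch, vocab_size) next-token scores
    logits = logits / temperature
    if top_k > 0:
        # k-th largest logit per row; mask everything smaller
        kth = torch.topk(logits, top_k, dim=-1).values[..., -1, None]
        logits = logits.masked_fill(logits < kth, float("-inf"))
    probs = F.softmax(logits, dim=-1)
    return torch.multinomial(probs, num_samples=1)  # (batch, 1) sampled ids

# Example: draw one token id from a fake 100-token vocabulary
next_id = sample_top_k(torch.randn(1, 100), top_k=5).item()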
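
Because get_response yields the decoded text after every sampled token, and once more after the loop exits, callers can stream the reply as it grows; the trailing yield guarantees a final value even when generation stops early on the end-of-text token. Note that the yielded text includes the "USER: ... ASSISTANT:" prefix, since output_tokens is seeded with the encoded prompt. A hypothetical consumer, assuming my_GPT, enc, block_size, and device are initialized as elsewhere in gpt.py:

for partial in get_response("What is attention?", top_k=50, temperature=0.8):
    print(partial, end="\r")  # redraw the growing reply in place
print()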