Sergidev committed on
Commit
e6c544c
1 Parent(s): eb2d5c7

zeroGPU v1

Browse files
Files changed (1) hide show
  1. modules/pmbl.py +8 -2
modules/pmbl.py CHANGED
@@ -1,6 +1,8 @@
1
  import sqlite3
2
  from datetime import datetime
3
  from concurrent.futures import ThreadPoolExecutor
 
 
4
 
5
  class PMBL:
6
  def __init__(self, model_path):
@@ -100,9 +102,11 @@ class PMBL:
100
 
101
  for chunk in response.result():
102
  yield chunk
103
-
 
104
  def generate_response_task(self, system_prompt, prompt, n_ctx):
105
  llm = Llama(model_path=self.model_path, n_ctx=n_ctx, n_threads=8, n_gpu_layers=-1, mlock=True)
 
106
 
107
  response = llm(
108
  system_prompt,
@@ -146,9 +150,11 @@ class PMBL:
146
  conn.commit()
147
 
148
  conn.close()
149
-
 
150
  def generate_topic(self, prompt, response):
151
  llm = Llama(model_path=self.model_path, n_ctx=1690, n_threads=8, n_gpu_layers=-1, mlock=True)
 
152
 
153
  system_prompt = f"Based on the following interaction between a user and an AI assistant, generate a concise topic for the conversation in 2-4 words:\n\nUser: {prompt}\nAssistant: {response}\n\nTopic:"
154
 
 
1
  import sqlite3
2
  from datetime import datetime
3
  from concurrent.futures import ThreadPoolExecutor
4
+ import torch
5
+ from huggingface_hub import spaces
6
 
7
  class PMBL:
8
  def __init__(self, model_path):
 
102
 
103
  for chunk in response.result():
104
  yield chunk
105
+
106
+ @spaces.gpu
107
  def generate_response_task(self, system_prompt, prompt, n_ctx):
108
  llm = Llama(model_path=self.model_path, n_ctx=n_ctx, n_threads=8, n_gpu_layers=-1, mlock=True)
109
+ llm = llm.to("cuda") # Move the model to the GPU
110
 
111
  response = llm(
112
  system_prompt,
 
150
  conn.commit()
151
 
152
  conn.close()
153
+
154
+ @spaces.gpu
155
  def generate_topic(self, prompt, response):
156
  llm = Llama(model_path=self.model_path, n_ctx=1690, n_threads=8, n_gpu_layers=-1, mlock=True)
157
+ llm = llm.to("cuda") # Move the model to the GPU
158
 
159
  system_prompt = f"Based on the following interaction between a user and an AI assistant, generate a concise topic for the conversation in 2-4 words:\n\nUser: {prompt}\nAssistant: {response}\n\nTopic:"
160