kjozsa committed on
Commit
a7fd8f6
·
1 Parent(s): 360106d

try to get model working

Browse files
Files changed (1) hide show
  1. transformerschat.py +8 -7
transformerschat.py CHANGED
@@ -5,13 +5,13 @@ import spaces
5
 
6
 
7
  def models():
8
- return ["teknium/OpenHermes-2.5-Mistral-7B"]
9
 
10
 
11
  def load():
12
- torch.set_default_device("cuda")
13
- model = AutoModelForCausalLM.from_pretrained(models()[0], torch_dtype="auto", trust_remote_code=True)
14
- tokenizer = AutoTokenizer.from_pretrained(models()[0], trust_remote_code=True).to("cuda")
15
  return (model, tokenizer)
16
 
17
 
@@ -30,8 +30,9 @@ def ask(_, system_prompt, pre_prompt, question):
30
  },
31
  ]
32
  logger.debug(f"<< openhermes << {question}")
33
- inputs = tokenizer(question, return_tensors="pt", return_attention_mask=False)
34
- outputs = model.generate(**inputs, max_length=200)
35
- answer = tokenizer.batch_decode(outputs)[0]
 
36
  logger.debug(f">> openhermes >> {answer}")
37
  return answer
 
5
 
6
 
7
  def models():
8
+ return ["openhermes-2.5-mistral-7b.Q4_K_M.gguf"]
9
 
10
 
11
  def load():
12
+ # torch.set_default_device("cuda")
13
+ model = AutoModelForCausalLM.from_pretrained("TheBloke/OpenHermes-2.5-Mistral-7B-GGUF", model_file="openhermes-2.5-mistral-7b.Q4_K_M.gguf", model_type="mistral", gpu_layers=50)
14
+ # tokenizer = AutoTokenizer.from_pretrained(models()[0], trust_remote_code=True).to("cuda")
15
  return (model, tokenizer)
16
 
17
 
 
30
  },
31
  ]
32
  logger.debug(f"<< openhermes << {question}")
33
+ # inputs = tokenizer(question, return_tensors="pt", return_attention_mask=False)
34
+ # outputs = model.generate(**inputs, max_length=200)
35
+ # answer = tokenizer.batch_decode(outputs)[0]
36
+ answer = model(question)
37
  logger.debug(f">> openhermes >> {answer}")
38
  return answer