njwright92 committed
Commit 1cc0294 · verified · 1 Parent(s): 43bdc67

Update handler.py

Files changed (1):
  handler.py (+10 -8)
handler.py CHANGED

@@ -1,6 +1,6 @@
 from ctransformers import AutoModelForCausalLM, AutoTokenizer
 from transformers import pipeline
-
+import json
 
 class EndpointHandler:
     def __init__(self, model_dir):
@@ -13,14 +13,13 @@ class EndpointHandler:
         self.model = AutoModelForCausalLM.from_pretrained(
             f"{self.model_dir}/comic_mistral-v5.2.q5_0.gguf",
             model_type="mistral",
-            lib="avx2",  # or "avx" or "basic" depending on your CPU
-            gpu_layers=0,  # Set to a number > 0 if you want to use GPU
+            lib="avx2",
+            gpu_layers=0,
             hf=True
         )
         self.tokenizer = AutoTokenizer.from_pretrained(self.model)
 
     def preprocess(self, data):
-        # You can add any preprocessing steps here if needed
         return data
 
     def __call__(self, data):
@@ -30,20 +29,23 @@
         inputs = self.preprocess(data)
         prompt = inputs["inputs"]
 
-        # Generate text using the model directly for streaming
+        # Generate text using the model
+        generated_text = ""
         for text in self.model(prompt,
                                max_new_tokens=256,
                                temperature=0.8,
                                repetition_penalty=1.1,
                                do_sample=True,
                                stream=True):
-            yield self.postprocess({"generated_text": text})
+            generated_text += text
+
+        # Return a JSON-serializable response
+        response = {"generated_text": generated_text}
+        return json.dumps(response)
 
     def postprocess(self, data):
-        # You can add any postprocessing steps here if needed
         return data
 
-
 def get_handler(model_dir):
     handler = EndpointHandler(model_dir)
     handler.load_model()
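
The practical effect of the commit: `__call__` is no longer a generator that yields each chunk through `postprocess`; it buffers the stream and returns a single JSON string. A minimal usage sketch follows, under two assumptions not confirmed by the visible diff: that `get_handler` returns the handler after `handler.load_model()` (the hunk is truncated there), and that the model directory and prompt below are placeholders, not values from this repo.

import json

# Placeholder path; point this at the directory that contains
# comic_mistral-v5.2.q5_0.gguf (assumption for illustration).
handler = get_handler("/repository")

# The updated __call__ buffers the whole generation before returning,
# so this call blocks until up to 256 new tokens have been produced.
raw = handler({"inputs": "Tell me a joke about robots."})

# __call__ now returns json.dumps(...), i.e. a JSON string, not a dict,
# so the caller decodes it once generation has finished.
result = json.loads(raw)
print(result["generated_text"])

The trade-off is latency for simplicity: the previous handler streamed tokens as they were produced, while the updated one returns nothing until generation completes, but its output is a single JSON payload that a synchronous endpoint client can parse directly.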