MyNameIsSimon committed
Commit c9760a6 · 1 Parent(s): 4b3754b

code cleanup

Files changed (1)
  1. app.py +37 -33
app.py CHANGED
@@ -7,7 +7,7 @@ from llama_cpp.llama_chat_format import MoondreamChatHandler
 For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 """
 
-# client = InferenceClient()
+
 class MyModel:
     def __init__(self):
         self.client = None
@@ -21,18 +21,18 @@ class MyModel:
         system_message,
         max_tokens,
         temperature,
-        min_p,
+        top_p,
     ):
         if model != self.current_model or self.current_model is None:
+            model_id, filename = model.split(",")
             client = Llama.from_pretrained(
-                repo_id="lab2-as/lora_model_gguf",
-                filename='*Q4_K_M.gguf',
-                n_ctx=2048, # n_ctx should be increased to accommodate the image embedding
+                repo_id=model_id.strip(),
+                filename=f"*{filename.strip()}*.gguf",
+                n_ctx=2048,  # n_ctx should be increased to accommodate the image embedding
             )
 
             self.client = client
             self.current_model = model
-
 
         messages = [{"role": "system", "content": system_message}]
 
@@ -45,54 +45,58 @@ class MyModel:
         messages.append({"role": "user", "content": message})
 
        response = ""
-
         for message in self.client.create_chat_completion(
-            messages,
-            temperature=temperature,
-            top_p=min_p,
-            stream=True,
-            max_tokens=max_tokens
-        ):
+            messages,
+            temperature=temperature,
+            top_p=top_p,
+            stream=True,
+            max_tokens=max_tokens,
+        ):
             delta = message["choices"][0]["delta"]
             if "content" in delta:
                 response += delta["content"]
                 yield response
 
-        # for message in client.chat_completion(
-        #     messages,
-        #     max_tokens=max_tokens,
-        #     stream=True,
-        #     temperature=temperature,
-        #     top_p=top_p,
-        #     model=model,
-        # ):
-        #     token = message.choices[0].delta.content
-
-        #     response += token
-        #     yield response
-
 
 """
 For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
 """
 my_model = MyModel()
 model_choices = [
-    "lab2-as/lora_model",
-    "lab2-as/lora_model_no_quant",
+    "lab2-as/lora_model_gguf, Q4",
 ]
 demo = gr.ChatInterface(
     my_model.respond,
     additional_inputs=[
-        gr.Dropdown(choices=model_choices, value=model_choices[0], label="Select Model"),
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=128, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+        gr.Dropdown(
+            choices=model_choices,
+            value=model_choices[0],
+            label="Select Model",
+        ),
+        gr.Textbox(
+            value="You are a friendly Chatbot.",
+            label="System message",
+        ),
+        gr.Slider(
+            minimum=1,
+            maximum=2048,
+            value=128,
+            step=1,
+            label="Max new tokens",
+        ),
+        gr.Slider(
+            minimum=0.1,
+            maximum=4.0,
+            value=0.7,
+            step=0.1,
+            label="Temperature",
+        ),
         gr.Slider(
             minimum=0.1,
             maximum=1.0,
             value=0.95,
            step=0.05,
-            label="Min-p (nucleus sampling)",
+            label="Top-p (Nucleus sampling)",
         ),
     ],
 )
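
Review note: the substantive change in this commit is the model-selection convention. Each dropdown entry now packs a repo id and a quantization tag into one string ("repo_id, tag"), which respond splits and expands into a GGUF filename glob for Llama.from_pretrained. Below is a minimal standalone sketch of that parsing step; the parse_model_choice helper is hypothetical (it does not exist in app.py) and only illustrates the convention.

# Hypothetical helper mirroring the parsing done inline in MyModel.respond;
# parse_model_choice is illustrative only and is not part of this commit.
def parse_model_choice(choice: str) -> tuple[str, str]:
    """Split a dropdown entry like "lab2-as/lora_model_gguf, Q4" into the
    repo id and the filename glob handed to Llama.from_pretrained."""
    model_id, tag = choice.split(",")
    return model_id.strip(), f"*{tag.strip()}*.gguf"

if __name__ == "__main__":
    repo_id, filename_glob = parse_model_choice("lab2-as/lora_model_gguf, Q4")
    assert repo_id == "lab2-as/lora_model_gguf"
    assert filename_glob == "*Q4*.gguf"  # would match e.g. a Q4_K_M quant file in the repo

One caveat worth flagging in review: choice.split(",") unpacks into exactly two names, so a dropdown entry containing a second comma raises ValueError; choice.split(",", 1) would tolerate that.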