gokaygokay committed
Commit dfca54f
Parent: 78c280f

Update app.py

Files changed (1)
app.py +10 -26
app.py CHANGED
@@ -77,14 +77,21 @@ hf_hub_download(
     local_dir="./models"
 )
 
-llm = None
-llm_model = None
+# Initialize LLM outside the respond function
+llm = Llama(
+    model_path="models/Mistral-Nemo-Instruct-2407.Q5_K_M.gguf",
+    flash_attn=True,
+    n_gpu_layers=81,
+    n_batch=1024,
+    n_ctx=32768,
+)
+
+provider = LlamaCppPythonProvider(llm)
 
 @spaces.GPU(duration=120)
 def respond(
     message,
     history: list[tuple[str, str]],
-    model,
     system_message,
     max_tokens,
     temperature,
@@ -92,25 +99,8 @@ def respond(
     top_k,
     repeat_penalty,
 ):
-
-
     chat_template = MessagesFormatterType.MISTRAL
 
-    global llm
-    global llm_model
-
-    if llm is None or llm_model != model:
-        llm = Llama(
-            model_path=f"models/{model}",
-            flash_attn=True,
-            n_gpu_layers=81,
-            n_batch=1024,
-            n_ctx=32768,
-        )
-        llm_model = model
-
-    provider = LlamaCppPythonProvider(llm)
-
     agent = LlamaCppAgent(
         provider,
         system_prompt=f"{system_message}",
@@ -163,12 +153,6 @@ description = """<p><center>
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
-        gr.Dropdown([
-            'Mistral-Nemo-Instruct-2407.Q5_K_M.gguf'
-        ],
-        value="Mistral-Nemo-Instruct-2407.Q5_K_M.gguf",
-        label="Model"
-        ),
         gr.Textbox(value="You are a helpful assistant.", label="System message"),
         gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="Max tokens"),
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
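
For readers reproducing the change, here is a minimal sketch of app.py's initialization flow after this commit, assembled from the diff hunks above. It is not the full file: the `repo_id` and `filename` arguments to `hf_hub_download`, the `top_p` parameter, and the `predefined_messages_formatter_type` wiring sit on lines the hunks do not show, so they are assumptions marked in the comments.

```python
# Sketch of the post-commit flow in app.py (assembled from the diff hunks).
import spaces
import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
from llama_cpp_agent.providers import LlamaCppPythonProvider

MODEL_FILE = "Mistral-Nemo-Instruct-2407.Q5_K_M.gguf"

hf_hub_download(
    repo_id="bartowski/Mistral-Nemo-Instruct-2407-GGUF",  # assumed; not shown in the diff
    filename=MODEL_FILE,                                  # assumed; not shown in the diff
    local_dir="./models",
)

# Initialize LLM outside the respond function: the model is loaded once at
# startup instead of lazily inside respond() behind `global llm` state,
# which this commit removes along with the single-entry model dropdown.
llm = Llama(
    model_path=f"models/{MODEL_FILE}",
    flash_attn=True,
    n_gpu_layers=81,  # offload up to 81 layers to the GPU
    n_batch=1024,
    n_ctx=32768,      # 32k-token context window
)
provider = LlamaCppPythonProvider(llm)

@spaces.GPU(duration=120)
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,  # assumed: this parameter sits on a line the hunks do not show
    top_k,
    repeat_penalty,
):
    chat_template = MessagesFormatterType.MISTRAL
    agent = LlamaCppAgent(
        provider,
        system_prompt=f"{system_message}",
        predefined_messages_formatter_type=chat_template,  # assumed wiring; not shown in the diff
    )
    # ... response generation continues as before (unchanged by this commit)
```

The trade-off is straightforward: the model now loads once at startup rather than on the first request, and since the removed dropdown listed only the single Mistral-Nemo GGUF file, the `llm is None or llm_model != model` check it guarded could only ever trigger a load once anyway.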