vilarin committed
Commit 42681ce · verified · 1 parent: 6b07afa

Update app.py

Files changed (1): app.py (+22 -22)
app.py CHANGED
@@ -98,7 +98,7 @@ def launch():
     OLLAMA_SERVICE_THREAD = threading.Thread(target=ollama_service_thread)
     OLLAMA_SERVICE_THREAD.start()
     print("Giving ollama serve a moment")
-    time.sleep(15)
+    time.sleep(10)
 
 @spaces.GPU()
 async def stream_chat(message: str, history: list, model: str, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
@@ -111,31 +111,31 @@ async def stream_chat(message: str, history: list, model: str, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
             {"role": "user", "content": prompt},
             {"role": "assistant", "content": answer},
         ])
-    conversation.append({"role": "user", "content": message})
+    conversation.append({"role": "user", "content": message})
 
-    print(f"Conversation is -\n{conversation}")
+    print(f"Conversation is -\n{conversation}")
 
-    response = client.chat(
-        model=model,
-        stream=True,
-        messages=conversation,
-        keep_alive="60s",
-        options={
-            'num_predict': max_new_tokens,
-            'temperature': temperature,
-            'top_p': top_p,
-            'top_k': top_k,
-            'repeat_penalty': penalty,
-            'low_vram': True,
-        },
-    )
+    response = client.chat(
+        model=model,
+        stream=True,
+        messages=conversation,
+        keep_alive="60s",
+        options={
+            'num_predict': max_new_tokens,
+            'temperature': temperature,
+            'top_p': top_p,
+            'top_k': top_k,
+            'repeat_penalty': penalty,
+            'low_vram': True,
+        },
+    )
 
-    print(response)
+    print(response)
 
-    buffer = ""
-    for chunk in response:
-        buffer += chunk["message"]["content"]
-        yield buffer
+    buffer = ""
+    for chunk in response:
+        buffer += chunk["message"]["content"]
+        yield buffer
 
 
 async def main(message: str, history: list, model: str, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
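
The only visible change in this commit is the shorter startup delay after ollama serve is launched in a background thread (the remaining -/+ pairs are identical in content, so they appear to be whitespace-only edits). A fixed time.sleep() is a guess at how long the server needs to come up. An alternative, sketched below, is to poll the server until it answers; this is not part of the commit, and the default address http://127.0.0.1:11434 and the helper name wait_for_ollama are assumptions made for illustration.

# Sketch only: poll the Ollama HTTP endpoint instead of sleeping a fixed time.
# Assumes the default listen address http://127.0.0.1:11434; wait_for_ollama
# is a hypothetical helper, not part of the committed app.py.
import time
import urllib.error
import urllib.request

def wait_for_ollama(url: str = "http://127.0.0.1:11434", timeout: float = 30.0) -> bool:
    """Return True once the server responds, False if `timeout` seconds elapse."""
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            with urllib.request.urlopen(url, timeout=2) as resp:
                if resp.status == 200:  # root endpoint replies "Ollama is running"
                    return True
        except (urllib.error.URLError, OSError):
            pass  # server not accepting connections yet
        time.sleep(0.5)
    return False

# Usage in place of the fixed delay:
# OLLAMA_SERVICE_THREAD.start()
# if not wait_for_ollama():
#     raise RuntimeError("ollama serve did not come up in time")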
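
For reference, stream_chat relies on the ollama Python client's chat(..., stream=True) call, which returns an iterator of chunks whose text is in chunk["message"]["content"]. The standalone sketch below shows that pattern outside the Space; the host, the "llama3" model tag, and the prompt are placeholders rather than values taken from app.py.

# Minimal sketch of the streaming pattern used in stream_chat (assumed setup).
import ollama

client = ollama.Client(host="http://127.0.0.1:11434")  # assumed default host

stream = client.chat(
    model="llama3",  # placeholder model tag
    messages=[{"role": "user", "content": "Hello!"}],
    stream=True,     # returns an iterator of partial responses
    options={"temperature": 0.8, "num_predict": 256},
)

buffer = ""
for chunk in stream:
    piece = chunk["message"]["content"]
    buffer += piece                      # full reply so far, as stream_chat yields it
    print(piece, end="", flush=True)
print()

Accumulating and yielding the growing buffer, rather than each delta, matches how stream_chat feeds Gradio, which redraws the whole reply on every yield.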