vilarin committed
Commit ab33f5f · verified · 1 Parent(s): 0fc53a3

Update app.py

Files changed (1)
  1. app.py +11 -7
app.py CHANGED
@@ -4,6 +4,7 @@ import threading
 import time
 import subprocess
 import spaces
+import asyncio
 
 OLLAMA = os.path.expanduser("~/ollama")
 process = None
@@ -100,7 +101,7 @@ def launch():
     print("Giving ollama serve a moment")
     time.sleep(10)
 
-def stream_chat(message: str, history: list, model: str, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
+async def stream_chat(message: str, history: list, model: str, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
     print(f"message: {message}")
     conversation = []
     for prompt, answer in history:
@@ -114,6 +115,7 @@ def stream_chat(message: str, history: list, model: str, temperature: float, max
 
     response = client.chat(
         model=model,
+        stream=True,
         messages=conversation,
         keep_alive="60s",
         options={
@@ -127,8 +129,11 @@ def stream_chat(message: str, history: list, model: str, temperature: float, max
     )
 
     print(response)
-    return response['message']['content']
-
+
+    buffer = ""
+    for chunk in response:
+        buffer += chunk["message"]["content"]
+        yield buffer
 
 
 def main(message: str, history: list, model: str, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
@@ -141,8 +146,8 @@ def main(message: str, history: list, model: str, temperature: float, max_new_to
     else:
         if not process:
             launch()
-
-    response = stream_chat(
+
+    response = await stream_chat(
         message,
         history,
         model,
@@ -152,8 +157,7 @@ def main(message: str, history: list, model: str, temperature: float, max_new_to
         top_k,
         penalty
     )
-
-    terminate()
+
     yield response
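The commit switches client.chat from returning one completed message to streaming: with stream=True the ollama Python client returns an iterator of partial responses, and stream_chat accumulates them, yielding the growing text after each chunk. A minimal standalone sketch of the same pattern, assuming the ollama Python client and a server already listening on the default port (the model name and prompt are illustrative):

import ollama

# Assumes an ollama server is already running locally; the model name
# below is illustrative and must already be pulled.
client = ollama.Client(host="http://127.0.0.1:11434")

def stream_chat(message: str):
    # stream=True makes client.chat return an iterator of chunks
    # instead of a single completed message.
    response = client.chat(
        model="llama3",
        messages=[{"role": "user", "content": message}],
        stream=True,
        keep_alive="60s",
    )
    buffer = ""
    for chunk in response:
        buffer += chunk["message"]["content"]
        yield buffer  # yield the accumulated text after every chunk

for partial in stream_chat("Hello!"):
    print(partial)

Because stream_chat is now a generator rather than a plain function, callers consume it by iteration; each yielded value is the full text so far, which is the shape streaming chat UIs such as Gradio's ChatInterface expect for progressive rendering.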