AFischer1985 committed
Commit 9b74782
1 Parent(s): d3f988e

Update run.py

Files changed (1): run.py (+72, -19)
run.py CHANGED
@@ -186,17 +186,6 @@ def extend_prompt(message="", history=None, system=None, RAGAddon=None, system2=
  def response(
    message, history, temperature=0.9, max_new_tokens=500, top_p=0.95, repetition_penalty=1.0,
  ):
-   temperature = float(temperature)
-   if temperature < 1e-2: temperature = 1e-2
-   top_p = float(top_p)
-   generate_kwargs = dict(
-     temperature=temperature,
-     max_new_tokens=max_new_tokens,
-     top_p=top_p,
-     repetition_penalty=repetition_penalty,
-     do_sample=True,
-     seed=42,
-   )
    addon=""
    first_message = history[0][0] if history else message #[-1][0]
    results=collection.query(
@@ -222,17 +211,81 @@ def response(
      system, # system prompt
      addon, # RAG-component added to the system prompt
      None, # fictive first words of the AI (neither displayed nor stored)
-     historylimit=4, # number of past messages to consider for response to current message
+     historylimit=0, # number of past messages to consider for response to current message
      removeHTML=True # remove HTML-components from History (to prevent bugs with Markdown)
    )
+
+   ## Request response from model
+   #------------------------------
+   print("AI running on prem!" if(onPrem) else "AI running HFHub!")
+   print(prompt)
+   if(onPrem==False):
+     temperature=float(0.9)
+     max_new_tokens=1000
+     top_p=0.95
+     repetition_penalty=1.0
+     if temperature < 1e-2: temperature = 1e-2
+     top_p = float(top_p)
+     generate_kwargs = dict(
+       temperature=temperature,
+       max_new_tokens=max_new_tokens,
+       top_p=top_p,
+       repetition_penalty=repetition_penalty,
+       do_sample=True,
+       seed=42,
+     )
      stream = client.text_generation(prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
-   output = ""
-   for response in stream:
-     output += response.token.text
-     yield output
-   if(len(history)==0):
-     output=output+"\n\n<br><details open><summary><strong>Sources</strong></summary><br><ul>"+ "".join(["<li>" + s + "</li>" for s in combination])+"</ul></details>"
-     yield output
+     response = ""
+     #print("User: "+message+"\nAI: ")
+     for text in stream:
+       part=text.token.text
+       #print(part, end="", flush=True)
+       response += part
+       if removeHTML==True: response = re.sub("<(.*?)>","\n", response) # remove HTML-components in general (may cause bugs with markdown-rendering)
+       yield response
+     if(True): #len(history)==0):
+       response=response+"\n\n<br><details open><summary><strong>Sources</strong></summary><br><ul>"+ "".join(["<li>" + s + "</li>" for s in combination])+"</ul></details>"
+       yield response
+
+   if(onPrem==True):
+     # url="https://afischer1985-wizardlm-13b-v1-2-q4-0-gguf.hf.space/v1/completions"
+     url="http://0.0.0.0:2600/v1/completions"
+     body={"prompt":prompt,"max_tokens":None, "echo":"False","stream":"True"} # e.g. Mixtral-Instruct
+     if("Discolm_german_7b" in modelPath): body.update({"stop": ["<|im_end|>"]}) # fix stop-token of DiscoLM
+     if("Gemma-" in modelPath): body.update({"stop": ["<|im_end|>","</end_of_turn>"]}) # fix stop-token of Gemma
+     response="" #+"("+myType+")\n"
+     buffer=""
+     #print("URL: "+url)
+     #print("User: "+message+"\nAI: ")
+     for text in requests.post(url, json=body, stream=True): #-H 'accept: application/json' -H 'Content-Type: application/json'
+       if buffer is None: buffer=""
+       buffer=str("".join(buffer))
+       #print("*** Raw String: "+str(text)+"\n***\n")
+       text=text.decode('utf-8')
+       if((text.startswith(": ping -")==False) & (len(text.strip("\n\r"))>0)): buffer=buffer+str(text)
+       #print("\n*** Buffer: "+str(buffer)+"\n***\n")
+       buffer=buffer.split('"finish_reason": null}]}')
+       if(len(buffer)==1):
+         buffer="".join(buffer)
+         pass
+       if(len(buffer)==2):
+         part=buffer[0]+'"finish_reason": null}]}'
+         if(part.lstrip('\n\r').startswith("data: ")): part=part.lstrip('\n\r').replace("data: ", "")
+         try:
+           part = str(json.loads(part)["choices"][0]["text"])
+           #print(part, end="", flush=True)
+           response=response+part
+           buffer="" # reset buffer
+         except Exception as e:
+           print("Exception:"+str(e))
+           pass
+       if removeHTML==True: response = re.sub("<(.*?)>","\n", response) # remove HTML-components in general (may cause bugs with markdown-rendering)
+       yield response
+     if(True): #len(history)==0):
+       response=response+"\n\n<br><details open><summary><strong>Sources</strong></summary><br><ul>"+ "".join(["<li>" + s + "</li>" for s in combination])+"</ul></details>"
+       yield response
+     #history.append((message, response)) # add current dialog to history

  gr.ChatInterface(response, chatbot=gr.Chatbot(value=[[None,"Herzlich willkommen! Ich bin ein KI-basiertes Assistenzsystem, das für jede Anfrage die am besten geeigneten KI-Tools empfiehlt.<br>Aktuell bin ich wenig mehr als eine Tech-Demo und kenne nur 7 KI-Modelle - also sei bitte nicht zu streng mit mir.<br>Was ist dein Anliegen?"]],render_markdown=True),title="German AI-RAG-Interface to the Hugging Face Hub").queue().launch(share=True) #False, server_name="0.0.0.0", server_port=7864)
  print("Interface up and running!")
 