File size: 4,250 Bytes
caaa800
 
 
 
 
 
 
9045a87
308dd86
 
 
caaa800
 
 
 
 
 
 
 
57817e9
caaa800
 
 
 
 
57817e9
caaa800
 
57817e9
caaa800
 
 
0b07f09
caaa800
0eb163e
caaa800
0eb163e
caaa800
0eb163e
caaa800
0eb163e
caaa800
 
 
 
 
 
 
 
308dd86
 
caaa800
308dd86
 
 
 
caaa800
308dd86
 
 
 
 
caaa800
308dd86
 
 
 
caaa800
308dd86
 
 
9045a87
caaa800
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#############################################################################
# Title:  Gradio Interface to AI hosted on Huggingface-Space
# Author: Andreas Fischer
# Date:   October 7th, 2023
# Last update: December 8th, 2023
#############################################################################

import gradio as gr
import requests
import random
import json

def specifications(message, model, prompt_type, verbose=False):
  """Resolve the endpoint URL and the JSON request body for one completion call.

  Args:
    message: The user's message; it is embedded verbatim in the prompt template.
    model: Name of the backend. Known names select a dedicated endpoint; any
      other name falls back to the local endpoint on port 2600.
    prompt_type: Name of the prompt template, or "Default" to use the template
      associated with a known model.
    verbose: If truthy, print the resolved settings to stdout.

  Returns:
    A list ``[url, body, model, prompt_type]``. ``body`` is a dict ready to be
    sent as JSON, or the empty string when ``prompt_type`` does not name a
    known template (including "Default" for an unknown model).
  """
  local_url = "http://0.0.0.0:2600/v1/completions"

  # model name -> (endpoint, template used when the caller asks for "Default")
  endpoints = {
    "Local": (local_url, "ChatML (German)"),
    "SauerkrautLM-7B": ("https://SauerkrautLM-GGUF-API.hf.space/v1/completions", "Vicuna (German)"),
    "WizardLM-13B": ("https://afischer1985-wizardlm-13b-v1-2-q4-0-gguf.hf.space/v1/completions", "Vicuna"),
    "OpenHermes2-7B": ("https://AFischer1985-OpenHermes-2-GGUF-API.hf.space/v1/completions", "ChatML"),
    "CollectiveCognition-7B": ("https://AFischer1985-CollectiveCognition-GGUF-API.hf.space/v1/completions", "ChatML"),
  }

  # template name -> (text before the message, text after the message, stop sequence)
  templates = {
    "ChatML": (
      "<|im_start|>system\nYou are a helpful AI-Assistant.<|im_end|>\n<|im_start|>user\n",
      "<|im_end|>\n<|im_start|>assistant\n",
      "<|im_end|>",
    ),
    # Same turn markers as "ChatML", so generation stops at the same terminator.
    "ChatML (German)": (
      "<|im_start|>system\nDu bist ein großes Sprachmodell, das höflich und kompetent antwortet. Schreibe deine Gedanken Schritt für Schritt auf, um Probleme sinnvoll zu lösen.<|im_end|>\n<|im_start|>user\n",
      "<|im_end|>\n<|im_start|>assistant\n",
      "<|im_end|>",
    ),
    "Alpaca": (
      "###Instruction:\n",
      "\n\n###Response:\n",
      "###",
    ),
    "Vicuna": (
      "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. USER: ",
      " ASSISTANT:",
      "USER:",
    ),
    "Vicuna (German)": (
      "Ein Chat zwischen einem Benutzer und einem KI-Assistenten. Der KI-Assistent gibt hilfreiche, detaillierte und höfliche Antworten.\nUser: ",
      "\nAssistant: ",
      "User:",
    ),
  }

  url, default_prompt_type = endpoints.get(model, (local_url, None))
  if prompt_type == "Default" and default_prompt_type is not None:
    prompt_type = default_prompt_type

  body = ""  # stays empty when no template matches
  if prompt_type in templates:
    before, after, stop = templates[prompt_type]
    body = {
      "prompt": before + message + after,
      "max_tokens": 1000,
      "stop": stop,
      # Real booleans: the strings "False"/"True" are both truthy and only
      # work if the receiving side happens to coerce them.
      "echo": False,
      "stream": True,
    }

  if verbose:
    print(f"model: {model}\nURL: {url}\nprompt_type: {prompt_type}\nmessage: {message}\nbody: {body}\n")
  return [url, body, model, prompt_type]

def _completion_text_from_sse_line(line):
  """Return the generated text carried by one event-stream line, or None."""
  if not line.startswith("data:"):
    return None  # blank separators and comment lines such as ": ping - ..."
  payload = line[len("data:"):].strip()
  if not payload or payload == "[DONE]":
    return None
  try:
    return str(json.loads(payload)["choices"][0]["text"])
  except (ValueError, KeyError, IndexError, TypeError):
    return None  # not a completion chunk; skip it rather than abort the stream


def response(message, history, model, prompt_type):
  """Stream the model's answer to `message` as a growing string.

  Args:
    message: The user's message.
    history: Previous conversation turns as passed in by the chat UI; not used.
    model: Backend name, forwarded to `specifications`.
    prompt_type: Prompt template name, forwarded to `specifications`.

  Yields:
    The answer accumulated so far, once per received text fragment. If the
    stream carries no text at all, a single empty string is yielded.
  """
  print(model)
  [url,body,model,prompt_type]=specifications(message,model,prompt_type,verbose=True)
  answer=""
  emitted=False
  print("URL: "+url)
  print("User: "+message+"\nAI: ")
  # The context manager closes the connection even if the consumer stops early.
  with requests.post(url, json=body, stream=True) as reply:
    if not reply.ok:
      print("HTTP status: "+str(reply.status_code))
    # Read complete lines instead of fixed-size chunks: decoding arbitrary
    # byte chunks can split a multi-byte UTF-8 character (e.g. an umlaut),
    # and a chunk can hold parts of more than one event.
    for raw_line in reply.iter_lines():
      part=_completion_text_from_sse_line(raw_line.decode("utf-8", errors="replace"))
      if part is None:
        continue
      print(part, end="", flush=True)
      answer=answer+part
      emitted=True
      yield answer
  if not emitted:
    yield answer


# Assemble the chat UI: two dropdowns (backend model and prompt template) are
# passed to `response` as additional inputs, then the app is served on port 7864.
model_dropdown = gr.Dropdown(
  ["Local","CollectiveCognition-7B", "OpenHermes2-7B","WizardLM-13B"],
  value="WizardLM-13B",
  label="Model",
)
prompt_dropdown = gr.Dropdown(
  ["Default", "ChatML","ChatML (German)","Vicuna","Vicuna (German)","Alpaca"],
  value="Default",
  label="Prompt Type",
)
chat_interface = gr.ChatInterface(
  response,
  additional_inputs=[model_dropdown, prompt_dropdown],
)
chat_interface.queue().launch(share=False, server_name="0.0.0.0", server_port=7864)

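# Reference: command for serving a GGUF model locally on port 2600
# (the endpoint used by the "Local" model option).
#import os
#os.system('python3 -m llama_cpp.server --model "/home/af/gguf/models/SauerkrautLM-7b-HerO-q8_0.gguf" --host 0.0.0.0 --port 2600')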