#############################################################################
# Title: Gradio Interface to AI hosted on Huggingface-Space
# Author: Andreas Fischer
# Date: October 7th, 2023
# Last update: December 8th, 2023
#############################################################################
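# Description: Gradio ChatInterface streaming completions from llama.cpp-based
#              /v1/completions endpoints (local or hosted on Huggingface Spaces),
#              using model-specific prompt templates.
#############################################################################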
import gradio as gr
import requests
import random
import json
def specifications(message, model, prompt_type, verbose=False):
    """Map the selected model to its API endpoint and build the request body
    for the chosen prompt template."""
    url = "http://0.0.0.0:2600/v1/completions"
    body = {}
    if model == "Local":
        url = "http://0.0.0.0:2600/v1/completions"
        if prompt_type == "Default": prompt_type = "ChatML (German)"
    if model == "SauerkrautLM-7B":
        url = "https://SauerkrautLM-GGUF-API.hf.space/v1/completions"
        if prompt_type == "Default": prompt_type = "Vicuna (German)"
    if model == "WizardLM-13B":
        url = "https://afischer1985-wizardlm-13b-v1-2-q4-0-gguf.hf.space/v1/completions"
        if prompt_type == "Default": prompt_type = "Vicuna"
    if model == "OpenHermes2-7B":
        url = "https://AFischer1985-OpenHermes-2-GGUF-API.hf.space/v1/completions"
        if prompt_type == "Default": prompt_type = "ChatML"
    if model == "CollectiveCognition-7B":
        url = "https://AFischer1985-CollectiveCognition-GGUF-API.hf.space/v1/completions"
        if prompt_type == "Default": prompt_type = "ChatML"
    if prompt_type == "ChatML":
        body = {"prompt": "<|im_start|>system\nYou are a helpful AI-Assistant.<|im_end|>\n<|im_start|>user\n" + message + "<|im_end|>\n<|im_start|>assistant\n", "max_tokens": 1000, "stop": "<|im_end|>", "echo": False, "stream": True}
    if prompt_type == "ChatML (German)":
        # German system prompt: "You are a large language model that answers politely and
        # competently. Write down your thoughts step by step to solve problems sensibly."
        body = {"prompt": "<|im_start|>system\nDu bist ein großes Sprachmodell, das höflich und kompetent antwortet. Schreibe deine Gedanken Schritt für Schritt auf, um Probleme sinnvoll zu lösen.<|im_end|>\n<|im_start|>user\n" + message + "<|im_end|>\n<|im_start|>assistant\n", "max_tokens": 1000, "stop": "<|im_end|>", "echo": False, "stream": True}
    if prompt_type == "Alpaca":
        body = {"prompt": "###Instruction:\n" + message + "\n\n###Response:\n", "max_tokens": 1000, "stop": "###", "echo": False, "stream": True}
    if prompt_type == "Vicuna":
        body = {"prompt": "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. USER: " + message + " ASSISTANT:", "max_tokens": 1000, "stop": "USER:", "echo": False, "stream": True}
    if prompt_type == "Vicuna (German)":
        # German system prompt: "A chat between a user and an AI assistant. The AI assistant
        # gives helpful, detailed, and polite answers."
        body = {"prompt": "Ein Chat zwischen einem Benutzer und einem KI-Assistenten. Der KI-Assistent gibt hilfreiche, detaillierte und höfliche Antworten.\nUser: " + message + "\nAssistant: ", "max_tokens": 1000, "stop": "User:", "echo": False, "stream": True}
    if verbose:
        print("model: " + model + "\n" + "URL: " + url + "\n" + "prompt_type: " + prompt_type + "\n" + "message: " + message + "\n" + "body: " + str(body) + "\n")
    return [url, body, model, prompt_type]
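# Example (sketch; assumes the hosted WizardLM endpoint above is reachable):
# url, body, model, prompt_type = specifications("Hello!", "WizardLM-13B", "Default")
# -> url: "https://afischer1985-wizardlm-13b-v1-2-q4-0-gguf.hf.space/v1/completions"
# -> prompt_type: "Vicuna"; body: Vicuna-style prompt with "stop": "USER:"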
def response(message, history, model, prompt_type):
    """Stream the completion for `message` and yield the partial answer as it grows.
    `history` is supplied by gr.ChatInterface but not sent to the endpoint."""
    [url, body, model, prompt_type] = specifications(message, model, prompt_type, verbose=True)
    response = ""
    buffer = ""
    print("URL: " + url)
    print("User: " + message + "\nAI: ")
    # Iterating over the Response yields the raw byte chunks of the SSE stream; every
    # complete event is prefixed with "data: " and ends in '..., "finish_reason": null}]}'.
    for text in requests.post(url, json=body, stream=True):
        text = text.decode('utf-8')
        if not text.startswith(": ping -"):  # skip the server's keep-alive pings
            buffer = buffer + text
        parts = buffer.split('"finish_reason": null}]}')
        while len(parts) > 1:  # at least one complete event is buffered
            part = parts.pop(0) + '"finish_reason": null}]}'
            if part.lstrip('\n\r').startswith("data: "):
                part = part.lstrip('\n\r').replace("data: ", "")
            try:
                part = str(json.loads(part)["choices"][0]["text"])
                print(part, end="", flush=True)
                response = response + part
            except (json.JSONDecodeError, KeyError, IndexError):  # malformed event, drop it
                pass
        buffer = "".join(parts)  # keep the incomplete remainder for the next chunk
        yield response
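# Example (sketch; assumes the selected endpoint is live):
# for partial in response("What is 1+1?", [], "OpenHermes2-7B", "Default"):
#     pass  # each `partial` holds the complete answer generated so far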
gr.ChatInterface(
    response,
    additional_inputs=[
        gr.Dropdown(["Local", "CollectiveCognition-7B", "OpenHermes2-7B", "WizardLM-13B"],
                    value="WizardLM-13B", label="Model"),
        gr.Dropdown(["Default", "ChatML", "ChatML (German)", "Vicuna", "Vicuna (German)", "Alpaca"],
                    value="Default", label="Prompt Type")
    ]
).queue().launch(share=False, server_name="0.0.0.0", server_port=7864)
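# Note: queue() enables Gradio to stream the generator's partial outputs
# (required for streaming in Gradio 3.x).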
#import os
#os.system('python3 -m llama_cpp.server --model "/home/af/gguf/models/SauerkrautLM-7b-HerO-q8_0.gguf" --host 0.0.0.0 --port 2600')
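# The "Local" model option expects such a llama-cpp-python server listening on port 2600.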