Mahadih534 committed
Commit cf5bb80 · verified · 1 Parent(s): 94549b0

added required files

Files changed (2)
  1. app.py +71 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,71 @@
+ import gradio as gr
+ from huggingface_hub import InferenceClient
+ 
+ 
+ def format_prompt(message, history):
+     """Build Mixtral's [INST] chat template from the message and history."""
+     prompt = "<s>"
+     for user_prompt, bot_response in history:
+         prompt += f"[INST] {user_prompt} [/INST]"
+         prompt += f" {bot_response}</s> "
+     prompt += f"[INST] {message} [/INST]"
+     return prompt
+ 
+ 
+ def kwargs_get(temperature, tokens, top_k, top_p, r_p):
+     """Collect the sampling parameters passed to text_generation()."""
+     generate_kwargs = dict(
+         temperature=temperature,
+         max_new_tokens=tokens,
+         top_p=top_p,
+         repetition_penalty=r_p,
+         do_sample=True,
+         top_k=top_k,
+         seed=42,
+     )
+     return generate_kwargs
+ 
+ 
+ def inference(message, history, temperature, tokens, top_k, top_p, r_p, model):
+     """Stream a response from the selected model, token by token."""
+     prompt = format_prompt(message, history)
+     client = InferenceClient(model=model)
+     kwargs = kwargs_get(temperature, tokens, top_k, top_p, r_p)
+     partial_message = ""
+     for response in client.text_generation(prompt, **kwargs, stream=True, details=True, return_full_text=False):
+         partial_message += response.token.text
+         yield partial_message
+ 
+ 
+ with gr.Blocks() as UI:
+     with gr.Column():
+         gr.Markdown("Model Selection & Configuration")
+ 
+         models = gr.Dropdown(
+             value="mistralai/Mixtral-8x7B-Instruct-v0.1",
+             choices=["mistralai/Mixtral-8x7B-Instruct-v0.1", "codellama/CodeLlama-7b-hf",
+                      "bigcode/starcoder", "bigcode/santacoder", "codellama/CodeLlama-70b-Instruct-hf",
+                      "google/flan-t5-xxl", "facebook/opt-66b", "tiiuae/falcon-40b", "bigscience/bloom",
+                      "EleutherAI/gpt-neox-20b"],
+             label="Available models",
+             info="The default model is Mixtral-8x7B-Instruct-v0.1",
+             interactive=True,
+         )
+ 
+     with gr.Column():
+         gr.ChatInterface(
+             inference,
+             description="A demo of a Gradio UI consuming a TGI endpoint, with Mixtral-8x7B-Instruct as the default model.",
+             title="Gradio 🤝 TGI",
+             additional_inputs_accordion="Additional configuration for better responses",
+             retry_btn=None,
+             undo_btn=None,
+             clear_btn="Clear",
+             theme="soft",
+             submit_btn="Send",
+             additional_inputs=[
+                 gr.Slider(minimum=0.01, maximum=0.99, value=0.1, label="Temperature"),
+                 gr.Slider(minimum=1, maximum=1020, value=352, step=1, label="Max New Tokens"),
+                 gr.Slider(minimum=1, maximum=1000, value=980, step=1, label="Top K"),
+                 gr.Slider(minimum=0.01, maximum=0.99, value=0.90, label="Top P"),
+                 gr.Slider(minimum=0.01, maximum=1.0, value=0.99, label="Repetition Penalty"),
+                 models,
+             ],
+             # Each example is a single user message.
+             examples=["Hello", "Am I cool?", "Are tomatoes vegetables?"],
+         )
+ 
+ UI.queue().launch(debug=True)
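
For reference, format_prompt() builds Mixtral's [INST] chat template; a minimal sketch of the string it yields for a one-turn history (the messages below are illustrative):

    history = [("Hi", "Hello! How can I help?")]
    message = "Are tomatoes vegetables?"
    print(format_prompt(message, history))
    # <s>[INST] Hi [/INST] Hello! How can I help?</s> [INST] Are tomatoes vegetables? [/INST]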
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ # ChatBot_UI
+ gradio
+ 
+ # Mixtral Inference Endpoint
+ huggingface_hub
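
A quick way to verify both the dependencies and endpoint access before launching the app is a streaming call mirroring what inference() does (a minimal sketch; it assumes a Hugging Face token is available to huggingface_hub, e.g. via the HF_TOKEN environment variable):

    from huggingface_hub import InferenceClient

    client = InferenceClient(model="mistralai/Mixtral-8x7B-Instruct-v0.1")
    for chunk in client.text_generation(
        "<s>[INST] Say hello in one sentence. [/INST]",
        max_new_tokens=32,
        stream=True,            # yield tokens as they are generated
        details=True,           # each chunk carries token metadata (chunk.token.text)
        return_full_text=False,
    ):
        print(chunk.token.text, end="", flush=True)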