deepapaikar committed
Commit ee6f18e
1 Parent(s): a8760a3

Upload app.py

Files changed (1)
app.py +51 -0
app.py ADDED
@@ -0,0 +1,51 @@
+ from transformers import AutoTokenizer
+ import transformers
+ import torch
+ import gradio as gr
+
+ # Run all tensor ops on the GPU by default
+ torch.set_default_device("cuda")
+
+ model_id = "deepapaikar/llama_mistral"
+
+ # Text-generation pipeline; device_map="auto" places the fp16 weights on the available GPUs
+ pipeline = transformers.pipeline(
+     "text-generation",
+     model=model_id,
+     torch_dtype=torch.float16,
+     device_map="auto",
+ )
+
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
+
+
+ def predict_answer(question, max_tokens=25):
+     messages = [{"role": "user", "content": question}]
+
+     # Format the single user turn with the model's chat template
+     prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+
+     outputs = pipeline(prompt, max_new_tokens=max_tokens, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
+
+     # generated_text contains the prompt followed by the model's reply
+     return outputs[0]["generated_text"]
+
+
+ def gradio_predict(question, max_tokens):
+     return predict_answer(question, max_tokens)
+
+
+ # Define the Gradio interface
+ iface = gr.Interface(
+     fn=gradio_predict,
+     inputs=[
+         gr.Textbox(label="Question", placeholder="e.g. What is KatzBot?", scale=4),
+         gr.Slider(2, 100, value=25, label="Token Count", info="Choose between 2 and 100"),
+     ],
+     outputs=gr.TextArea(label="Answer"),
+     title="KatzBot",
+     description="Llama - Mistral Merge",
+ )
+
+ # Launch the app
+ iface.queue().launch(debug=True)