Yersel committed on
Commit 1a3530a · 1 Parent(s): 58bdce3

Implement chatbot

Files changed (2)
  1. app.py +72 -4
  2. requirements.txt +5 -0
app.py CHANGED
@@ -1,7 +1,75 @@
  import gradio as gr

- def greet(name):
-     return "Hello " + name + "!!"

- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
- demo.launch()
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ import torch
+ import os
+ from huggingface_hub import login
+ import spaces
  import gradio as gr

+ token = os.environ.get("HF_TOKEN_READ_LLAMA")  # read token stored as a Space secret
+ login(token)  # authenticate so the gated Llama weights can be downloaded
+
+ model_name = 'meta-llama/Meta-Llama-3.1-8B-Instruct'
+ model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16)
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+ if torch.cuda.is_available():
+     device = torch.device('cuda')
+ else:
+     device = torch.device('cpu')
+
+ model = model.to(device)
+
+ @spaces.GPU  # request GPU hardware for the duration of each call
+ def response(message, history, system_message, max_tokens, temperature, top_p):
+     messages = [{"role": "system", "content": system_message}]
+
+     for value in history:  # replay prior (user, assistant) turns
+         if value[0]:
+             messages.append({"role": "user", "content": value[0]})
+         if value[1]:
+             messages.append({"role": "assistant", "content": value[1]})
+
+     messages.append({"role": "user", "content": message})
+
+     input_ids = tokenizer.apply_chat_template(
+         messages,
+         add_generation_prompt=True,
+         return_tensors='pt'
+     ).to(model.device)
+
+     terminators = [
+         tokenizer.eos_token_id,
+         tokenizer.convert_tokens_to_ids("<|eot_id|>")
+     ]
+
+     outputs = model.generate(
+         input_ids,
+         max_new_tokens=max_tokens,
+         eos_token_id=terminators,
+         do_sample=True,
+         temperature=temperature,
+         top_p=top_p
+     )
+
+     response = ''
+
+     for char in tokenizer.decode(  # decode only the newly generated tokens
+         outputs[0][input_ids.shape[-1]:],
+         skip_special_tokens=True
+     ):
+         response += char
+         yield response  # yield the growing string so gr.ChatInterface streams it
+
+
+ demo = gr.ChatInterface(
+     response,
+     additional_inputs=[
+         gr.Textbox(value="You are a friendly assistant", label="System Message"),
+         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+         gr.Slider(minimum=0.1, maximum=4, value=0.7, step=0.1, label="Temperature"),
+         gr.Slider(minimum=0.1, maximum=1, value=0.9, step=0.05, label="Top_p"),
+     ]
+ )
+
+ if __name__ == "__main__":
+     demo.launch()
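A note on the streaming behavior: response() runs generation to completion and then yields the decoded text one character at a time, so the UI only simulates token streaming. A minimal sketch of true incremental streaming with transformers' TextIteratorStreamer, reusing input_ids, terminators, and the sampling parameters from the function above (the threading setup here is an illustrative assumption, not part of this commit):

from threading import Thread
from transformers import TextIteratorStreamer

# Yields decoded text as soon as new tokens are produced; skip_prompt drops
# the echoed input, skip_special_tokens drops markers such as <|eot_id|>.
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

# model.generate blocks until generation finishes, so run it on a background
# thread and consume the streamer from the generator.
thread = Thread(target=model.generate, kwargs=dict(
    input_ids=input_ids,
    streamer=streamer,
    max_new_tokens=max_tokens,
    eos_token_id=terminators,
    do_sample=True,
    temperature=temperature,
    top_p=top_p,
))
thread.start()

response = ''
for new_text in streamer:
    response += new_text
    yield response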
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ transformers
+ torch
+ huggingface_hub
+ spaces
+ gradio
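The requirements are unpinned; note that Llama 3.1 support landed in transformers 4.43.0, so a lower bound may be worth adding. An illustrative pinned variant, with the other entries left floating as in the commit:

transformers>=4.43.0  # Llama 3.1 support was added in 4.43.0
torch
huggingface_hub
spaces
gradio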