enzer1992 committed on
Commit
4fc42e6
·
verified ·
1 Parent(s): e4dd852

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -131
app.py CHANGED
@@ -1,132 +1,35 @@
1
 
2
- from threading import Thread
3
-
4
- import gradio as gr
5
- import spaces
6
- from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
7
-
8
-
9
- TITLE = "<h1><center>Chat with Llama3-8B-Chinese-Chat-v2.1</center></h1>"
10
-
11
- DESCRIPTION = "<h3><center>Visit <a href='https://huggingface.co/shenzhi-wang/Llama3-8B-Chinese-Chat' target='_blank'>our model page</a> for details.</center></h3>"
12
-
13
- DEFAULT_SYSTEM = "You are Llama-3, developed by an independent team. You are a helpful assistant."
14
-
15
- TOOL_EXAMPLE = '''You have access to the following tools:
16
- ```python
17
- def generate_password(length: int, include_symbols: Optional[bool]):
18
- """
19
- Generate a random password.
20
- Args:
21
- length (int): The length of the password
22
- include_symbols (Optional[bool]): Include symbols in the password
23
- """
24
- pass
25
- ```
26
- Write "Action:" followed by a list of actions in JSON that you want to call, e.g.
27
- Action:
28
- ```json
29
- [
30
- {
31
- "name": "tool name (one of [generate_password])",
32
- "arguments": "the input to the tool"
33
- }
34
- ]
35
- ```
36
- '''
37
-
38
- CSS = """
39
- .duplicate-button {
40
- margin: auto !important;
41
- color: white !important;
42
- background: black !important;
43
- border-radius: 100vh !important;
44
- }
45
- """
46
-
47
-
48
- tokenizer = AutoTokenizer.from_pretrained("enzer1992/AI-Guru")
49
- #model = AutoModelForCausalLM.from_pretrained("enzer1992/AI-Guru", torch_dtype="auto", device_map="auto")
50
- model = AutoModelForCausalLM.from_pretrained("enzer1992/AI-Guru", torch_dtype="auto", device_map="auto", low_cpu_mem_usage=True)
51
-
52
-
53
- @spaces.GPU
54
- def stream_chat(message: str, history: list, system: str, temperature: float, max_new_tokens: int):
55
- conversation = [{"role": "system", "content": system or DEFAULT_SYSTEM}]
56
- for prompt, answer in history:
57
- conversation.extend([{"role": "user", "content": prompt}, {"role": "assistant", "content": answer}])
58
-
59
- conversation.append({"role": "user", "content": message})
60
-
61
- input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt").to(
62
- model.device
63
- )
64
- streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
65
-
66
- generate_kwargs = dict(
67
- input_ids=input_ids,
68
- streamer=streamer,
69
- max_new_tokens=max_new_tokens,
70
- temperature=temperature,
71
- do_sample=True,
72
- )
73
- if temperature == 0:
74
- generate_kwargs["do_sample"] = False
75
-
76
- t = Thread(target=model.generate, kwargs=generate_kwargs)
77
- t.start()
78
-
79
- output = ""
80
- for new_token in streamer:
81
- output += new_token
82
- yield output
83
-
84
-
85
- chatbot = gr.Chatbot(height=450)
86
-
87
- with gr.Blocks(css=CSS) as demo:
88
- gr.HTML(TITLE)
89
- gr.HTML(DESCRIPTION)
90
- gr.DuplicateButton(value="Duplicate Space for private use", elem_classes="duplicate-button")
91
- gr.ChatInterface(
92
- fn=stream_chat,
93
- chatbot=chatbot,
94
- fill_height=True,
95
- additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
96
- additional_inputs=[
97
- gr.Text(
98
- value="",
99
- label="System",
100
- render=False,
101
- ),
102
- gr.Slider(
103
- minimum=0,
104
- maximum=1,
105
- step=0.1,
106
- value=0.8,
107
- label="Temperature",
108
- render=False,
109
- ),
110
- gr.Slider(
111
- minimum=128,
112
- maximum=4096,
113
- step=1,
114
- value=1024,
115
- label="Max new tokens",
116
- render=False,
117
- ),
118
- ],
119
- examples=[
120
- ["我的蓝牙耳机坏了,我该去看牙科还是耳鼻喉科?", ""],
121
- ["7年前,妈妈年龄是儿子的6倍,儿子今年12岁,妈妈今年多少岁?", ""],
122
- ["我的笔记本找不到了。", "扮演诸葛亮和我对话。"],
123
- ["我想要一个新的密码,长度为8位,包含特殊符号。", TOOL_EXAMPLE],
124
- ["How are you today?", "You are Taylor Swift, use beautiful lyrics to answer questions."],
125
- ["用C++实现KMP算法,并加上中文注释", ""],
126
- ],
127
- cache_examples=False,
128
- )
129
-
130
-
131
- if __name__ == "__main__":
132
- demo.launch()
 
1
 
2
+ %cd /content/LLaMA-Factory/
3
+
4
+ args = dict(
5
+ model_name_or_path="unsloth/llama-3-8b-Instruct-bnb-4bit", # use bnb-4bit-quantized Llama-3-8B-Instruct model
6
+ adapter_name_or_path="enzer1992/AI-Guru", # load the saved LoRA adapters
7
+ template="llama3", # same as the one used in training
8
+ finetuning_type="lora", # same as the one used in training
9
+ quantization_bit=4, # load 4-bit quantized model
10
+ )
11
+ chat_model = ChatModel(args)
12
+
13
+ messages = []
14
+ print("Welcome to the CLI application, use `clear` to remove the history, use `exit` to exit the application.")
15
+ while True:
16
+ query = input("\nUser: ")
17
+ if query.strip() == "exit":
18
+ break
19
+ if query.strip() == "clear":
20
+ messages = []
21
+ torch_gc()
22
+ print("History has been removed.")
23
+ continue
24
+
25
+ messages.append({"role": "user", "content": query})
26
+ print("Assistant: ", end="", flush=True)
27
+
28
+ response = ""
29
+ for new_text in chat_model.stream_chat(messages):
30
+ print(new_text, end="", flush=True)
31
+ response += new_text
32
+ print()
33
+ messages.append({"role": "assistant", "content": response})
34
+
35
+ torch_gc()