AingHongsin committed on
Commit cc2cfa7 · verified · 1 Parent(s): 82de062

Create app.py

Files changed (1)
  1. app.py +168 -0
app.py ADDED
@@ -0,0 +1,168 @@
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ import torch
+ import gradio as gr
+ import spaces
+
+
+ # Chat-template markers used to format turns and to parse model output back into turns
+ TURN_TEMPLATE = "<|im_start|>{role}\n{content}<eos>\n"
+ TURN_PREFIX = "<|im_start|>{role}\n"
+
+ start_token = "<|im_start|>"
+ end_token = "<eos>"
+
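+ # A full exchange in this format looks like, for example:
+ #   <|im_start|>user\nHello<eos>\n<|im_start|>assistant\nHi there<eos>\n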
+
+ # On a ZeroGPU Space, CUDA tensors created at import time still report 'cpu'
+ # until a @spaces.GPU function runs, hence the device printed below.
+ zero = torch.Tensor([0]).cuda()
+ print(zero.device)  # 'cpu' outside a @spaces.GPU context
+
+ # Load your fine-tuned model and tokenizer
+ surMuy_model_id = "AingHongsin/SurMuy_v1_512512201"
+ model = AutoModelForCausalLM.from_pretrained(
+     surMuy_model_id,
+     device_map={'': 0},
+     revision="main",
+     torch_dtype=torch.bfloat16,
+ )
+
+ tokenizer = AutoTokenizer.from_pretrained(surMuy_model_id)
+
+ model.eval()
+ model.to(zero.device)
+
+ def deFormat(data):
+     """Parse ChatML-style output back into a list of {'role', 'content'} turns."""
+     # Find the start and end indices of each turn in the data
+     turn_indices = []
+     start_index = data.find(start_token)
+     while start_index != -1:
+         end_index = data.find(end_token, start_index)
+         if end_index != -1:
+             turn_indices.append((start_index, end_index + len(end_token)))
+         else:
+             turn_indices.append((start_index, len(data)))
+         start_index = data.find(start_token, start_index + len(start_token))
+
+     # Extract role and content for each turn
+     turns = []
+     for turn_start, turn_end in turn_indices:
+         turn_data = data[turn_start:turn_end].strip()
+
+         # Split the turn into its role line and content, per TURN_TEMPLATE
+         role_start = len(start_token)
+         role_end = turn_data.find("\n", role_start)
+         role = turn_data[role_start:role_end]
+
+         content = turn_data[role_end + 1:]
+         # Drop a trailing end-of-turn token if the decoder left it in place
+         if content.endswith(end_token):
+             content = content[:-len(end_token)].strip()
+
+         turns.append({'role': role, 'content': content})
+
+     return turns
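+
+ # For example, deFormat("<|im_start|>user\nHello<eos>\n<|im_start|>assistant\nHi<eos>\n")
+ # returns [{'role': 'user', 'content': 'Hello'}, {'role': 'assistant', 'content': 'Hi'}]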
+
+ @spaces.GPU
+ def generate(text):
+     # Sampling-based generation (currently unused by the UI, which calls
+     # beam_search_generate instead).
+     device = zero.device
+
+     messages = [
+         {"role": "user", "content": text}
+     ]
+
+     encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt", add_generation_prompt=True)
+
+     model_inputs = encodeds.to(device)
+     model.to(device)
+
+     generated_ids = model.generate(model_inputs, max_new_tokens=512, do_sample=True, pad_token_id=tokenizer.pad_token_id)
+     decoded = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
+     # Parse the full transcript back into turns; the caller picks the reply.
+     predict_answer = deFormat(decoded[0])
+     return predict_answer
+
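+ # Hypothetical usage: generate("Hello") returns the parsed turns, e.g.
+ # [{'role': 'user', 'content': 'Hello'}, {'role': 'assistant', 'content': '...'}]
+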
+ @spaces.GPU
+ def beam_search(model, input_ids, beam_width=3, max_length=10):
+     # Manual beam search over the raw language-model logits (kept for
+     # reference; the UI path uses model.generate(num_beams=...) instead).
+     sequences = [(input_ids, 0.0)]  # Initialize with the prompt and score 0.0
+
+     for _ in range(max_length):
+         all_candidates = []
+         for seq, score in sequences:
+             if seq[0, -1].item() == tokenizer.eos_token_id:  # Finished beams pass through
+                 all_candidates.append((seq, score))
+                 continue
+             with torch.no_grad():
+                 logits = model(seq).logits[0, -1]
+             log_probs = torch.log_softmax(logits, dim=-1)
+             # Expanding only the top beam_width tokens per beam is enough, since
+             # no other token can reach the top beam_width overall candidates.
+             top_log_probs, top_ids = log_probs.topk(beam_width)
+             for log_prob, token_id in zip(top_log_probs, top_ids):
+                 candidate = (torch.cat([seq, token_id.view(1, 1)], dim=-1), score - log_prob.item())
+                 all_candidates.append(candidate)
+
+         # Order all candidates by cumulative negative log-likelihood
+         ordered = sorted(all_candidates, key=lambda tup: tup[1])
+
+         # Select k best
+         sequences = ordered[:beam_width]
+
+     return sequences
+
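+ # Hypothetical usage of the manual beam search above (names are illustrative):
+ #   prompt_ids = tokenizer("Hello", return_tensors="pt").input_ids.to(zero.device)
+ #   beams = beam_search(model, prompt_ids, beam_width=3, max_length=10)
+ #   best_ids, best_nll = beams[0]
+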
+ @spaces.GPU
+ def beam_search_generate(text, beam_width=8, max_length=512):
+     device = "cuda" if torch.cuda.is_available() else "cpu"
+
+     messages = [
+         {"role": "user", "content": text}
+     ]
+
+     encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt", add_generation_prompt=True)
+     model_inputs = encodeds.to(device)
+     model.to(device)
+
+     generated_ids = model.generate(
+         model_inputs,
+         max_new_tokens=max_length,
+         num_beams=beam_width,
+         early_stopping=True,
+         pad_token_id=tokenizer.pad_token_id
+     )
+     decoded = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
+     # The decoded transcript contains the user turn followed by the
+     # assistant turn; return the assistant's content.
+     predict_object = deFormat(decoded[0])
+     reply = predict_object[1]['content']
+
+     messages.append(
+         {"role": "assistant", "content": reply}
+     )
+     return reply
+
+
+ def yes_man(message, history):
+     # ChatInterface passes (message, history); only the latest message is used.
+     return beam_search_generate(message)
+
+ gr.ChatInterface(
+     yes_man,
+     chatbot=gr.Chatbot(height=650),
+     textbox=gr.Textbox(placeholder="Write your message here", container=False, scale=7),
+     title="Sur Muy",
+     description="I am your assistant",
+     # examples=["Hello", "Am I cool?", "Are tomatoes vegetables?"],
+     undo_btn="Delete Previous",
+     clear_btn="Clear",
+ ).launch()