Spaces: Runtime error

AingHongsin committed
Create app.py

app.py ADDED (+168 lines)
from transformers import AutoTokenizer, AutoModelForCausalLM
import numpy as np
import torch
import gradio as gr
import spaces


# Markers for the model's chat turn format
TURN_TEMPLATE = "<|im_start|>{role}\n{content}<eos>\n"
TURN_PREFIX = "<|im_start|>{role}\n"

start_token = "<|im_start|>"
end_token = "<eos>"


# Under ZeroGPU, .cuda() is deferred at startup, so this tensor still reports
# 'cpu' here; it only lands on the GPU inside @spaces.GPU functions.
zero = torch.Tensor([0]).cuda()
print(zero.device)  # <-- 'cpu' 🤔

# Load the fine-tuned model and tokenizer
surMuy_model_id = "AingHongsin/SurMuy_v1_512512201"
model = AutoModelForCausalLM.from_pretrained(
    surMuy_model_id,
    device_map={'': 0},
    revision="main",
    torch_dtype=torch.bfloat16,
)

tokenizer = AutoTokenizer.from_pretrained(surMuy_model_id)

model.eval()
model.to(zero.device)

def deFormat(data):
    # Find the start and end indices of each turn in the data
    turn_indices = []
    start_index = data.find(start_token)
    while start_index != -1:
        end_index = data.find(end_token, start_index)
        if end_index != -1:
            turn_indices.append((start_index, end_index + len(end_token)))
        else:
            turn_indices.append((start_index, len(data)))
        start_index = data.find(start_token, start_index + len(start_token))

    # Extract role and content for each turn
    turns = []
    for turn_start, turn_end in turn_indices:
        turn_data = data[turn_start:turn_end].strip()

        # Each turn follows TURN_TEMPLATE: "<|im_start|>{role}\n{content}<eos>\n"
        role_start = len(start_token)
        role_end = turn_data.find("\n", role_start)
        role = turn_data[role_start:role_end]

        content_start = role_end + 1
        content = turn_data[content_start:]

        turns.append({'role': role, 'content': content})

    return turns
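
# Worked example (assuming the turn markers survive decoding intact):
#   deFormat("<|im_start|>user\nHello<eos>\n<|im_start|>assistant\nHi<eos>\n")
#   -> [{'role': 'user', 'content': 'Hello<eos>'},
#       {'role': 'assistant', 'content': 'Hi<eos>'}]
# Note the trailing end_token stays in 'content', because each turn's end
# index includes len(end_token).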

@spaces.GPU
def generate(text):
    device = zero.device

    messages = [
        {"role": "user", "content": text}
    ]

    encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt", add_generation_prompt=True)
    # print(tokenizer.convert_ids_to_tokens(encodeds[0]))

    model_inputs = encodeds.to(device)
    model.to(device)

    generated_ids = model.generate(model_inputs, max_new_tokens=512, do_sample=True, pad_token_id=tokenizer.pad_token_id)
    decoded = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
    predict_answer = deFormat(decoded[0])
    return predict_answer
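
# Example round trip (a sketch; the reply is sampled, and the parse assumes
# the <|im_start|>/<eos> markers are not stripped by skip_special_tokens):
#   generate("Hello")
#   -> [{'role': 'user', 'content': ...}, {'role': 'assistant', 'content': ...}]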

# Reference beam-search sketch; it is never called by the app (the chat path
# below uses model.generate(num_beams=...) instead) and assumes a model object
# exposing predict_next(seq) -> per-token probabilities.
def beam_search(model, start_token, beam_width=3, max_length=10):
    sequences = [([start_token], 0.0)]  # one beam: the start token, score 0.0

    while len(sequences[0][0]) < max_length:
        all_candidates = []
        for seq, score in sequences:
            if seq[-1] == '<end>':  # Assuming '<end>' is the end token
                all_candidates.append((seq, score))
                continue
            next_token_probs = model.predict_next(seq)
            for token, prob in enumerate(next_token_probs):
                candidate = (seq + [token], score - np.log(prob))
                all_candidates.append(candidate)

        # Order all candidates by cumulative negative log-probability
        ordered = sorted(all_candidates, key=lambda tup: tup[1])

        # Keep the beam_width best
        sequences = ordered[:beam_width]

    return sequences
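
# Minimal sanity check for the sketch above, using a hypothetical toy model
# (DummyLM is not part of this app or any library):
#   class DummyLM:
#       def predict_next(self, seq):
#           return [0.1, 0.7, 0.2]  # fixed distribution over a 3-token vocab
#   beam_search(DummyLM(), '<s>', beam_width=2, max_length=4)
#   -> the 2 best (token_list, cumulative -log prob) pairs of length 4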

@spaces.GPU
def beam_search_generate(text, beam_width=8, max_length=512):
    device = "cuda" if torch.cuda.is_available() else "cpu"

    messages = [
        {"role": "user", "content": text}
    ]

    encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt", add_generation_prompt=True)
    model_inputs = encodeds.to(device)
    model.to(device)

    generated_ids = model.generate(
        model_inputs,
        max_new_tokens=max_length,
        num_beams=beam_width,
        early_stopping=True,
        pad_token_id=tokenizer.pad_token_id
    )
    decoded = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
    predict_object = deFormat(decoded[0])

    # Turn 0 is the user prompt; turn 1 is the model's reply
    messages.append(
        {"role": "assistant", "content": predict_object[1]['content']}
    )
    return predict_object[1]['content']


def yes_man(message, history):
    return beam_search_generate(message)


gr.ChatInterface(
    yes_man,
    chatbot=gr.Chatbot(height=650),
    textbox=gr.Textbox(placeholder="Write your message here", container=False, scale=7),
    # slider=gr.Slider(minimum=6, maximum=8, step=1, label="Beam Width"),
    title="Sur Muy",
    description="I am your assistant",
    # examples=["Hello", "Am I cool?", "Are tomatoes vegetables?"],
    cache_examples=True,
    undo_btn="Delete Previous",
    clear_btn="Clear",
).launch()