Spaces:
Runtime error
AingHongsin
committed on
Commit • 9c903ec
1 Parent(s): e1d533e
Update app.py
app.py CHANGED
@@ -64,60 +64,15 @@ def deFormat(data):
 
     return turns
 
-@spaces.GPU
-def generate(text):
-    device = zero.device
-
-    messages = [
-        {"role": "user", "content": text}
-    ]
-
-    encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt", add_generation_prompt=True)
-    # print(tokenizer.convert_ids_to_tokens(encodeds[0]))
-
-    model_inputs = encodeds.to(device)
-    model.to(device)
-
-    generated_ids = model.generate(model_inputs, max_new_tokens=512, do_sample=True, pad_token_id=tokenizer.pad_token_id)
-    decoded = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
-    predict_answer = deFormat(decoded[0])
-    return predict_answer
-
-@spaces.GPU
-def beam_search(model, start_token, beam_width=3, max_length=10):
-    sequences = [[start_token, 0.0]] # Initialize with start_token and score 0.0
-
-    while len(sequences[0][0]) < max_length:
-        all_candidates = []
-        for seq, score in sequences:
-            if seq[-1] == '<end>': # Assuming '<end>' is the end token
-                all_candidates.append((seq, score))
-                continue
-            next_token_probs = model.predict_next(seq)
-            for token, prob in enumerate(next_token_probs):
-                candidate = (seq + [token], score - np.log(prob))
-                all_candidates.append(candidate)
-
-        # Order all candidates by score
-        ordered = sorted(all_candidates, key=lambda tup: tup[1])
-
-        # Select k best
-        sequences = ordered[:beam_width]
-
-    return sequences
-
-@spaces.GPU
+@spaces.GPU(duration=90)
 def beam_search_generate(text, beam_width=8, max_length=512):
     device = "cuda" if torch.cuda.is_available() else "cpu"
 
-
-    messages = []
-
-    messages.append(
+    messages = [
         {
             "role": "user", "content": text
         }
-    )
+    ]
 
     encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt", add_generation_prompt=True)
     model_inputs = encodeds.to(device)
@@ -132,12 +87,7 @@ def beam_search_generate(text, beam_width=8, max_length=512):
     )
     decoded = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
     predict_object = deFormat(decoded[0])
-
-    messages.append(
-        {
-            "role": "assistent", "content": ''.join(predict_object[1]['content'])
-        }
-    )
+
     return ''.join(predict_object[1]['content'])
 
 
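Net effect of the commit: the unused sampling path (generate) and the hand-rolled beam_search helper are deleted, the dead messages.append block after decoding (whose role was misspelled "assistent") is dropped, and the surviving beam_search_generate is now decorated with @spaces.GPU(duration=90), which requests a ZeroGPU slot of up to 90 seconds per call instead of the default. The generation call itself sits between the two hunks and is not visible in this diff, so the following is only a minimal sketch of the surviving function under stated assumptions: model, tokenizer, and deFormat exist at module scope in app.py, and the num_beams/do_sample arguments stand in for the hidden ones.

import torch
import spaces  # Hugging Face ZeroGPU decorator package

@spaces.GPU(duration=90)  # ask ZeroGPU for up to 90 s of GPU time per call
def beam_search_generate(text, beam_width=8, max_length=512):
    device = "cuda" if torch.cuda.is_available() else "cpu"

    messages = [
        {"role": "user", "content": text}
    ]

    # Render the chat template and move inputs and weights to the GPU slot.
    encodeds = tokenizer.apply_chat_template(
        messages, return_tensors="pt", add_generation_prompt=True
    )
    model_inputs = encodeds.to(device)
    model.to(device)

    # transformers runs beam search internally when num_beams > 1;
    # these arguments are illustrative, not the committed values.
    generated_ids = model.generate(
        model_inputs,
        max_new_tokens=max_length,
        num_beams=beam_width,
        do_sample=False,
        pad_token_id=tokenizer.pad_token_id,
    )
    decoded = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
    predict_object = deFormat(decoded[0])
    return ''.join(predict_object[1]['content'])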
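For reference, the deleted beam_search helper appears to have been dead code with latent bugs: sequences was seeded as [[start_token, 0.0]], so the loop's unpacking bound seq to the bare start token rather than to a token list, and predict_next is not a method that transformers models actually expose. Below is a corrected, self-contained sketch of the same length-capped beam search; predict_next is a hypothetical callable assumed to return one probability per vocabulary id.

import numpy as np

def beam_search(predict_next, start_tokens, beam_width=3, max_length=10, end_token='<end>'):
    # Each hypothesis is (token list, cumulative negative log-probability);
    # a lower score is better.
    sequences = [(list(start_tokens), 0.0)]

    def finished(seq):
        return seq[-1] == end_token or len(seq) >= max_length

    while not all(finished(seq) for seq, _ in sequences):
        all_candidates = []
        for seq, score in sequences:
            if finished(seq):
                # Carry completed hypotheses over unchanged.
                all_candidates.append((seq, score))
                continue
            # Expand every live hypothesis by every possible next token.
            for token, prob in enumerate(predict_next(seq)):
                all_candidates.append((seq + [token], score - np.log(prob)))

        # Order all candidates by score and keep the beam_width best.
        sequences = sorted(all_candidates, key=lambda tup: tup[1])[:beam_width]

    return sequences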