Spaces:
Runtime error
AingHongsin
committed on
Commit • 9c903ec
1 Parent(s): e1d533e
Update app.py
app.py CHANGED
@@ -64,60 +64,15 @@ def deFormat(data):
 
     return turns
 
-@spaces.GPU
-def generate(text):
-    device = zero.device
-
-    messages = [
-        {"role": "user", "content": text}
-    ]
-
-    encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt", add_generation_prompt=True)
-    # print(tokenizer.convert_ids_to_tokens(encodeds[0]))
-
-    model_inputs = encodeds.to(device)
-    model.to(device)
-
-    generated_ids = model.generate(model_inputs, max_new_tokens=512, do_sample=True, pad_token_id=tokenizer.pad_token_id)
-    decoded = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
-    predict_answer = deFormat(decoded[0])
-    return predict_answer
-
-@spaces.GPU
-def beam_search(model, start_token, beam_width=3, max_length=10):
-    sequences = [[start_token, 0.0]] # Initialize with start_token and score 0.0
-
-    while len(sequences[0][0]) < max_length:
-        all_candidates = []
-        for seq, score in sequences:
-            if seq[-1] == '<end>': # Assuming '<end>' is the end token
-                all_candidates.append((seq, score))
-                continue
-            next_token_probs = model.predict_next(seq)
-            for token, prob in enumerate(next_token_probs):
-                candidate = (seq + [token], score - np.log(prob))
-                all_candidates.append(candidate)
-
-        # Order all candidates by score
-        ordered = sorted(all_candidates, key=lambda tup: tup[1])
-
-        # Select k best
-        sequences = ordered[:beam_width]
-
-    return sequences
-
-@spaces.GPU
+@spaces.GPU(duration=90)
 def beam_search_generate(text, beam_width=8, max_length=512):
     device = "cuda" if torch.cuda.is_available() else "cpu"
 
-
-    messages = []
-
-    messages.append(
+    messages = [
         {
             "role": "user", "content": text
         }
-    )
+    ]
 
     encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt", add_generation_prompt=True)
     model_inputs = encodeds.to(device)
@@ -132,12 +87,7 @@ def beam_search_generate(text, beam_width=8, max_length=512):
     )
     decoded = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
     predict_object = deFormat(decoded[0])
-
-    messages.append(
-        {
-            "role": "assistent", "content": ''.join(predict_object[1]['content'])
-        }
-    )
+
     return ''.join(predict_object[1]['content'])
 
 
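Net effect of the commit: the unused sampling path (generate) and the hand-rolled beam_search helper are deleted, the dead messages.append block after decoding (whose role was misspelled "assistent") is dropped, and the surviving beam_search_generate is now decorated with @spaces.GPU(duration=90), which requests a ZeroGPU slot of up to 90 seconds per call instead of the default. The generation call itself sits between the two hunks and is not visible in this diff, so the following is only a minimal sketch of the surviving function under stated assumptions: model, tokenizer, and deFormat exist at module scope in app.py, and the num_beams/do_sample arguments stand in for the hidden ones.

import torch
import spaces  # Hugging Face ZeroGPU decorator package

@spaces.GPU(duration=90)  # ask ZeroGPU for up to 90 s of GPU time per call
def beam_search_generate(text, beam_width=8, max_length=512):
    device = "cuda" if torch.cuda.is_available() else "cpu"

    messages = [
        {"role": "user", "content": text}
    ]

    # Render the chat template and move inputs and weights to the GPU slot.
    encodeds = tokenizer.apply_chat_template(
        messages, return_tensors="pt", add_generation_prompt=True
    )
    model_inputs = encodeds.to(device)
    model.to(device)

    # transformers runs beam search internally when num_beams > 1;
    # these arguments are illustrative, not the committed values.
    generated_ids = model.generate(
        model_inputs,
        max_new_tokens=max_length,
        num_beams=beam_width,
        do_sample=False,
        pad_token_id=tokenizer.pad_token_id,
    )
    decoded = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
    predict_object = deFormat(decoded[0])
    return ''.join(predict_object[1]['content'])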
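For reference, the deleted beam_search helper appears to have been dead code with latent bugs: sequences was seeded as [[start_token, 0.0]], so the loop's unpacking bound seq to the bare start token rather than to a token list, and predict_next is not a method that transformers models actually expose. Below is a corrected, self-contained sketch of the same length-capped beam search; predict_next is a hypothetical callable assumed to return one probability per vocabulary id.

import numpy as np

def beam_search(predict_next, start_tokens, beam_width=3, max_length=10, end_token='<end>'):
    # Each hypothesis is (token list, cumulative negative log-probability);
    # a lower score is better.
    sequences = [(list(start_tokens), 0.0)]

    def finished(seq):
        return seq[-1] == end_token or len(seq) >= max_length

    while not all(finished(seq) for seq, _ in sequences):
        all_candidates = []
        for seq, score in sequences:
            if finished(seq):
                # Carry completed hypotheses over unchanged.
                all_candidates.append((seq, score))
                continue
            # Expand every live hypothesis by every possible next token.
            for token, prob in enumerate(predict_next(seq)):
                all_candidates.append((seq + [token], score - np.log(prob)))

        # Order all candidates by score and keep the beam_width best.
        sequences = sorted(all_candidates, key=lambda tup: tup[1])[:beam_width]

    return sequences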