Spaces: Runtime error

AingHongsin committed
Create app.py

app.py ADDED (+168 lines)
from transformers import AutoTokenizer, AutoModelForCausalLM
import numpy as np
import torch
import gradio as gr
import spaces


# Markers for the model's chat turn format
TURN_TEMPLATE = "<|im_start|>{role}\n{content}<eos>\n"
TURN_PREFIX = "<|im_start|>{role}\n"

start_token = "<|im_start|>"
end_token = "<eos>"


# Under ZeroGPU, .cuda() is deferred at startup, so this tensor still reports
# 'cpu' here; it only lands on the GPU inside @spaces.GPU functions.
zero = torch.Tensor([0]).cuda()
print(zero.device)  # <-- 'cpu' 🤔

# Load the fine-tuned model and tokenizer
surMuy_model_id = "AingHongsin/SurMuy_v1_512512201"
model = AutoModelForCausalLM.from_pretrained(
    surMuy_model_id,
    device_map={'': 0},
    revision="main",
    torch_dtype=torch.bfloat16,
)

tokenizer = AutoTokenizer.from_pretrained(surMuy_model_id)

model.eval()
model.to(zero.device)

def deFormat(data):
    # Find the start and end indices of each turn in the data
    turn_indices = []
    start_index = data.find(start_token)
    while start_index != -1:
        end_index = data.find(end_token, start_index)
        if end_index != -1:
            turn_indices.append((start_index, end_index + len(end_token)))
        else:
            turn_indices.append((start_index, len(data)))
        start_index = data.find(start_token, start_index + len(start_token))

    # Extract role and content for each turn
    turns = []
    for turn_start, turn_end in turn_indices:
        turn_data = data[turn_start:turn_end].strip()

        # Each turn follows TURN_TEMPLATE: "<|im_start|>{role}\n{content}<eos>\n"
        role_start = len(start_token)
        role_end = turn_data.find("\n", role_start)
        role = turn_data[role_start:role_end]

        content_start = role_end + 1
        content = turn_data[content_start:]

        turns.append({'role': role, 'content': content})

    return turns
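
# Worked example (assuming the turn markers survive decoding intact):
#   deFormat("<|im_start|>user\nHello<eos>\n<|im_start|>assistant\nHi<eos>\n")
#   -> [{'role': 'user', 'content': 'Hello<eos>'},
#       {'role': 'assistant', 'content': 'Hi<eos>'}]
# Note the trailing end_token stays in 'content', because each turn's end
# index includes len(end_token).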

@spaces.GPU
def generate(text):
    device = zero.device

    messages = [
        {"role": "user", "content": text}
    ]

    encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt", add_generation_prompt=True)
    # print(tokenizer.convert_ids_to_tokens(encodeds[0]))

    model_inputs = encodeds.to(device)
    model.to(device)

    generated_ids = model.generate(model_inputs, max_new_tokens=512, do_sample=True, pad_token_id=tokenizer.pad_token_id)
    decoded = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
    predict_answer = deFormat(decoded[0])
    return predict_answer
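
# Example round trip (a sketch; the reply is sampled, and the parse assumes
# the <|im_start|>/<eos> markers are not stripped by skip_special_tokens):
#   generate("Hello")
#   -> [{'role': 'user', 'content': ...}, {'role': 'assistant', 'content': ...}]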

# Reference beam-search sketch; it is never called by the app (the chat path
# below uses model.generate(num_beams=...) instead) and assumes a model object
# exposing predict_next(seq) -> per-token probabilities.
def beam_search(model, start_token, beam_width=3, max_length=10):
    sequences = [([start_token], 0.0)]  # one beam: the start token, score 0.0

    while len(sequences[0][0]) < max_length:
        all_candidates = []
        for seq, score in sequences:
            if seq[-1] == '<end>':  # Assuming '<end>' is the end token
                all_candidates.append((seq, score))
                continue
            next_token_probs = model.predict_next(seq)
            for token, prob in enumerate(next_token_probs):
                candidate = (seq + [token], score - np.log(prob))
                all_candidates.append(candidate)

        # Order all candidates by cumulative negative log-probability
        ordered = sorted(all_candidates, key=lambda tup: tup[1])

        # Keep the beam_width best
        sequences = ordered[:beam_width]

    return sequences
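
# Minimal sanity check for the sketch above, using a hypothetical toy model
# (DummyLM is not part of this app or any library):
#   class DummyLM:
#       def predict_next(self, seq):
#           return [0.1, 0.7, 0.2]  # fixed distribution over a 3-token vocab
#   beam_search(DummyLM(), '<s>', beam_width=2, max_length=4)
#   -> the 2 best (token_list, cumulative -log prob) pairs of length 4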

@spaces.GPU
def beam_search_generate(text, beam_width=8, max_length=512):
    device = "cuda" if torch.cuda.is_available() else "cpu"

    messages = [
        {"role": "user", "content": text}
    ]

    encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt", add_generation_prompt=True)
    model_inputs = encodeds.to(device)
    model.to(device)

    generated_ids = model.generate(
        model_inputs,
        max_new_tokens=max_length,
        num_beams=beam_width,
        early_stopping=True,
        pad_token_id=tokenizer.pad_token_id
    )
    decoded = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
    predict_object = deFormat(decoded[0])

    # Turn 0 is the user prompt; turn 1 is the model's reply
    messages.append(
        {"role": "assistant", "content": predict_object[1]['content']}
    )
    return predict_object[1]['content']


def yes_man(message, history):
    return beam_search_generate(message)


gr.ChatInterface(
    yes_man,
    chatbot=gr.Chatbot(height=650),
    textbox=gr.Textbox(placeholder="Write your message here", container=False, scale=7),
    # slider=gr.Slider(minimum=6, maximum=8, step=1, label="Beam Width"),
    title="Sur Muy",
    description="I am your assistant",
    # examples=["Hello", "Am I cool?", "Are tomatoes vegetables?"],
    cache_examples=True,
    undo_btn="Delete Previous",
    clear_btn="Clear",
).launch()