Spaces:

quancute
/

new_demo

Sleeping

App Files Files Community

quancute committed on Oct 22, 2024

Commit

c0fd4d8

verified ·

1 Parent(s): c031a23

Update app.py

Browse files

Files changed (1) hide show

app.py +103 -0

app.py CHANGED Viewed

	@@ -0,0 +1,103 @@

+import gradio as gr
+import torch
+from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
+from transformers import AutoTokenizer, AutoModelForCausalLM
+# Cấu hình mô hình
+MODEL = "Viet-Mistral/Vistral-7B-Chat"
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+print('device =', device)
+# Load mô hình và tokenizer
+model = AutoModelForCausalLM.from_pretrained(
+    'Viet-Mistral/Vistral-7B-Chat',
+    torch_dtype=torch.bfloat16, # change to torch.float16 if you're using V100
+    device_map="auto",
+    use_cache=True,
+    cache_dir='./hf_cache'
+)
+tokenizer = AutoTokenizer.from_pretrained(MODEL, cache_dir='./hf_cache')
+# def generate_output(input_text:str,
+#                     top_p:float=0.95,
+#                     top_k:int=40,
+#                     temperature:float=0.1,
+#                     repetition_penalty:float=1.05,
+#                     max_new_tokens:int=768):
+#     system_prompt = "Bạn là một trợ lí ảo Tiếng Việt về lĩnh vực y tế."
+#     conversation = [{"role": "system", "content": system_prompt }]
+#     human = f"Vui lòng trả lời câu hỏi sau: {input_text}"
+#     conversation.append({"role": "user", "content": human })
+#     # Chuyển các tensor đầu vào sang đúng thiết bị
+#     input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt").to(device)
+#     # Tạo đầu ra từ mô hình
+#     out_ids = model.generate(
+#         input_ids=input_ids,
+#         max_new_tokens=768,
+#         do_sample=True,
+#         top_p=0.95,
+#         top_k=40,
+#         temperature=0.1,
+#         repetition_penalty=1.05,
+#     )
+#     # Giải mã và in kết quả
+#     assistant = tokenizer.batch_decode(out_ids[:, input_ids.size(1):], skip_special_tokens=True)[0].strip()
+#     return assistant
+def respond(
+    message,
+    history: list[tuple[str, str]],
+    system_message: str,
+    max_tokens,
+    temperature,
+    top_p,
+):
+    sys_prompt = "Bạn là một trợ lí ảo Tiếng Việt về lĩnh vực y tế."
+    conversation = [{"role": "system", "content": sys_prompt}]
+    for val in history:
+        if val[0]:
+            conversation.append({"role": "user", "content": val[0]})
+        if val[1]:
+            messages.append({"role": "assistant", "content": val[1]})
+    conversation.append({"role": "user", "content": message})
+    input_ids_list = tokenizer.apply_chat_template(conversation, return_tensors="pt").to(device)
+    response = ""
+    for message in tokenizer.batch_decode(model.generate(
+        input_ids=input_ids,
+        max_new_tokens=max_tokens,
+        do_sample=True,
+        top_p=top_p,
+        temperature=temperature,
+    )[:, input_ids_list.size(1):], skip_special_tokens=True):
+        token = message.strip()
+        response += token
+        yield response
+demo = gr.ChatInterface(
+    respond,
+    additional_inputs=[
+        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
+        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+        gr.Slider(
+            minimum=0.1,
+            maximum=1.0,
+            value=0.95,
+            step=0.05,
+            label="Top-p (nucleus sampling)",
+        ),
+    ],
+)
+if __name__ == "__main__":
+    demo.launch()