Spaces:

GIGAParviz
/

Parviz_Mind

Sleeping

GIGAParviz committed on Aug 22, 2024

Commit

0a30333

verified ·

1 Parent(s): 00c1f43

Upload app.py

Files changed (1) hide show

app.py ADDED Viewed

+import gradio as gr
+from unsloth import FastLanguageModel
+import torch
+from transformers import TextStreamer
+model, tokenizer = FastLanguageModel.from_pretrained(
+    model_name = "/content/lora_model",
+    max_seq_length = 2048,
+    dtype = torch.float16,
+    load_in_4bit = True,
+)
+FastLanguageModel.for_inference(model)
+def predict(input_text):
+    messages = [
+        {"from": "human", "value": input_text},
+    ]
+    inputs = tokenizer.apply_chat_template(
+        messages,
+        tokenize=True,
+        add_generation_prompt=True,
+        return_tensors="pt",
+    ).to("cuda")
+    outputs = model.generate(
+        input_ids=inputs,
+        max_new_tokens=128,
+        use_cache=True
+    )
+    decoded_output = tokenizer.batch_decode(outputs, skip_special_tokens=True)
+    assistant_response = decoded_output[0].split('assistant
+')[-1].strip()
+    return assistant_response
+iface = gr.Interface(
+    fn=predict,
+    inputs="text",
+    outputs="text",
+    title="Parviz(eng) Chatbot",
+    description="A simple chatbot interface using FastLanguageModel.",
+)
+iface.launch()