GIGAParviz committed
Commit 0a30333 · verified · 1 Parent(s): 00c1f43

Upload app.py

Files changed (1)
  1. app.py +46 -0
app.py ADDED
@@ -0,0 +1,46 @@
+ import gradio as gr
+ from unsloth import FastLanguageModel
+ import torch
+
+ # Load the fine-tuned model from the local LoRA checkpoint in 4-bit precision for inference
+ model, tokenizer = FastLanguageModel.from_pretrained(
+     model_name = "/content/lora_model",
+     max_seq_length = 2048,
+     dtype = torch.float16,
+     load_in_4bit = True,
+ )
+ FastLanguageModel.for_inference(model)
+
+ def predict(input_text):
+     # Wrap the user input in the chat format expected by the tokenizer's template
+     messages = [
+         {"from": "human", "value": input_text},
+     ]
+     inputs = tokenizer.apply_chat_template(
+         messages,
+         tokenize=True,
+         add_generation_prompt=True,
+         return_tensors="pt",
+     ).to("cuda")
+
+     outputs = model.generate(
+         input_ids=inputs,
+         max_new_tokens=128,
+         use_cache=True,
+     )
+
+     # Decode the full sequence and keep only the assistant's reply
+     decoded_output = tokenizer.batch_decode(outputs, skip_special_tokens=True)
+     assistant_response = decoded_output[0].split('assistant\n')[-1].strip()
+
+     return assistant_response
+
+ iface = gr.Interface(
+     fn=predict,
+     inputs="text",
+     outputs="text",
+     title="Parviz(eng) Chatbot",
+     description="A simple chatbot interface using FastLanguageModel.",
+ )
+
+ iface.launch()
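
Once the app is running, the Interface can also be queried programmatically. The sketch below is a minimal example using gradio_client; the URL (Gradio's default local address) and the /predict endpoint name are assumptions and not part of this commit.

from gradio_client import Client

# Hypothetical usage: assumes the app above is running locally on the default port
client = Client("http://127.0.0.1:7860/")
result = client.predict("Hello, how are you?", api_name="/predict")
print(result)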