Danielrahmai1991 committed on
Commit
ee02a28
·
verified ·
1 Parent(s): 17d047d

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -0
app.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from threading import Thread

from transformers import TextIteratorStreamer
from unsloth import FastLanguageModel
import torch
import gradio as gr
6
+
7
# Checkpoint that backs the demo.
model_name = "Danielrahmai1991/llama32_ganjoor_adapt_basic_model_16bit_v1"

# Loader settings handed to FastLanguageModel.from_pretrained below.
# max_seq_length: sequence length requested from the loader (the upstream
#   template notes that RoPE scaling is handled internally).
# dtype: None lets the loader auto-detect; float16 suits Tesla T4 / V100,
#   bfloat16 suits Ampere and newer.
# load_in_4bit: 4-bit quantization to reduce memory usage; may be False.
max_seq_length = 2048
dtype = None
load_in_4bit = True

# For gated checkpoints (e.g. meta-llama/Llama-2-7b-hf) a `token="hf_..."`
# keyword can be added to this call.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
    trust_remote_code=True,
)

print("model loaded")
22
+
23
+
24
# Module-level text streamer bound to the loaded tokenizer. It is configured
# to skip the prompt and special tokens, with a 10-second queue timeout.
streamer = TextIteratorStreamer(
    tokenizer,
    timeout=10.0,
    skip_prompt=True,
    skip_special_tokens=True,
)
25
+
26
def generate_text(prompt, max_length, top_p, top_k):
    """Stream sampled text for ``prompt``, yielding the output as it grows.

    ``model.generate`` runs on a background thread and pushes decoded text
    into a streamer; this generator consumes the streamer and yields the
    accumulated text after every new chunk.

    Args:
        prompt: Text to condition generation on.
        max_length: Forwarded to ``model.generate`` as ``max_length`` after
            conversion with ``int()``.
        top_p: Forwarded as ``top_p`` after conversion with ``float()``.
        top_k: Forwarded as ``top_k`` after conversion with ``int()``.

    Yields:
        str: Everything generated so far (prompt excluded, special tokens
        stripped), once per chunk received from the streamer.
    """
    # One streamer per call. A shared module-level streamer lets chunks (and
    # the end-of-stream signal) from a timed-out or still-running generation
    # leak into the next request's output.
    text_streamer = TextIteratorStreamer(
        tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True
    )

    # Move the tokenized inputs onto the model's device so generate() does
    # not mix devices.
    inputs = tokenizer([prompt], return_tensors="pt").to(model.device)

    generate_kwargs = dict(
        inputs,
        max_length=int(max_length),
        top_p=float(top_p),
        do_sample=True,
        top_k=int(top_k),
        streamer=text_streamer,
    )

    # generate() blocks until completion, so it runs on a worker thread while
    # this generator relays partial output.
    worker = Thread(target=model.generate, kwargs=generate_kwargs)
    worker.start()

    generated_text = ""
    for text in text_streamer:
        generated_text += text
        yield generated_text

    # The streamer has signalled end-of-stream; wait for the worker to finish
    # so no generation thread outlives its request.
    worker.join()
42
+
43
+
44
# Markdown text passed to the Interface as its description.
description = """
# Deploy our LLM
"""

# Input widgets, in the positional order of generate_text's parameters:
# (prompt, max_length, top_p, top_k). The Textbox defaults are strings;
# generate_text converts them with int() / float().
inputs = [
    gr.Textbox(label="Prompt text"),
    gr.Textbox(label="max-lenth generation", value="100"),
    gr.Slider(0.0, 1.0, label="top-p value", value=0.95),
    gr.Textbox(label="top-k", value="50"),
]
outputs = [gr.Textbox(label="Generated Text")]

# allow_flagging takes the string "never" to disable flagging; the boolean
# form is deprecated and rejected by newer Gradio releases.
demo = gr.Interface(
    fn=generate_text,
    inputs=inputs,
    outputs=outputs,
    allow_flagging="never",
    description=description,
)

demo.launch(debug=True, share=True)