tranquilkd commited on
Commit
79cdb88
·
1 Parent(s): 3d629d9
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.json filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
File without changes
app.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ import gradio as gr
4
+ from peft import PeftModel, PeftConfig
5
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
6
+
7
+
8
def load_model(model_path):
    """Load the 4-bit quantized base model and attach the LoRA adapter.

    Args:
        model_path: Directory holding the PEFT adapter config and weights.

    Returns:
        A ``(model, tokenizer)`` pair ready for text generation.
    """
    peft_config = PeftConfig.from_pretrained(model_path)

    # NF4 4-bit quantization with fp16 compute keeps the base model small
    # enough for a modest GPU while preserving generation quality.
    quant_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
    )

    base_model = AutoModelForCausalLM.from_pretrained(
        peft_config.base_model_name_or_path,
        quantization_config=quant_config,
        trust_remote_code=True,
    )
    base_model.config.use_cache = False

    tokenizer = AutoTokenizer.from_pretrained(peft_config.base_model_name_or_path)
    # The base model ships without a pad token; reuse EOS for padding.
    tokenizer.pad_token = tokenizer.eos_token

    # Wrap the quantized base model with the trained LoRA adapter weights.
    lora_model = PeftModel.from_pretrained(base_model, model_path)

    return lora_model, tokenizer
30
+
31
+
32
def generate_text(prompt):
    """Generate a chat reply for *prompt* with the module-level model.

    The prompt is wrapped in the "<user>: ... <bot>:" template the adapter
    was fine-tuned on, and the bot's portion of the decoded output is
    returned.

    Args:
        prompt: Raw user message.

    Returns:
        The generated reply as a plain string.
    """
    templated = "<user>: " + prompt + " <bot>:"

    batch = tokenizer(
        templated,
        padding=True,
        truncation=True,
        return_tensors='pt',
    ).to(device)

    # inference_mode: generation needs no autograd graph; autocast keeps the
    # fp16 compute path of the 4-bit quantized model.
    with torch.inference_mode(), torch.amp.autocast(device):
        output_tokens = model.generate(
            input_ids=batch.input_ids,
            # Pass the mask explicitly so padded positions are ignored
            # (also silences the transformers missing-attention-mask warning).
            attention_mask=batch.attention_mask,
            max_new_tokens=200,
            # do_sample=True is required for temperature/top_p to take effect;
            # without it decoding is greedy and both knobs are silently ignored.
            do_sample=True,
            temperature=0.7,
            top_p=0.7,
            num_return_sequences=1,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )

    generated_text = tokenizer.decode(output_tokens[0], skip_special_tokens=True)

    # Keep only the text after the "<user>: " marker, then after the last
    # "<bot>: " marker. partition() avoids the IndexError the original
    # split(...)[1] raised when the marker was absent from the decode.
    _, sep, after_user = generated_text.partition("<user>: ")
    reply_source = after_user if sep else generated_text
    return reply_source.split("<bot>: ")[-1]
57
+
58
+
59
# Pick the device once and load the model at import time so the Gradio
# worker reuses the same model instance across requests.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model, tokenizer = load_model(os.path.join(os.getcwd(), "weights"))


iface = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Textbox(label="Prompt", placeholder="Enter your prompt here..."),
    ],
    outputs=gr.Textbox(label="Generated Text"),
    title="LLaMA-3.2-3B-Instruct-QLoRA",
    # Fixed truncated model name ("-3.2-3B-Instruct" lacked the "LLaMA" prefix
    # used in the title above).
    description="LLaMA-3.2-3B-Instruct Finetuned using QLoRA on OpenAssistant/oasst1",
    examples=[
        ["can you describe winter?"],
        ["How about we play a fun game?"],
    ],
)


if __name__ == "__main__":
    iface.launch()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ torch
2
+ transformers
3
+ peft
4
+ gradio
weights/adapter_config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5edf17dcd1994fbdb99ea77f16640f8ff15b9535da8cc77a7b498689cc77aad2
3
+ size 805
weights/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb09470a5968c342cba08e1b53f26df379cb13ccdc1dfa073aa31407e2989b92
3
+ size 97307544
weights/special_tokens_map.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b1835caa5b4d70acaa210fa222b0036f1882f9525c4660fd4810fb3e1e40ff8
3
+ size 325
weights/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b
3
+ size 17209920
weights/tokenizer_config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e992b5c11bc0255aa43b342a3943bdf08f88a8ebed72667d290f5a51bb59db8a
3
+ size 54583