Upload load_pretrained_tinyLLAMA_model_finetuned (2).ipynb
load_pretrained_tinyLLAMA_model_finetuned (2).ipynb
ADDED
@@ -0,0 +1,200 @@
{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "gpuType": "T4"
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    },
    "accelerator": "GPU"
  },
  "cells": [
    {
      "cell_type": "code",
      "execution_count": 1,
      "metadata": {
        "id": "Hyg4prvKsTfC"
      },
      "outputs": [],
      "source": [
        "#!pip install bitsandbytes\n"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "#pip install accelerate"
      ],
      "metadata": {
        "id": "MVqLMtMt0Uhc"
      },
      "execution_count": 15,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "#!pip install accelerate\n",
        "#!pip install bitsandbytes -i https://pypi.org/simple/\n"
      ],
      "metadata": {
        "id": "BeHjgLP_0laH"
      },
      "execution_count": 16,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Load model directly\n",
        "from transformers import AutoTokenizer, AutoModelForCausalLM\n",
        "\n",
        "#tokenizer = AutoTokenizer.from_pretrained(\"harry85/tokenizer-finetuned-TinyLLAMA\")\n",
        "#model = AutoModelForCausalLM.from_pretrained(\"unsloth/tinyllama-bnb-4bit\")\n",
        "# Load model directly\n",
        "#from transformers import AutoTokenizer, AutoModelForCausalLM\n",
        "# Load model directly\n",
        "from transformers import AutoTokenizer, AutoModelForCausalLM\n",
        "\n",
        "tokenizer = AutoTokenizer.from_pretrained(\"harry85/finetuned-TinyLLAMA-own-data-07\")\n",
        "model = AutoModelForCausalLM.from_pretrained(\"harry85/finetuned-TinyLLAMA-own-data-07\")\n",
        "\n",
        "\n",
        "# Enable native 2x faster inference if supported\n",
        "# This feature depends on the specific model and framework used; modify as needed.\n",
        "# For example, in the case of some models, you can use model.half() to convert to FP16 for faster inference.\n",
        "\n",
        "# Define the Alpaca prompt\n",
        "alpaca_prompt = \"\"\"\\\n",
        "### Instruction:\n",
        "{0}\n",
        "\n",
        "### Input:\n",
        "\n",
        "{1}\n",
        "\n",
        "### Response:\n",
        "{2}\"\"\"\n",
        "\n",
        "# Prepare the input\n",
        "inputs = tokenizer(\n",
        "    [\n",
        "        alpaca_prompt.format(\n",
        "            \"Continue the Fibonacci sequence.\", # instruction\n",
        "            \"1, 1, 2, 3, 5, 8\", # input\n",
        "            \"\" # output - leave this blank for generation!\n",
        "        )\n",
        "    ],\n",
        "    return_tensors=\"pt\"\n",
        ").to(\"cuda\")\n",
        "\n",
        "# Generate the output\n",
        "outputs = model.generate(**inputs, max_new_tokens=64, use_cache=True)\n",
        "\n",
        "# Decode and print the output\n",
        "response = tokenizer.batch_decode(outputs, skip_special_tokens=True)\n",
        "print(response)\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "RxlJ3o75sUrN",
        "outputId": "1c767815-7def-42a3-f250-69c7bbe802b7"
      },
      "execution_count": 24,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "Unused kwargs: ['quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.\n",
            "`low_cpu_mem_usage` was None, now set to True since model is quantized.\n",
"Some weights of the model checkpoint at harry85/finetuned-TinyLLAMA-own-data-07 were not used when initializing LlamaForCausalLM: ['base_model.model.model.layers.0.mlp.down_proj.lora_A.weight', 'base_model.model.model.layers.0.mlp.down_proj.lora_B.weight', 'base_model.model.model.layers.0.mlp.gate_proj.lora_A.weight', 'base_model.model.model.layers.0.mlp.gate_proj.lora_B.weight', 'base_model.model.model.layers.0.mlp.up_proj.lora_A.weight', 'base_model.model.model.layers.0.mlp.up_proj.lora_B.weight', 'base_model.model.model.layers.0.self_attn.k_proj.lora_A.weight', 'base_model.model.model.layers.0.self_attn.k_proj.lora_B.weight', 'base_model.model.model.layers.0.self_attn.o_proj.lora_A.weight', 'base_model.model.model.layers.0.self_attn.o_proj.lora_B.weight', 'base_model.model.model.layers.0.self_attn.q_proj.lora_A.weight', 'base_model.model.model.layers.0.self_attn.q_proj.lora_B.weight', 'base_model.model.model.layers.0.self_attn.v_proj.lora_A.weight', 'base_model.model.model.layers.0.self_attn.v_proj.lora_B.weight', 'base_model.model.model.layers.1.mlp.down_proj.lora_A.weight', 'base_model.model.model.layers.1.mlp.down_proj.lora_B.weight', 'base_model.model.model.layers.1.mlp.gate_proj.lora_A.weight', 'base_model.model.model.layers.1.mlp.gate_proj.lora_B.weight', 'base_model.model.model.layers.1.mlp.up_proj.lora_A.weight', 'base_model.model.model.layers.1.mlp.up_proj.lora_B.weight', 'base_model.model.model.layers.1.self_attn.k_proj.lora_A.weight', 'base_model.model.model.layers.1.self_attn.k_proj.lora_B.weight', 'base_model.model.model.layers.1.self_attn.o_proj.lora_A.weight', 'base_model.model.model.layers.1.self_attn.o_proj.lora_B.weight', 'base_model.model.model.layers.1.self_attn.q_proj.lora_A.weight', 'base_model.model.model.layers.1.self_attn.q_proj.lora_B.weight', 'base_model.model.model.layers.1.self_attn.v_proj.lora_A.weight', 'base_model.model.model.layers.1.self_attn.v_proj.lora_B.weight', 'base_model.model.model.layers.10.mlp.down_proj.lora_A.weight', 'base_model.model.model.layers.10.mlp.down_proj.lora_B.weight', 'base_model.model.model.layers.10.mlp.gate_proj.lora_A.weight', 'base_model.model.model.layers.10.mlp.gate_proj.lora_B.weight', 'base_model.model.model.layers.10.mlp.up_proj.lora_A.weight', 'base_model.model.model.layers.10.mlp.up_proj.lora_B.weight', 'base_model.model.model.layers.10.self_attn.k_proj.lora_A.weight', 'base_model.model.model.layers.10.self_attn.k_proj.lora_B.weight', 'base_model.model.model.layers.10.self_attn.o_proj.lora_A.weight', 'base_model.model.model.layers.10.self_attn.o_proj.lora_B.weight', 'base_model.model.model.layers.10.self_attn.q_proj.lora_A.weight', 'base_model.model.model.layers.10.self_attn.q_proj.lora_B.weight', 'base_model.model.model.layers.10.self_attn.v_proj.lora_A.weight', 'base_model.model.model.layers.10.self_attn.v_proj.lora_B.weight', 'base_model.model.model.layers.11.mlp.down_proj.lora_A.weight', 'base_model.model.model.layers.11.mlp.down_proj.lora_B.weight', 'base_model.model.model.layers.11.mlp.gate_proj.lora_A.weight', 'base_model.model.model.layers.11.mlp.gate_proj.lora_B.weight', 'base_model.model.model.layers.11.mlp.up_proj.lora_A.weight', 'base_model.model.model.layers.11.mlp.up_proj.lora_B.weight', 'base_model.model.model.layers.11.self_attn.k_proj.lora_A.weight', 'base_model.model.model.layers.11.self_attn.k_proj.lora_B.weight', 'base_model.model.model.layers.11.self_attn.o_proj.lora_A.weight', 'base_model.model.model.layers.11.self_attn.o_proj.lora_B.weight', 
'base_model.model.model.layers.11.self_attn.q_proj.lora_A.weight', 'base_model.model.model.layers.11.self_attn.q_proj.lora_B.weight', 'base_model.model.model.layers.11.self_attn.v_proj.lora_A.weight', 'base_model.model.model.layers.11.self_attn.v_proj.lora_B.weight', 'base_model.model.model.layers.12.mlp.down_proj.lora_A.weight', 'base_model.model.model.layers.12.mlp.down_proj.lora_B.weight', 'base_model.model.model.layers.12.mlp.gate_proj.lora_A.weight', 'base_model.model.model.layers.12.mlp.gate_proj.lora_B.weight', 'base_model.model.model.layers.12.mlp.up_proj.lora_A.weight', 'base_model.model.model.layers.12.mlp.up_proj.lora_B.weight', 'base_model.model.model.layers.12.self_attn.k_proj.lora_A.weight', 'base_model.model.model.layers.12.self_attn.k_proj.lora_B.weight', 'base_model.model.model.layers.12.self_attn.o_proj.lora_A.weight', 'base_model.model.model.layers.12.self_attn.o_proj.lora_B.weight', 'base_model.model.model.layers.12.self_attn.q_proj.lora_A.weight', 'base_model.model.model.layers.12.self_attn.q_proj.lora_B.weight', 'base_model.model.model.layers.12.self_attn.v_proj.lora_A.weight', 'base_model.model.model.layers.12.self_attn.v_proj.lora_B.weight', 'base_model.model.model.layers.13.mlp.down_proj.lora_A.weight', 'base_model.model.model.layers.13.mlp.down_proj.lora_B.weight', 'base_model.model.model.layers.13.mlp.gate_proj.lora_A.weight', 'base_model.model.model.layers.13.mlp.gate_proj.lora_B.weight', 'base_model.model.model.layers.13.mlp.up_proj.lora_A.weight', 'base_model.model.model.layers.13.mlp.up_proj.lora_B.weight', 'base_model.model.model.layers.13.self_attn.k_proj.lora_A.weight', 'base_model.model.model.layers.13.self_attn.k_proj.lora_B.weight', 'base_model.model.model.layers.13.self_attn.o_proj.lora_A.weight', 'base_model.model.model.layers.13.self_attn.o_proj.lora_B.weight', 'base_model.model.model.layers.13.self_attn.q_proj.lora_A.weight', 'base_model.model.model.layers.13.self_attn.q_proj.lora_B.weight', 'base_model.model.model.layers.13.self_attn.v_proj.lora_A.weight', 'base_model.model.model.layers.13.self_attn.v_proj.lora_B.weight', 'base_model.model.model.layers.14.mlp.down_proj.lora_A.weight', 'base_model.model.model.layers.14.mlp.down_proj.lora_B.weight', 'base_model.model.model.layers.14.mlp.gate_proj.lora_A.weight', 'base_model.model.model.layers.14.mlp.gate_proj.lora_B.weight', 'base_model.model.model.layers.14.mlp.up_proj.lora_A.weight', 'base_model.model.model.layers.14.mlp.up_proj.lora_B.weight', 'base_model.model.model.layers.14.self_attn.k_proj.lora_A.weight', 'base_model.model.model.layers.14.self_attn.k_proj.lora_B.weight', 'base_model.model.model.layers.14.self_attn.o_proj.lora_A.weight', 'base_model.model.model.layers.14.self_attn.o_proj.lora_B.weight', 'base_model.model.model.layers.14.self_attn.q_proj.lora_A.weight', 'base_model.model.model.layers.14.self_attn.q_proj.lora_B.weight', 'base_model.model.model.layers.14.self_attn.v_proj.lora_A.weight', 'base_model.model.model.layers.14.self_attn.v_proj.lora_B.weight', 'base_model.model.model.layers.15.mlp.down_proj.lora_A.weight', 'base_model.model.model.layers.15.mlp.down_proj.lora_B.weight', 'base_model.model.model.layers.15.mlp.gate_proj.lora_A.weight', 'base_model.model.model.layers.15.mlp.gate_proj.lora_B.weight', 'base_model.model.model.layers.15.mlp.up_proj.lora_A.weight', 'base_model.model.model.layers.15.mlp.up_proj.lora_B.weight', 'base_model.model.model.layers.15.self_attn.k_proj.lora_A.weight', 'base_model.model.model.layers.15.self_attn.k_proj.lora_B.weight', 
'base_model.model.model.layers.15.self_attn.o_proj.lora_A.weight', 'base_model.model.model.layers.15.self_attn.o_proj.lora_B.weight', 'base_model.model.model.layers.15.self_attn.q_proj.lora_A.weight', 'base_model.model.model.layers.15.self_attn.q_proj.lora_B.weight', 'base_model.model.model.layers.15.self_attn.v_proj.lora_A.weight', 'base_model.model.model.layers.15.self_attn.v_proj.lora_B.weight', 'base_model.model.model.layers.16.mlp.down_proj.lora_A.weight', 'base_model.model.model.layers.16.mlp.down_proj.lora_B.weight', 'base_model.model.model.layers.16.mlp.gate_proj.lora_A.weight', 'base_model.model.model.layers.16.mlp.gate_proj.lora_B.weight', 'base_model.model.model.layers.16.mlp.up_proj.lora_A.weight', 'base_model.model.model.layers.16.mlp.up_proj.lora_B.weight', 'base_model.model.model.layers.16.self_attn.k_proj.lora_A.weight', 'base_model.model.model.layers.16.self_attn.k_proj.lora_B.weight', 'base_model.model.model.layers.16.self_attn.o_proj.lora_A.weight', 'base_model.model.model.layers.16.self_attn.o_proj.lora_B.weight', 'base_model.model.model.layers.16.self_attn.q_proj.lora_A.weight', 'base_model.model.model.layers.16.self_attn.q_proj.lora_B.weight', 'base_model.model.model.layers.16.self_attn.v_proj.lora_A.weight', 'base_model.model.model.layers.16.self_attn.v_proj.lora_B.weight', 'base_model.model.model.layers.17.mlp.down_proj.lora_A.weight', 'base_model.model.model.layers.17.mlp.down_proj.lora_B.weight', 'base_model.model.model.layers.17.mlp.gate_proj.lora_A.weight', 'base_model.model.model.layers.17.mlp.gate_proj.lora_B.weight', 'base_model.model.model.layers.17.mlp.up_proj.lora_A.weight', 'base_model.model.model.layers.17.mlp.up_proj.lora_B.weight', 'base_model.model.model.layers.17.self_attn.k_proj.lora_A.weight', 'base_model.model.model.layers.17.self_attn.k_proj.lora_B.weight', 'base_model.model.model.layers.17.self_attn.o_proj.lora_A.weight', 'base_model.model.model.layers.17.self_attn.o_proj.lora_B.weight', 'base_model.model.model.layers.17.self_attn.q_proj.lora_A.weight', 'base_model.model.model.layers.17.self_attn.q_proj.lora_B.weight', 'base_model.model.model.layers.17.self_attn.v_proj.lora_A.weight', 'base_model.model.model.layers.17.self_attn.v_proj.lora_B.weight', 'base_model.model.model.layers.18.mlp.down_proj.lora_A.weight', 'base_model.model.model.layers.18.mlp.down_proj.lora_B.weight', 'base_model.model.model.layers.18.mlp.gate_proj.lora_A.weight', 'base_model.model.model.layers.18.mlp.gate_proj.lora_B.weight', 'base_model.model.model.layers.18.mlp.up_proj.lora_A.weight', 'base_model.model.model.layers.18.mlp.up_proj.lora_B.weight', 'base_model.model.model.layers.18.self_attn.k_proj.lora_A.weight', 'base_model.model.model.layers.18.self_attn.k_proj.lora_B.weight', 'base_model.model.model.layers.18.self_attn.o_proj.lora_A.weight', 'base_model.model.model.layers.18.self_attn.o_proj.lora_B.weight', 'base_model.model.model.layers.18.self_attn.q_proj.lora_A.weight', 'base_model.model.model.layers.18.self_attn.q_proj.lora_B.weight', 'base_model.model.model.layers.18.self_attn.v_proj.lora_A.weight', 'base_model.model.model.layers.18.self_attn.v_proj.lora_B.weight', 'base_model.model.model.layers.19.mlp.down_proj.lora_A.weight', 'base_model.model.model.layers.19.mlp.down_proj.lora_B.weight', 'base_model.model.model.layers.19.mlp.gate_proj.lora_A.weight', 'base_model.model.model.layers.19.mlp.gate_proj.lora_B.weight', 'base_model.model.model.layers.19.mlp.up_proj.lora_A.weight', 'base_model.model.model.layers.19.mlp.up_proj.lora_B.weight', 
'base_model.model.model.layers.19.self_attn.k_proj.lora_A.weight', 'base_model.model.model.layers.19.self_attn.k_proj.lora_B.weight', 'base_model.model.model.layers.19.self_attn.o_proj.lora_A.weight', 'base_model.model.model.layers.19.self_attn.o_proj.lora_B.weight', 'base_model.model.model.layers.19.self_attn.q_proj.lora_A.weight', 'base_model.model.model.layers.19.self_attn.q_proj.lora_B.weight', 'base_model.model.model.layers.19.self_attn.v_proj.lora_A.weight', 'base_model.model.model.layers.19.self_attn.v_proj.lora_B.weight', 'base_model.model.model.layers.2.mlp.down_proj.lora_A.weight', 'base_model.model.model.layers.2.mlp.down_proj.lora_B.weight', 'base_model.model.model.layers.2.mlp.gate_proj.lora_A.weight', 'base_model.model.model.layers.2.mlp.gate_proj.lora_B.weight', 'base_model.model.model.layers.2.mlp.up_proj.lora_A.weight', 'base_model.model.model.layers.2.mlp.up_proj.lora_B.weight', 'base_model.model.model.layers.2.self_attn.k_proj.lora_A.weight', 'base_model.model.model.layers.2.self_attn.k_proj.lora_B.weight', 'base_model.model.model.layers.2.self_attn.o_proj.lora_A.weight', 'base_model.model.model.layers.2.self_attn.o_proj.lora_B.weight', 'base_model.model.model.layers.2.self_attn.q_proj.lora_A.weight', 'base_model.model.model.layers.2.self_attn.q_proj.lora_B.weight', 'base_model.model.model.layers.2.self_attn.v_proj.lora_A.weight', 'base_model.model.model.layers.2.self_attn.v_proj.lora_B.weight', 'base_model.model.model.layers.20.mlp.down_proj.lora_A.weight', 'base_model.model.model.layers.20.mlp.down_proj.lora_B.weight', 'base_model.model.model.layers.20.mlp.gate_proj.lora_A.weight', 'base_model.model.model.layers.20.mlp.gate_proj.lora_B.weight', 'base_model.model.model.layers.20.mlp.up_proj.lora_A.weight', 'base_model.model.model.layers.20.mlp.up_proj.lora_B.weight', 'base_model.model.model.layers.20.self_attn.k_proj.lora_A.weight', 'base_model.model.model.layers.20.self_attn.k_proj.lora_B.weight', 'base_model.model.model.layers.20.self_attn.o_proj.lora_A.weight', 'base_model.model.model.layers.20.self_attn.o_proj.lora_B.weight', 'base_model.model.model.layers.20.self_attn.q_proj.lora_A.weight', 'base_model.model.model.layers.20.self_attn.q_proj.lora_B.weight', 'base_model.model.model.layers.20.self_attn.v_proj.lora_A.weight', 'base_model.model.model.layers.20.self_attn.v_proj.lora_B.weight', 'base_model.model.model.layers.21.mlp.down_proj.lora_A.weight', 'base_model.model.model.layers.21.mlp.down_proj.lora_B.weight', 'base_model.model.model.layers.21.mlp.gate_proj.lora_A.weight', 'base_model.model.model.layers.21.mlp.gate_proj.lora_B.weight', 'base_model.model.model.layers.21.mlp.up_proj.lora_A.weight', 'base_model.model.model.layers.21.mlp.up_proj.lora_B.weight', 'base_model.model.model.layers.21.self_attn.k_proj.lora_A.weight', 'base_model.model.model.layers.21.self_attn.k_proj.lora_B.weight', 'base_model.model.model.layers.21.self_attn.o_proj.lora_A.weight', 'base_model.model.model.layers.21.self_attn.o_proj.lora_B.weight', 'base_model.model.model.layers.21.self_attn.q_proj.lora_A.weight', 'base_model.model.model.layers.21.self_attn.q_proj.lora_B.weight', 'base_model.model.model.layers.21.self_attn.v_proj.lora_A.weight', 'base_model.model.model.layers.21.self_attn.v_proj.lora_B.weight', 'base_model.model.model.layers.3.mlp.down_proj.lora_A.weight', 'base_model.model.model.layers.3.mlp.down_proj.lora_B.weight', 'base_model.model.model.layers.3.mlp.gate_proj.lora_A.weight', 'base_model.model.model.layers.3.mlp.gate_proj.lora_B.weight', 
'base_model.model.model.layers.3.mlp.up_proj.lora_A.weight', 'base_model.model.model.layers.3.mlp.up_proj.lora_B.weight', 'base_model.model.model.layers.3.self_attn.k_proj.lora_A.weight', 'base_model.model.model.layers.3.self_attn.k_proj.lora_B.weight', 'base_model.model.model.layers.3.self_attn.o_proj.lora_A.weight', 'base_model.model.model.layers.3.self_attn.o_proj.lora_B.weight', 'base_model.model.model.layers.3.self_attn.q_proj.lora_A.weight', 'base_model.model.model.layers.3.self_attn.q_proj.lora_B.weight', 'base_model.model.model.layers.3.self_attn.v_proj.lora_A.weight', 'base_model.model.model.layers.3.self_attn.v_proj.lora_B.weight', 'base_model.model.model.layers.4.mlp.down_proj.lora_A.weight', 'base_model.model.model.layers.4.mlp.down_proj.lora_B.weight', 'base_model.model.model.layers.4.mlp.gate_proj.lora_A.weight', 'base_model.model.model.layers.4.mlp.gate_proj.lora_B.weight', 'base_model.model.model.layers.4.mlp.up_proj.lora_A.weight', 'base_model.model.model.layers.4.mlp.up_proj.lora_B.weight', 'base_model.model.model.layers.4.self_attn.k_proj.lora_A.weight', 'base_model.model.model.layers.4.self_attn.k_proj.lora_B.weight', 'base_model.model.model.layers.4.self_attn.o_proj.lora_A.weight', 'base_model.model.model.layers.4.self_attn.o_proj.lora_B.weight', 'base_model.model.model.layers.4.self_attn.q_proj.lora_A.weight', 'base_model.model.model.layers.4.self_attn.q_proj.lora_B.weight', 'base_model.model.model.layers.4.self_attn.v_proj.lora_A.weight', 'base_model.model.model.layers.4.self_attn.v_proj.lora_B.weight', 'base_model.model.model.layers.5.mlp.down_proj.lora_A.weight', 'base_model.model.model.layers.5.mlp.down_proj.lora_B.weight', 'base_model.model.model.layers.5.mlp.gate_proj.lora_A.weight', 'base_model.model.model.layers.5.mlp.gate_proj.lora_B.weight', 'base_model.model.model.layers.5.mlp.up_proj.lora_A.weight', 'base_model.model.model.layers.5.mlp.up_proj.lora_B.weight', 'base_model.model.model.layers.5.self_attn.k_proj.lora_A.weight', 'base_model.model.model.layers.5.self_attn.k_proj.lora_B.weight', 'base_model.model.model.layers.5.self_attn.o_proj.lora_A.weight', 'base_model.model.model.layers.5.self_attn.o_proj.lora_B.weight', 'base_model.model.model.layers.5.self_attn.q_proj.lora_A.weight', 'base_model.model.model.layers.5.self_attn.q_proj.lora_B.weight', 'base_model.model.model.layers.5.self_attn.v_proj.lora_A.weight', 'base_model.model.model.layers.5.self_attn.v_proj.lora_B.weight', 'base_model.model.model.layers.6.mlp.down_proj.lora_A.weight', 'base_model.model.model.layers.6.mlp.down_proj.lora_B.weight', 'base_model.model.model.layers.6.mlp.gate_proj.lora_A.weight', 'base_model.model.model.layers.6.mlp.gate_proj.lora_B.weight', 'base_model.model.model.layers.6.mlp.up_proj.lora_A.weight', 'base_model.model.model.layers.6.mlp.up_proj.lora_B.weight', 'base_model.model.model.layers.6.self_attn.k_proj.lora_A.weight', 'base_model.model.model.layers.6.self_attn.k_proj.lora_B.weight', 'base_model.model.model.layers.6.self_attn.o_proj.lora_A.weight', 'base_model.model.model.layers.6.self_attn.o_proj.lora_B.weight', 'base_model.model.model.layers.6.self_attn.q_proj.lora_A.weight', 'base_model.model.model.layers.6.self_attn.q_proj.lora_B.weight', 'base_model.model.model.layers.6.self_attn.v_proj.lora_A.weight', 'base_model.model.model.layers.6.self_attn.v_proj.lora_B.weight', 'base_model.model.model.layers.7.mlp.down_proj.lora_A.weight', 'base_model.model.model.layers.7.mlp.down_proj.lora_B.weight', 'base_model.model.model.layers.7.mlp.gate_proj.lora_A.weight', 
'base_model.model.model.layers.7.mlp.gate_proj.lora_B.weight', 'base_model.model.model.layers.7.mlp.up_proj.lora_A.weight', 'base_model.model.model.layers.7.mlp.up_proj.lora_B.weight', 'base_model.model.model.layers.7.self_attn.k_proj.lora_A.weight', 'base_model.model.model.layers.7.self_attn.k_proj.lora_B.weight', 'base_model.model.model.layers.7.self_attn.o_proj.lora_A.weight', 'base_model.model.model.layers.7.self_attn.o_proj.lora_B.weight', 'base_model.model.model.layers.7.self_attn.q_proj.lora_A.weight', 'base_model.model.model.layers.7.self_attn.q_proj.lora_B.weight', 'base_model.model.model.layers.7.self_attn.v_proj.lora_A.weight', 'base_model.model.model.layers.7.self_attn.v_proj.lora_B.weight', 'base_model.model.model.layers.8.mlp.down_proj.lora_A.weight', 'base_model.model.model.layers.8.mlp.down_proj.lora_B.weight', 'base_model.model.model.layers.8.mlp.gate_proj.lora_A.weight', 'base_model.model.model.layers.8.mlp.gate_proj.lora_B.weight', 'base_model.model.model.layers.8.mlp.up_proj.lora_A.weight', 'base_model.model.model.layers.8.mlp.up_proj.lora_B.weight', 'base_model.model.model.layers.8.self_attn.k_proj.lora_A.weight', 'base_model.model.model.layers.8.self_attn.k_proj.lora_B.weight', 'base_model.model.model.layers.8.self_attn.o_proj.lora_A.weight', 'base_model.model.model.layers.8.self_attn.o_proj.lora_B.weight', 'base_model.model.model.layers.8.self_attn.q_proj.lora_A.weight', 'base_model.model.model.layers.8.self_attn.q_proj.lora_B.weight', 'base_model.model.model.layers.8.self_attn.v_proj.lora_A.weight', 'base_model.model.model.layers.8.self_attn.v_proj.lora_B.weight', 'base_model.model.model.layers.9.mlp.down_proj.lora_A.weight', 'base_model.model.model.layers.9.mlp.down_proj.lora_B.weight', 'base_model.model.model.layers.9.mlp.gate_proj.lora_A.weight', 'base_model.model.model.layers.9.mlp.gate_proj.lora_B.weight', 'base_model.model.model.layers.9.mlp.up_proj.lora_A.weight', 'base_model.model.model.layers.9.mlp.up_proj.lora_B.weight', 'base_model.model.model.layers.9.self_attn.k_proj.lora_A.weight', 'base_model.model.model.layers.9.self_attn.k_proj.lora_B.weight', 'base_model.model.model.layers.9.self_attn.o_proj.lora_A.weight', 'base_model.model.model.layers.9.self_attn.o_proj.lora_B.weight', 'base_model.model.model.layers.9.self_attn.q_proj.lora_A.weight', 'base_model.model.model.layers.9.self_attn.q_proj.lora_B.weight', 'base_model.model.model.layers.9.self_attn.v_proj.lora_A.weight', 'base_model.model.model.layers.9.self_attn.v_proj.lora_B.weight']\n",
"- This IS expected if you are initializing LlamaForCausalLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
"- This IS NOT expected if you are initializing LlamaForCausalLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
"Some weights of LlamaForCausalLM were not initialized from the model checkpoint at harry85/finetuned-TinyLLAMA-own-data-07 and are newly initialized: ['embed_tokens.weight', 'layers.0.input_layernorm.weight', 'layers.0.mlp.down_proj.weight', 'layers.0.mlp.gate_proj.weight', 'layers.0.mlp.up_proj.weight', 'layers.0.post_attention_layernorm.weight', 'layers.0.self_attn.k_proj.weight', 'layers.0.self_attn.o_proj.weight', 'layers.0.self_attn.q_proj.weight', 'layers.0.self_attn.v_proj.weight', 'layers.1.input_layernorm.weight', 'layers.1.mlp.down_proj.weight', 'layers.1.mlp.gate_proj.weight', 'layers.1.mlp.up_proj.weight', 'layers.1.post_attention_layernorm.weight', 'layers.1.self_attn.k_proj.weight', 'layers.1.self_attn.o_proj.weight', 'layers.1.self_attn.q_proj.weight', 'layers.1.self_attn.v_proj.weight', 'layers.10.input_layernorm.weight', 'layers.10.mlp.down_proj.weight', 'layers.10.mlp.gate_proj.weight', 'layers.10.mlp.up_proj.weight', 'layers.10.post_attention_layernorm.weight', 'layers.10.self_attn.k_proj.weight', 'layers.10.self_attn.o_proj.weight', 'layers.10.self_attn.q_proj.weight', 'layers.10.self_attn.v_proj.weight', 'layers.11.input_layernorm.weight', 'layers.11.mlp.down_proj.weight', 'layers.11.mlp.gate_proj.weight', 'layers.11.mlp.up_proj.weight', 'layers.11.post_attention_layernorm.weight', 'layers.11.self_attn.k_proj.weight', 'layers.11.self_attn.o_proj.weight', 'layers.11.self_attn.q_proj.weight', 'layers.11.self_attn.v_proj.weight', 'layers.12.input_layernorm.weight', 'layers.12.mlp.down_proj.weight', 'layers.12.mlp.gate_proj.weight', 'layers.12.mlp.up_proj.weight', 'layers.12.post_attention_layernorm.weight', 'layers.12.self_attn.k_proj.weight', 'layers.12.self_attn.o_proj.weight', 'layers.12.self_attn.q_proj.weight', 'layers.12.self_attn.v_proj.weight', 'layers.13.input_layernorm.weight', 'layers.13.mlp.down_proj.weight', 'layers.13.mlp.gate_proj.weight', 'layers.13.mlp.up_proj.weight', 'layers.13.post_attention_layernorm.weight', 'layers.13.self_attn.k_proj.weight', 'layers.13.self_attn.o_proj.weight', 'layers.13.self_attn.q_proj.weight', 'layers.13.self_attn.v_proj.weight', 'layers.14.input_layernorm.weight', 'layers.14.mlp.down_proj.weight', 'layers.14.mlp.gate_proj.weight', 'layers.14.mlp.up_proj.weight', 'layers.14.post_attention_layernorm.weight', 'layers.14.self_attn.k_proj.weight', 'layers.14.self_attn.o_proj.weight', 'layers.14.self_attn.q_proj.weight', 'layers.14.self_attn.v_proj.weight', 'layers.15.input_layernorm.weight', 'layers.15.mlp.down_proj.weight', 'layers.15.mlp.gate_proj.weight', 'layers.15.mlp.up_proj.weight', 'layers.15.post_attention_layernorm.weight', 'layers.15.self_attn.k_proj.weight', 'layers.15.self_attn.o_proj.weight', 'layers.15.self_attn.q_proj.weight', 'layers.15.self_attn.v_proj.weight', 'layers.16.input_layernorm.weight', 'layers.16.mlp.down_proj.weight', 'layers.16.mlp.gate_proj.weight', 'layers.16.mlp.up_proj.weight', 'layers.16.post_attention_layernorm.weight', 'layers.16.self_attn.k_proj.weight', 'layers.16.self_attn.o_proj.weight', 'layers.16.self_attn.q_proj.weight', 'layers.16.self_attn.v_proj.weight', 'layers.17.input_layernorm.weight', 'layers.17.mlp.down_proj.weight', 'layers.17.mlp.gate_proj.weight', 'layers.17.mlp.up_proj.weight', 'layers.17.post_attention_layernorm.weight', 'layers.17.self_attn.k_proj.weight', 'layers.17.self_attn.o_proj.weight', 'layers.17.self_attn.q_proj.weight', 'layers.17.self_attn.v_proj.weight', 'layers.18.input_layernorm.weight', 'layers.18.mlp.down_proj.weight', 'layers.18.mlp.gate_proj.weight', 
'layers.18.mlp.up_proj.weight', 'layers.18.post_attention_layernorm.weight', 'layers.18.self_attn.k_proj.weight', 'layers.18.self_attn.o_proj.weight', 'layers.18.self_attn.q_proj.weight', 'layers.18.self_attn.v_proj.weight', 'layers.19.input_layernorm.weight', 'layers.19.mlp.down_proj.weight', 'layers.19.mlp.gate_proj.weight', 'layers.19.mlp.up_proj.weight', 'layers.19.post_attention_layernorm.weight', 'layers.19.self_attn.k_proj.weight', 'layers.19.self_attn.o_proj.weight', 'layers.19.self_attn.q_proj.weight', 'layers.19.self_attn.v_proj.weight', 'layers.2.input_layernorm.weight', 'layers.2.mlp.down_proj.weight', 'layers.2.mlp.gate_proj.weight', 'layers.2.mlp.up_proj.weight', 'layers.2.post_attention_layernorm.weight', 'layers.2.self_attn.k_proj.weight', 'layers.2.self_attn.o_proj.weight', 'layers.2.self_attn.q_proj.weight', 'layers.2.self_attn.v_proj.weight', 'layers.20.input_layernorm.weight', 'layers.20.mlp.down_proj.weight', 'layers.20.mlp.gate_proj.weight', 'layers.20.mlp.up_proj.weight', 'layers.20.post_attention_layernorm.weight', 'layers.20.self_attn.k_proj.weight', 'layers.20.self_attn.o_proj.weight', 'layers.20.self_attn.q_proj.weight', 'layers.20.self_attn.v_proj.weight', 'layers.21.input_layernorm.weight', 'layers.21.mlp.down_proj.weight', 'layers.21.mlp.gate_proj.weight', 'layers.21.mlp.up_proj.weight', 'layers.21.post_attention_layernorm.weight', 'layers.21.self_attn.k_proj.weight', 'layers.21.self_attn.o_proj.weight', 'layers.21.self_attn.q_proj.weight', 'layers.21.self_attn.v_proj.weight', 'layers.3.input_layernorm.weight', 'layers.3.mlp.down_proj.weight', 'layers.3.mlp.gate_proj.weight', 'layers.3.mlp.up_proj.weight', 'layers.3.post_attention_layernorm.weight', 'layers.3.self_attn.k_proj.weight', 'layers.3.self_attn.o_proj.weight', 'layers.3.self_attn.q_proj.weight', 'layers.3.self_attn.v_proj.weight', 'layers.4.input_layernorm.weight', 'layers.4.mlp.down_proj.weight', 'layers.4.mlp.gate_proj.weight', 'layers.4.mlp.up_proj.weight', 'layers.4.post_attention_layernorm.weight', 'layers.4.self_attn.k_proj.weight', 'layers.4.self_attn.o_proj.weight', 'layers.4.self_attn.q_proj.weight', 'layers.4.self_attn.v_proj.weight', 'layers.5.input_layernorm.weight', 'layers.5.mlp.down_proj.weight', 'layers.5.mlp.gate_proj.weight', 'layers.5.mlp.up_proj.weight', 'layers.5.post_attention_layernorm.weight', 'layers.5.self_attn.k_proj.weight', 'layers.5.self_attn.o_proj.weight', 'layers.5.self_attn.q_proj.weight', 'layers.5.self_attn.v_proj.weight', 'layers.6.input_layernorm.weight', 'layers.6.mlp.down_proj.weight', 'layers.6.mlp.gate_proj.weight', 'layers.6.mlp.up_proj.weight', 'layers.6.post_attention_layernorm.weight', 'layers.6.self_attn.k_proj.weight', 'layers.6.self_attn.o_proj.weight', 'layers.6.self_attn.q_proj.weight', 'layers.6.self_attn.v_proj.weight', 'layers.7.input_layernorm.weight', 'layers.7.mlp.down_proj.weight', 'layers.7.mlp.gate_proj.weight', 'layers.7.mlp.up_proj.weight', 'layers.7.post_attention_layernorm.weight', 'layers.7.self_attn.k_proj.weight', 'layers.7.self_attn.o_proj.weight', 'layers.7.self_attn.q_proj.weight', 'layers.7.self_attn.v_proj.weight', 'layers.8.input_layernorm.weight', 'layers.8.mlp.down_proj.weight', 'layers.8.mlp.gate_proj.weight', 'layers.8.mlp.up_proj.weight', 'layers.8.post_attention_layernorm.weight', 'layers.8.self_attn.k_proj.weight', 'layers.8.self_attn.o_proj.weight', 'layers.8.self_attn.q_proj.weight', 'layers.8.self_attn.v_proj.weight', 'layers.9.input_layernorm.weight', 'layers.9.mlp.down_proj.weight', 'layers.9.mlp.gate_proj.weight', 
'layers.9.mlp.up_proj.weight', 'layers.9.post_attention_layernorm.weight', 'layers.9.self_attn.k_proj.weight', 'layers.9.self_attn.o_proj.weight', 'layers.9.self_attn.q_proj.weight', 'layers.9.self_attn.v_proj.weight', 'lm_head.weight', 'norm.weight']\n",
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "['### Instruction:\\nContinue the Fibonacci sequence.\\n\\n### Input:\\n\\n1, 1, 2, 3, 5, 8\\n\\n### Response:\\n']\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "if False:\n",
        "    from unsloth import FastLanguageModel\n",
        "    model, tokenizer = FastLanguageModel.from_pretrained(\n",
        "        model_name = \"lora_model\", # YOUR MODEL YOU USED FOR TRAINING\n",
        "        max_seq_length = max_seq_length,\n",
        "        dtype = dtype,\n",
        "        load_in_4bit = load_in_4bit,\n",
        "    )\n",
        "    FastLanguageModel.for_inference(model) # Enable native 2x faster inference\n",
        "\n",
        "# alpaca_prompt = You MUST copy from above!\n",
        "\n",
        "inputs = tokenizer(\n",
        "[\n",
        "    alpaca_prompt.format(\n",
        "        \"which country Haris Hota live\", # instruction\n",
        "        \"Haris Hota\", # input\n",
        "        \"\", # output - leave this blank for generation!\n",
        "    )\n",
        "], return_tensors = \"pt\").to(\"cuda\")\n",
        "\n",
        "from transformers import TextStreamer\n",
        "text_streamer = TextStreamer(tokenizer)\n",
        "_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 64)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "IuUufGQz5BBR",
        "outputId": "07f59e9c-a09e-484c-df1f-406f6e75e36b"
      },
      "execution_count": 26,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "<s> ### Instruction:\n",
            "which country Haris Hota live\n",
            "\n",
            "### Input:\n",
            "\n",
            "Haris Hota\n",
            "\n",
            "### Response:\n",
"<unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk>\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "aTqhZ5y1533Y"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
}
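Note: the stderr log in the notebook shows that harry85/finetuned-TinyLLAMA-own-data-07 contains LoRA adapter tensors (lora_A/lora_B) rather than merged weights, so loading it directly with AutoModelForCausalLM leaves the base weights newly initialized and generation degenerates to <unk> tokens. Below is a minimal sketch of one way to load such a checkpoint, assuming the repo holds a PEFT-compatible adapter (adapter_config.json plus adapter weights) and that unsloth/tinyllama-bnb-4bit is the matching base model; neither assumption is confirmed by this notebook.

    # Hedged sketch: attach the LoRA adapter to its base model instead of
    # loading the adapter repo as a standalone causal-LM checkpoint.
    # Assumptions (not verified here): the adapter repo is PEFT-compatible and
    # "unsloth/tinyllama-bnb-4bit" is the base it was fine-tuned from.
    from transformers import AutoTokenizer, AutoModelForCausalLM
    from peft import PeftModel

    base_id = "unsloth/tinyllama-bnb-4bit"                   # assumed 4-bit TinyLlama base
    adapter_id = "harry85/finetuned-TinyLLAMA-own-data-07"   # adapter repo used in the notebook

    tokenizer = AutoTokenizer.from_pretrained(adapter_id)
    base = AutoModelForCausalLM.from_pretrained(base_id, device_map="auto")
    model = PeftModel.from_pretrained(base, adapter_id)      # applies lora_A/lora_B on top of the base

    # Same Alpaca-style prompt as in the notebook
    prompt = "### Instruction:\nContinue the Fibonacci sequence.\n\n### Input:\n\n1, 1, 2, 3, 5, 8\n\n### Response:\n"
    inputs = tokenizer([prompt], return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=64)
    print(tokenizer.batch_decode(outputs, skip_special_tokens=True))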