Mollel commited on
Commit
7d4865c
·
verified ·
1 Parent(s): fef2aa9

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +10 -65
README.md CHANGED
@@ -1,6 +1,7 @@
1
  ---
2
  language:
3
  - en
 
4
  license: apache-2.0
5
  tags:
6
  - text-generation-inference
@@ -20,81 +21,23 @@ base_model: unsloth/gemma-7b-bnb-4bit
20
  This gemma model was trained 2x faster with [Unsloth](https://github.com/unslothai/unsloth) and Huggingface's TRL library.
21
 
22
 
23
- # Inference With Unsloth on Colab
24
-
25
-
26
- ```python3
27
-
28
-
29
- import torch
30
- major_version, minor_version = torch.cuda.get_device_capability()
31
-
32
-
33
- !pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
34
- if major_version >= 8:
35
- # Use this for new GPUs like Ampere, Hopper GPUs (RTX 30xx, RTX 40xx, A100, H100, L40)
36
- !pip install --no-deps packaging ninja einops flash-attn xformers trl peft accelerate bitsandbytes
37
- else:
38
- # Use this for older GPUs (V100, Tesla T4, RTX 20xx)
39
- !pip install --no-deps xformers trl peft accelerate bitsandbytes
40
- pass
41
-
42
-
43
-
44
- from unsloth import FastLanguageModel
45
- import torch
46
- max_seq_length = 2048
47
- dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
48
- load_in_4bit = False
49
- model, tokenizer = FastLanguageModel.from_pretrained(
50
- model_name = "Mollel/Gemma_Swahili_Mollel_1_epoch",
51
- max_seq_length = max_seq_length,
52
- dtype = dtype,
53
- load_in_4bit = load_in_4bit,
54
- device_map="auto"
55
- )
56
- FastLanguageModel.for_inference(model) # Enable native 2x faster inference
57
-
58
- input_prompt = """
59
- ### Instruction:
60
- {}
61
-
62
- ### Input:
63
- {}
64
-
65
- ### Response:
66
- {}"""
67
-
68
- input_text = input_prompt.format(
69
- "دیئے گئے موضوع کے بارے میں ایک مختصر پیراگراف لکھیں۔", # instruction
70
- "قابل تجدید توانائی کے استعمال کی اہمیت", # input
71
- "", # output - leave this blank for generation!
72
- )
73
-
74
- inputs = tokenizer([input_text], return_tensors = "pt").to("cuda")
75
-
76
- outputs = model.generate(**inputs, max_new_tokens = 300, use_cache = True)
77
-
78
- response = tokenizer.batch_decode(outputs)
79
-
80
- ```
81
-
82
 
83
 
84
  # Inference with HuggingFace transformers
85
 
86
 
87
-
88
-
89
  ```python3
90
 
 
 
91
  from peft import AutoPeftModelForCausalLM
92
  from transformers import AutoTokenizer
93
 
94
  model = AutoPeftModelForCausalLM.from_pretrained(
95
- "Xhaheen/Gemma_Urdu_Shaheen_1_epoch",
96
  load_in_4bit = False
97
  )
 
98
  tokenizer = AutoTokenizer.from_pretrained("Mollel/Gemma_Swahili_Mollel_1_epoch")
99
 
100
 
@@ -111,8 +54,8 @@ input_prompt = """
111
 
112
 
113
  input_text = input_prompt.format(
114
- "دیئے گئے موضوع کے بارے میں ایک مختصر پیراگراف لکھیں۔", # instruction
115
- "قابل تجدید توانائی کے استعمال کی اہمیت", # input
116
  "", # output - leave this blank for generation!
117
  )
118
 
@@ -121,6 +64,8 @@ inputs = tokenizer([input_text], return_tensors = "pt").to("cuda")
121
  outputs = model.generate(**inputs, max_new_tokens = 300, use_cache = True)
122
  response = tokenizer.batch_decode(outputs)[0]
123
 
 
 
124
  ```
125
 
126
- [<img src="https://raw.githubusercontent.com/unslothai/unsloth/main/images/unsloth%20made%20with%20love.png" width="200"/>](https://github.com/unslothai/unsloth)
 
1
  ---
2
  language:
3
  - en
4
+ - sw
5
  license: apache-2.0
6
  tags:
7
  - text-generation-inference
 
21
  This gemma model was trained 2x faster with [Unsloth](https://github.com/unslothai/unsloth) and Huggingface's TRL library.
22
 
23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
 
26
  # Inference with HuggingFace transformers
27
 
28
 
 
 
29
  ```python3
30
 
31
+ !pip install transformers peft accelerate bitsandbytes
32
+
33
  from peft import AutoPeftModelForCausalLM
34
  from transformers import AutoTokenizer
35
 
36
  model = AutoPeftModelForCausalLM.from_pretrained(
37
+ "Mollel/Gemma_Swahili_Mollel_1_epoch",
38
  load_in_4bit = False
39
  )
40
+
41
  tokenizer = AutoTokenizer.from_pretrained("Mollel/Gemma_Swahili_Mollel_1_epoch")
42
 
43
 
 
54
 
55
 
56
  input_text = input_prompt.format(
57
+ "Andika aya fupi kuhusu mada iliyotolewa.", # instruction
58
+ "Umuhimu wa kutumia nishati inayoweza kurejeshwa", # input
59
  "", # output - leave this blank for generation!
60
  )
61
 
 
64
  outputs = model.generate(**inputs, max_new_tokens = 300, use_cache = True)
65
  response = tokenizer.batch_decode(outputs)[0]
66
 
67
+ print(response)
68
+
69
  ```
70
 
71
+ [<img src="https://raw.githubusercontent.com/unslothai/unsloth/main/images/unsloth%20made%20with%20love.png" width="200"/>](https://github.com/unslothai/unsloth)