Update README.md
README.md CHANGED
@@ -247,6 +247,7 @@ pip install -r requirements.txt
// python setup_env.py --hf-repo your_hf_username/Falcon3-10B-Instruct-1.58bit -q i2_s // You can skip this one
// move the model to the models folder, then
python run_inference.py -m models/Falcon3-10B-Instruct-1.58bit/ggml-model-i2_s.gguf -p "What is 1.58-bit quantization in LLMs, and why is it interesting for GPU-poor people?" -cnv
+# 1.58-bit quantization stores each neural-network weight as one of three values {-1, 0, +1}, i.e. log2(3) ≈ 1.58 bits per weight instead of 16 or 32 bits. This sharply reduces memory usage and can make inference practical on modest (GPU-poor) hardware.
```

## Evaluation
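For context on what the added comment describes, here is a minimal NumPy sketch of the absmean ternary quantization behind BitNet-style 1.58-bit models. It is illustrative only: the helper name `quantize_ternary` and the toy matrix are assumptions, not part of this repo's code.

```python
import numpy as np

def quantize_ternary(weights: np.ndarray):
    """Quantize a float weight matrix to ternary codes {-1, 0, +1}.

    Sketch of the absmean scheme used by BitNet b1.58: scale the
    weights by their mean absolute value, then round each entry to
    the nearest of {-1, 0, +1}. Dequantize with w ≈ scale * codes.
    """
    scale = np.abs(weights).mean() + 1e-8          # guard against all-zero input
    codes = np.clip(np.round(weights / scale), -1, 1).astype(np.int8)
    return codes, scale

# Toy usage: quantize a small random matrix and check the reconstruction error.
rng = np.random.default_rng(0)
w = rng.normal(size=(4, 4)).astype(np.float32)
codes, scale = quantize_ternary(w)
print(codes)                                       # entries are only -1, 0, or +1
print(f"mean abs error: {np.abs(w - scale * codes).mean():.3f}")
```

Each code carries log2(3) ≈ 1.58 bits of information, which is where the "1.58-bit" name comes from.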