Spaces:
Running
Running
[MOD] Quantized Carballo-Cerebras
Browse files
app.py
CHANGED
@@ -1,11 +1,11 @@
|
|
1 |
import os
|
2 |
import gradio as gr
|
3 |
from gradio.components import Slider
|
|
|
4 |
import torch
|
5 |
-
from transformers import pipeline
|
6 |
|
7 |
# Model, information and examples ----------------------------------------------
|
8 |
-
MODEL_NAMES = ["Carballo-bloom-1.3B","Carballo-cerebras-1.3B"
|
9 |
markdown_description_gl = """
|
10 |
[*English below*]
|
11 |
|
@@ -57,11 +57,21 @@ fronted_theme = 'Soft'
|
|
57 |
# Model charge ---------------------------------------------------------
|
58 |
model_id_bloom = "proxectonos/Carballo-bloom-1.3B"
|
59 |
generator_model_bloom = pipeline("text-generation", model=model_id_bloom)
|
60 |
-
model_id_cerebras = "proxectonos/Carballo-cerebras-1.3B"
|
61 |
-
generator_model_cerebras = pipeline("text-generation", model=model_id_cerebras)
|
62 |
model_id_carvalho = "Nos-PT/Carvalho_pt-gl-1.3B"
|
63 |
generator_model_carvalho = pipeline("text-generation", model=model_id_carvalho)
|
64 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
65 |
# Generation functions ---------------------------------------------------------
|
66 |
def get_model(model_selection):
|
67 |
if model_selection == "Carballo-bloom-1.3B":
|
|
|
1 |
import os
|
2 |
import gradio as gr
|
3 |
from gradio.components import Slider
|
4 |
+
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
|
5 |
import torch
|
|
|
6 |
|
7 |
# Model, information and examples ----------------------------------------------
|
8 |
+
MODEL_NAMES = ["Carballo-bloom-1.3B","Carvalho_pt-gl","Carballo-cerebras-1.3B"]
|
9 |
markdown_description_gl = """
|
10 |
[*English below*]
|
11 |
|
|
|
57 |
# Model charge ---------------------------------------------------------
|
58 |
model_id_bloom = "proxectonos/Carballo-bloom-1.3B"
|
59 |
generator_model_bloom = pipeline("text-generation", model=model_id_bloom)
|
|
|
|
|
60 |
model_id_carvalho = "Nos-PT/Carvalho_pt-gl-1.3B"
|
61 |
generator_model_carvalho = pipeline("text-generation", model=model_id_carvalho)
|
62 |
|
63 |
+
# Quantize Carballo-Cerebras (the least-used model) to save memory
|
64 |
+
model_id_cerebras = "proxectonos/Carballo-cerebras-1.3B"
|
65 |
+
quantization_config = BitsAndBytesConfig(
|
66 |
+
load_in_4bit=True,
|
67 |
+
bnb_4bit_quant_type="nf4",
|
68 |
+
bnb_4bit_compute_dtype=torch.float16,
|
69 |
+
)
|
70 |
+
|
71 |
+
model_quantizied = AutoModelForCausalLM.from_pretrained(model_id_cerebras, quantization_config=quantization_config)
|
72 |
+
tokenizer = AutoTokenizer.from_pretrained(model_id_cerebras)
|
73 |
+
# Build the pipeline from the 4-bit model object and its tokenizer. Passing the
# model id string here would make `pipeline` load a second, full-precision copy
# and leave the quantized model unused. A tokenizer must be given explicitly
# when `model` is an already-instantiated model rather than a hub id.
generator_model_cerebras = pipeline("text-generation", model=model_quantizied, tokenizer=tokenizer)
|
74 |
+
|
75 |
# Generation functions ---------------------------------------------------------
|
76 |
def get_model(model_selection):
|
77 |
if model_selection == "Carballo-bloom-1.3B":
|