Update app.py
app.py CHANGED
@@ -1,18 +1,19 @@
-import os
-os.environ['HF_HOME'] = 'E:/huggingface_cache'
-
-import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
+import bitsandbytes as bnb
 
-#
+# Load the model quantized in 8-bit
 tokenizer = AutoTokenizer.from_pretrained("Hawoly18/llama3.2-3B-Wolof")
-model = AutoModelForCausalLM.from_pretrained(
+model = AutoModelForCausalLM.from_pretrained(
+    "Hawoly18/llama3.2-3B-Wolof",
+    load_in_8bit=True,  # Use 8-bit quantization
+    device_map="auto"  # Map the model automatically onto the available resources (CPU here)
+)
 
 if tokenizer.pad_token is None:
-
+    tokenizer.pad_token = tokenizer.eos_token
 
-#
+# Function to generate responses
 def generate_response(question, max_length=512):
     input_text = f"Question: {question}\nRéponse:"
     input_ids = tokenizer.encode(input_text, return_tensors='pt', padding=True, truncation=True)
@@ -25,23 +26,24 @@ def generate_response(question, max_length=512):
         attention_mask=attention_mask,
         pad_token_id=tokenizer.eos_token_id,
         eos_token_id=tokenizer.eos_token_id,
-        num_beams=5,
-        no_repeat_ngram_size=2,
+        num_beams=5,
+        no_repeat_ngram_size=2,
         early_stopping=True
     )
     response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
     response = response.replace(input_text, "").strip()
     return response
 
-#
+# Gradio interface
+import gradio as gr
+
 interface = gr.Interface(
     fn=generate_response,
     inputs="text",
     outputs="text",
-    title="Model
-    description="
+    title="Model Q&A Interface",
+    description="Ask a question related to BSE and entrepreneurship!",
     examples=[["yan jumtukaay ci xaral yi BSE moom mën a dimbali ndax moom mën woyal sama liggéey ci entrepreneur yi"]]
 )
 
-# Launch the interface
 interface.launch(share=True)
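
A note on the quantized load above: recent transformers releases deprecate passing load_in_8bit=True directly to from_pretrained in favor of a BitsAndBytesConfig object, and the bitsandbytes 8-bit kernels generally require a CUDA GPU, so the "(CPU here)" comment will not hold on GPU-less hardware. A minimal sketch of the equivalent call under those assumptions:

from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Same 8-bit load, expressed through BitsAndBytesConfig
quant_config = BitsAndBytesConfig(load_in_8bit=True)
model = AutoModelForCausalLM.from_pretrained(
    "Hawoly18/llama3.2-3B-Wolof",
    quantization_config=quant_config,  # replaces the load_in_8bit=True kwarg
    device_map="auto",                 # dispatch across the available devices
)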
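
Separately, the hunk context builds input_ids with tokenizer.encode while the generate call consumes an attention_mask constructed in lines elided between the two hunks. A minimal sketch of the common pattern that produces both tensors in one call (variable names here are illustrative, not the file's actual ones):

# Calling the tokenizer directly returns input_ids and attention_mask together
inputs = tokenizer(input_text, return_tensors='pt', padding=True, truncation=True)
output_ids = model.generate(
    inputs['input_ids'],
    attention_mask=inputs['attention_mask'],
    max_length=512,
    pad_token_id=tokenizer.eos_token_id,
    eos_token_id=tokenizer.eos_token_id,
    num_beams=5,             # beam search over 5 candidate sequences
    no_repeat_ngram_size=2,  # never repeat a bigram in the output
    early_stopping=True,     # stop once all beams have finished
)

This also motivates the pad-token fallback added in the first hunk: padding=True requires tokenizer.pad_token to be set, and Llama tokenizers ship without one.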
|