Hawoly18 committed
Commit a6c0613 · verified · 1 Parent(s): ab296df

Update app.py

Files changed (1):
  app.py (+16 -14)
app.py CHANGED
@@ -1,18 +1,19 @@
-import os
-os.environ['HF_HOME'] = 'E:/huggingface_cache'
-
-import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
+import bitsandbytes as bnb
 
-# Load the model and tokenizer
+# Load the model quantized to 8-bit
 tokenizer = AutoTokenizer.from_pretrained("Hawoly18/llama3.2-3B-Wolof")
-model = AutoModelForCausalLM.from_pretrained("Hawoly18/llama3.2-3B-Wolof")
+model = AutoModelForCausalLM.from_pretrained(
+    "Hawoly18/llama3.2-3B-Wolof",
+    load_in_8bit=True,  # use 8-bit quantization
+    device_map="auto"   # allocate resources automatically (CPU here)
+)
 
 if tokenizer.pad_token is None:
     tokenizer.pad_token = tokenizer.eos_token
 
 # Function to generate responses
 def generate_response(question, max_length=512):
     input_text = f"Question: {question}\nRéponse:"
     input_ids = tokenizer.encode(input_text, return_tensors='pt', padding=True, truncation=True)
@@ -25,23 +26,24 @@ def generate_response(question, max_length=512):
         attention_mask=attention_mask,
         pad_token_id=tokenizer.eos_token_id,
         eos_token_id=tokenizer.eos_token_id,
-        num_beams=5,             # beam search for better quality
-        no_repeat_ngram_size=2,  # prevent n-gram repetition
+        num_beams=5,
+        no_repeat_ngram_size=2,
         early_stopping=True
     )
     response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
     response = response.replace(input_text, "").strip()
     return response
 
-# Define the Gradio interface
+# Gradio interface
+import gradio as gr
+
 interface = gr.Interface(
     fn=generate_response,
     inputs="text",
     outputs="text",
-    title="Model Adia Géneration de Réponse en Wolof",
-    description="Posez une question relative à l'entrepreneuriat en Afrique",
+    title="Model Q&A Interface",
+    description="Ask a question related to BSE and entrepreneurship!",
     examples=[["yan jumtukaay ci xaral yi BSE moom mën a dimbali ndax moom mën woyal sama liggéey ci entrepreneur yi"]]
 )
 
-# Launch the interface
 interface.launch(share=True)
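Two notes on the quantized load, for anyone reusing this: newer transformers releases deprecate passing load_in_8bit=True directly to from_pretrained in favor of an explicit BitsAndBytesConfig, and bitsandbytes 8-bit quantization normally requires a CUDA GPU (the "(CPU here)" comment in the diff is optimistic); device_map="auto" also needs the accelerate package installed. A minimal sketch of the equivalent load under those assumptions:

from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Explicit 8-bit quantization config; assumes bitsandbytes and accelerate
# are installed and a CUDA GPU is available.
bnb_config = BitsAndBytesConfig(load_in_8bit=True)

tokenizer = AutoTokenizer.from_pretrained("Hawoly18/llama3.2-3B-Wolof")
model = AutoModelForCausalLM.from_pretrained(
    "Hawoly18/llama3.2-3B-Wolof",
    quantization_config=bnb_config,  # replaces the bare load_in_8bit=True kwarg
    device_map="auto",               # let accelerate place layers on available devices
)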
 
 
 
 
 
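The context between the two hunks (old lines 19-24, including the model.generate(...) call that produces output_ids) is not shown in the diff. Below is a hypothetical reconstruction of the full generate_response, consistent with the keyword arguments visible in the second hunk; the tokenizer(...) call and the attention_mask handling are inferred, not copied from the commit:

# Hypothetical reconstruction; the lines between the two hunks are not
# visible in the diff, so the generate(...) call and attention_mask
# construction below are inferred, not taken from the commit.
def generate_response(question, max_length=512):
    input_text = f"Question: {question}\nRéponse:"
    inputs = tokenizer(input_text, return_tensors='pt', padding=True, truncation=True)
    output_ids = model.generate(
        inputs.input_ids,
        attention_mask=inputs.attention_mask,  # assumed; not visible in the diff
        max_length=max_length,
        pad_token_id=tokenizer.eos_token_id,
        eos_token_id=tokenizer.eos_token_id,
        num_beams=5,                           # beam search for better quality
        no_repeat_ngram_size=2,                # prevent bigram repetition
        early_stopping=True,
    )
    response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    # Strip the prompt so only the model's answer is returned
    return response.replace(input_text, "").strip()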
 
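A last nit: the commit leaves import gradio as gr mid-file, right above the interface definition. Imports conventionally sit at the top of app.py; a sketch of the same wiring with the import hoisted and explicit components (the Textbox labels here are illustrative, not from the commit):

import gradio as gr  # conventionally imported at the top of app.py

interface = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(label="Question"),   # explicit component instead of the "text" shorthand
    outputs=gr.Textbox(label="Réponse"),
    title="Model Q&A Interface",
    description="Ask a question related to BSE and entrepreneurship!",
    examples=[["yan jumtukaay ci xaral yi BSE moom mën a dimbali ndax moom mën woyal sama liggéey ci entrepreneur yi"]]
)

# share=True opens a temporary public tunnel when running locally; it is
# unnecessary on Hugging Face Spaces, where the app is already served publicly.
interface.launch(share=True)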