Titobsala committed
Commit 8fa6632 · 1 Parent(s): f975005

app for evaluating the trained model

Files changed (2):
  1. app.py +21 -7
  2. requirements.txt +3 -1
app.py CHANGED
@@ -1,21 +1,35 @@
 import gradio as gr
-from transformers import AutoTokenizer, AutoModelForCausalLM
+from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 import torch
 
 # Load model and tokenizer
-model_name = "mlabonne/FineLlama-3.1-8B"  # Replace with your model's name
+model_name = "unsloth/Llama-3.2-1B-Instruct-bnb-4bit"  # Replace with your model's name
+
+# Configure quantization
+bnb_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_use_double_quant=True,
+    bnb_4bit_quant_type="nf4",
+    bnb_4bit_compute_dtype=torch.bfloat16
+)
+
 tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    quantization_config=bnb_config,
+    device_map="auto",
+)
 
-def generate_text(prompt, max_length, temperature):
+def generate_text(prompt, max_new_tokens, temperature):
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
 
     with torch.no_grad():
         outputs = model.generate(
             **inputs,
-            max_length=max_length,
+            max_new_tokens=max_new_tokens,
             temperature=temperature,
-            num_return_sequences=1
+            num_return_sequences=1,
+            do_sample=True,
         )
 
     return tokenizer.decode(outputs[0], skip_special_tokens=True)
@@ -25,7 +39,7 @@ iface = gr.Interface(
     fn=generate_text,
     inputs=[
         gr.Textbox(lines=5, label="Enter your ESG-related prompt"),
-        gr.Slider(50, 500, value=200, label="Maximum Length"),
+        gr.Slider(50, 500, value=200, label="Maximum New Tokens"),
         gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Temperature")
     ],
     outputs=gr.Textbox(label="Generated ESG Report Paragraph"),
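
The two substantive changes in app.py are (1) 4-bit NF4 quantization via BitsAndBytesConfig instead of plain float16 loading, which cuts weight memory roughly 4x, and (2) the switch from max_length to max_new_tokens plus do_sample=True, since temperature is ignored under the default greedy decoding. A minimal sketch of the length-control difference, condensing the same setup as app.py (the prompt text here is illustrative, not part of the commit):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Same quantized setup as app.py, condensed.
name = "unsloth/Llama-3.2-1B-Instruct-bnb-4bit"
tok = AutoTokenizer.from_pretrained(name)
model = AutoModelForCausalLM.from_pretrained(
    name,
    quantization_config=BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    ),
    device_map="auto",
)

inputs = tok("Draft one paragraph on our renewable energy targets.",
             return_tensors="pt").to(model.device)

with torch.no_grad():
    # Old control: max_length caps prompt + continuation together, so a long
    # prompt silently eats most of the 200-token budget.
    old_style = model.generate(**inputs, max_length=200,
                               do_sample=True, temperature=0.7)
    # New control: max_new_tokens caps only the continuation, regardless of
    # prompt length - which is what the "Maximum New Tokens" slider promises.
    new_style = model.generate(**inputs, max_new_tokens=200,
                               do_sample=True, temperature=0.7)

print(tok.decode(new_style[0], skip_special_tokens=True))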
requirements.txt CHANGED
@@ -2,4 +2,6 @@ huggingface_hub==0.25.2
 
 gradio
 transformers
-torch
+torch
+accelerate>=0.26.0
+bitsandbytes
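
The two additions back the code changes: accelerate is required by transformers whenever device_map="auto" is used, and bitsandbytes supplies the 4-bit NF4 kernels, which in most setups need a CUDA GPU. A minimal pre-flight check (a sketch, not part of the commit):

import importlib.metadata as md
import torch

# Confirm everything in requirements.txt resolved.
for pkg in ("gradio", "transformers", "torch", "accelerate", "bitsandbytes"):
    try:
        print(f"{pkg}=={md.version(pkg)}")
    except md.PackageNotFoundError:
        print(f"{pkg} missing -> pip install -r requirements.txt")

# 4-bit bitsandbytes loading expects a CUDA device in most setups.
print("CUDA available:", torch.cuda.is_available())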