Grandediw committed
Commit 5baa435 · verified · 1 Parent(s): 1322782

Update app.py

Files changed (1)
  1. app.py +20 -21
app.py CHANGED
@@ -1,8 +1,7 @@
  import os
  import gradio as gr
  import torch
- from transformers import AutoTokenizer, AutoModel
- from safetensors.torch import load_file
+ from transformers import AutoTokenizer, AutoModelForCausalLM
 
  # Load the Hugging Face API token from environment variable
  token = os.getenv("HUGGINGFACE_API_TOKEN")
@@ -10,41 +9,41 @@ if not token:
      raise ValueError("HUGGINGFACE_API_TOKEN is not set. Please add it in the Secrets section of your Space.")
 
  # Configure device
- device = "cuda" if torch.cuda.is_available() else "cpu"
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
  # Load the tokenizer and model using the token
- model_repo = "Grandediw/lora_model"
- tokenizer = AutoTokenizer.from_pretrained(model_repo, token=token)
- base_model = AutoModel.from_pretrained(model_repo, token=token)
-
- # Load the LoRA adapter weights
- lora_weights_path = "adapter_model.safetensors"
- lora_weights = load_file(lora_weights_path)
-
- # Apply LoRA weights to the base model
- for name, param in base_model.named_parameters():
-     if name in lora_weights:
-         param.data += lora_weights[name].to(device, dtype=param.dtype)
+ model_repo = "unsloth/Llama-3.2-3B-Instruct-bnb-4bit"
+ tokenizer = AutoTokenizer.from_pretrained(model_repo, use_auth_token=token)
+ model = AutoModelForCausalLM.from_pretrained(
+     model_repo,
+     use_auth_token=token,
+     device_map="auto",
+     torch_dtype=torch.float16,
+     load_in_4bit=True,
+     quantization_config={"bnb_4bit_use_double_quant": True, "bnb_4bit_quant_type": "nf4"}
+ )
 
  # Move the model to the device
- base_model = base_model.to(device)
+ model.to(device)
+ model.eval()
 
  # Define the inference function
  def infer(prompt):
      inputs = tokenizer(prompt, return_tensors="pt").to(device)
-     outputs = base_model(**inputs)
-     # Placeholder return, modify based on your specific model task
-     return outputs.last_hidden_state.mean(dim=1).cpu().detach().numpy()
+     with torch.no_grad():
+         outputs = model.generate(**inputs, max_length=512)
+     generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+     return generated_text
 
  # Gradio interface
  with gr.Blocks() as demo:
-     gr.Markdown("## LoRA Model Inference")
+     gr.Markdown("## Llama 3.2 3B Instruct Model Inference")
 
      with gr.Row():
          prompt = gr.Textbox(label="Prompt", placeholder="Enter your prompt here...")
          generate_button = gr.Button("Generate")
 
-     output = gr.Textbox(label="Output")
+     output = gr.Textbox(label="Generated Text")
 
      generate_button.click(fn=infer, inputs=[prompt], outputs=[output])
 
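
A side note on the new loading code: in recent transformers releases `use_auth_token=` is deprecated in favor of `token=`, `quantization_config` is normally passed as a `BitsAndBytesConfig` object rather than a plain dict, and a model loaded in 4-bit with `device_map="auto"` is already placed on devices by accelerate, so the extra `model.to(device)` is unnecessary and is typically rejected for bitsandbytes-quantized weights. A minimal sketch under those assumptions follows; the repo id and the 512-token budget come from the diff, while `bnb_4bit_compute_dtype` and the switch to `max_new_tokens` are added assumptions, not part of the committed file.

import os
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

token = os.getenv("HUGGINGFACE_API_TOKEN")
model_repo = "unsloth/Llama-3.2-3B-Instruct-bnb-4bit"

# 4-bit NF4 quantization with double quantization, mirroring the settings in the diff.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,  # assumption: compute dtype is not specified in the diff
)

tokenizer = AutoTokenizer.from_pretrained(model_repo, token=token)
model = AutoModelForCausalLM.from_pretrained(
    model_repo,
    token=token,                     # replaces the deprecated use_auth_token=
    device_map="auto",               # accelerate places the weights; no manual .to(device)
    quantization_config=bnb_config,
)
model.eval()

def infer(prompt: str) -> str:
    # Put the inputs on the same device accelerate chose for the model.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=512)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)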