Spaces:

Grandediw
/

lab2-2024

Runtime error

App Files Files Community

Grandediw committed on Dec 5, 2024

Commit

5baa435

verified ·

1 Parent(s): 1322782

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -21

app.py CHANGED Viewed

@@ -1,8 +1,7 @@
 import os
 import gradio as gr
 import torch
-from transformers import AutoTokenizer, AutoModel
-from safetensors.torch import load_file
 # Load the Hugging Face API token from environment variable
 token = os.getenv("HUGGINGFACE_API_TOKEN")
@@ -10,41 +9,41 @@ if not token:
     raise ValueError("HUGGINGFACE_API_TOKEN is not set. Please add it in the Secrets section of your Space.")
 # Configure device
-device = "cuda" if torch.cuda.is_available() else "cpu"
 # Load the tokenizer and model using the token
-model_repo = "Grandediw/lora_model"
-tokenizer = AutoTokenizer.from_pretrained(model_repo, token=token)
-base_model = AutoModel.from_pretrained(model_repo, token=token)
-# Load the LoRA adapter weights
-lora_weights_path = "adapter_model.safetensors"
-lora_weights = load_file(lora_weights_path)
-# Apply LoRA weights to the base model
-for name, param in base_model.named_parameters():
-    if name in lora_weights:
-        param.data += lora_weights[name].to(device, dtype=param.dtype)
 # Move the model to the device
-base_model = base_model.to(device)
 # Define the inference function
 def infer(prompt):
     inputs = tokenizer(prompt, return_tensors="pt").to(device)
-    outputs = base_model(**inputs)
-    # Placeholder return, modify based on your specific model task
-    return outputs.last_hidden_state.mean(dim=1).cpu().detach().numpy()
 # Gradio interface
 with gr.Blocks() as demo:
-    gr.Markdown("## LoRA Model Inference")
     with gr.Row():
         prompt = gr.Textbox(label="Prompt", placeholder="Enter your prompt here...")
         generate_button = gr.Button("Generate")
-    output = gr.Textbox(label="Output")
     generate_button.click(fn=infer, inputs=[prompt], outputs=[output])

 import os
 import gradio as gr
 import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM
 # Load the Hugging Face API token from environment variable
 token = os.getenv("HUGGINGFACE_API_TOKEN")
     raise ValueError("HUGGINGFACE_API_TOKEN is not set. Please add it in the Secrets section of your Space.")
 # Configure device
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 # Load the tokenizer and model using the token
+model_repo = "unsloth/Llama-3.2-3B-Instruct-bnb-4bit"
+tokenizer = AutoTokenizer.from_pretrained(model_repo, token=token)
+# The 4-bit settings are expressed as a BitsAndBytesConfig object.
+# `from_pretrained` rejects `load_in_4bit=True` when `quantization_config`
+# is also supplied, so the flag lives inside the config instead of being a
+# separate keyword argument.
+from transformers import BitsAndBytesConfig
+model = AutoModelForCausalLM.from_pretrained(
+    model_repo,
+    token=token,
+    device_map="auto",
+    torch_dtype=torch.float16,
+    quantization_config=BitsAndBytesConfig(
+        load_in_4bit=True,
+        bnb_4bit_use_double_quant=True,
+        bnb_4bit_quant_type="nf4",
+    ),
+)
+# `device_map="auto"` already places the weights on the available device, and
+# bitsandbytes-quantized models should not be moved again with `.to(...)`,
+# so the model is only switched to evaluation mode here.
+model.eval()
 # Define the inference function
 def infer(prompt):
+    """Generate text for ``prompt`` with the module-level model and tokenizer.
+
+    Args:
+        prompt: Text entered in the Gradio prompt box.
+
+    Returns:
+        The first generated sequence decoded to a string, with special
+        tokens stripped.
+    """
+    # Send the inputs to wherever the model actually lives instead of relying
+    # on the module-level `device` matching the `device_map` placement.
+    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+    with torch.no_grad():
+        # `max_new_tokens` limits only the generated continuation. The previous
+        # `max_length=512` also counted the prompt tokens, so a long prompt
+        # left little or no room for any output.
+        outputs = model.generate(**inputs, max_new_tokens=512)
+    return tokenizer.decode(outputs[0], skip_special_tokens=True)
 # Gradio interface
 with gr.Blocks() as demo:
+    gr.Markdown("## Llama 3.2 3B Instruct Model Inference")
     with gr.Row():
         prompt = gr.Textbox(label="Prompt", placeholder="Enter your prompt here...")
         generate_button = gr.Button("Generate")
+    output = gr.Textbox(label="Generated Text")
     generate_button.click(fn=infer, inputs=[prompt], outputs=[output])