import os
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModel
from safetensors.torch import load_file
# Load the Hugging Face API token
token = os.getenv("HUGGINGFACE_API_TOKEN")
if not token:
    raise ValueError("HUGGINGFACE_API_TOKEN is not set. Please add it in the Secrets section of your Space.")
# Configure the compute device
device = "cuda" if torch.cuda.is_available() else "cpu"
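# Optional sketch (assumption: a CUDA GPU is available): the model could also be
# loaded in half precision to cut memory use, e.g.
#   AutoModel.from_pretrained(model_repo, token=token, torch_dtype=torch.float16)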
# Load the tokenizer and base model, authenticating with the token from the environment
model_repo = "Grandediw/lora_model"
tokenizer = AutoTokenizer.from_pretrained(model_repo, token=token)
base_model = AutoModel.from_pretrained(model_repo, token=token)
# Load LoRA adapter weights
lora_weights_path = "adapter_model.safetensors"  # Ensure this file is present in the same directory
lora_weights = load_file(lora_weights_path)
# Apply LoRA weights to the base model.
# Note: this simple in-place addition only works if the adapter tensors share
# the names and shapes of the base parameters (i.e. pre-merged weight deltas).
for name, param in base_model.named_parameters():
    if name in lora_weights:
        # Match the parameter's current device: the model is only moved to
        # `device` after this loop, so .to(device) here could mix devices.
        param.data += lora_weights[name].to(param.device, dtype=param.dtype)
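# Alternative sketch (assumption: the adapter was trained with PEFT, so its
# tensors are lora_A/lora_B factors rather than full deltas). PEFT composes
# and scales the factors correctly instead of relying on name matching:
#   from peft import PeftModel
#   base_model = PeftModel.from_pretrained(base_model, model_repo, token=token)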
# Move the model to the device
base_model = base_model.to(device)
# Inference function
def infer(prompt):
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = base_model(**inputs)
    # AutoModel exposes no generation head, so return the mean-pooled
    # hidden state as a placeholder output
    return outputs.last_hidden_state.mean(dim=1).cpu().numpy()
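# Sketch of actual text generation (assumption: "Grandediw/lora_model" is a
# causal LM checkpoint; it would then be loaded with AutoModelForCausalLM):
#   output_ids = base_model.generate(**inputs, max_new_tokens=100)
#   return tokenizer.decode(output_ids[0], skip_special_tokens=True)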
# Gradio Interface
css = """
#interface-container {
margin: 0 auto;
max-width: 700px;
padding: 15px;
border-radius: 10px;
background-color: #f9f9f9;
box-shadow: 0px 4px 10px rgba(0, 0, 0, 0.1);
}
#header {
text-align: center;
font-size: 1.5em;
font-weight: bold;
margin-bottom: 20px;
color: #333;
}
"""
with gr.Blocks(css=css) as demo:
    # gr.Box was removed in Gradio 4.x; gr.Group is the closest replacement
    with gr.Group(elem_id="interface-container"):
        gr.Markdown("<div id='header'>LoRA Model Inference</div>")
        # Input for prompt and run button
        prompt = gr.Textbox(label="Prompt", placeholder="Enter your prompt here...")
        run_button = gr.Button("Generate Output", variant="primary")
        # Display output
        output = gr.Textbox(label="Output")
        # Connect the button to the inference function
        run_button.click(fn=infer, inputs=[prompt], outputs=[output])
# Launch the app
if __name__ == "__main__":
    demo.launch()