import os

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModel
from safetensors.torch import load_file
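
# Read the Hugging Face access token from the environment (set in the Space's Secrets).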
token = os.getenv("HUGGINGFACE_API_TOKEN")
if not token:
    raise ValueError("HUGGINGFACE_API_TOKEN is not set. Please add it in the Secrets section of your Space.")
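
# Run on the GPU when one is available, otherwise fall back to the CPU.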
device = "cuda" if torch.cuda.is_available() else "cpu" |
|
|
|
|
|
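
# Load the tokenizer and the base model from the Hub.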
model_repo = "Grandediw/lora_model"
tokenizer = AutoTokenizer.from_pretrained(model_repo, token=token)
base_model = AutoModel.from_pretrained(model_repo, token=token)
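
# Load the LoRA weights from a local safetensors file bundled with the Space.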
lora_weights_path = "adapter_model.safetensors"
lora_weights = load_file(lora_weights_path)
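
# Merge the LoRA weights into the base model. Note: this assumes the
# safetensors file stores full weight deltas keyed by the base model's
# parameter names; a standard PEFT adapter (separate lora_A/lora_B
# factors) would instead be applied with peft.PeftModel.from_pretrained.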
for name, param in base_model.named_parameters():
    if name in lora_weights:
        # Move each delta to the parameter's current device before the
        # in-place add; the model has not been moved to `device` yet.
        param.data += lora_weights[name].to(param.device, dtype=param.dtype)

base_model = base_model.to(device)
base_model.eval()  # disable dropout and other training-only behavior

def infer(prompt):
    """Return the mean-pooled last hidden state for the prompt.

    AutoModel has no generation head, so this produces an embedding
    vector (rendered as text by the output box), not generated text.
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = base_model(**inputs)
    return outputs.last_hidden_state.mean(dim=1).cpu().numpy()
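
# Minimal Gradio UI: a prompt box, a generate button, and a text output.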
with gr.Blocks() as demo:
    gr.Markdown("## LoRA Model Inference")

    with gr.Row():
        prompt = gr.Textbox(label="Prompt", placeholder="Enter your prompt here...")
        generate_button = gr.Button("Generate")

    output = gr.Textbox(label="Output")

    generate_button.click(fn=infer, inputs=[prompt], outputs=[output])

if __name__ == "__main__":
    demo.launch()