|
import os |
|
import gradio as gr |
|
import torch |
|
from transformers import AutoTokenizer, AutoModel |
|
from safetensors.torch import load_file |
|
|
|
|
|
# Hugging Face access token, expected to be provided through the environment.
# Fail fast at start-up when it is missing or empty.
token = os.environ.get("HUGGINGFACE_API_TOKEN")
if token is None or token == "":
    raise ValueError(
        "HUGGINGFACE_API_TOKEN is not set. Please add it in the Secrets section of your Space."
    )
|
|
|
|
|
# Pick the compute device once at start-up: CUDA when torch reports it as
# available, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
|
|
|
|
|
# Hub repository that provides both the tokenizer and the model weights.
model_repo = "Grandediw/lora_model"

# Authenticate with the token read from HUGGINGFACE_API_TOKEN. Passing `True`
# would make the library look for a locally cached login instead, which is not
# present when only the environment secret is configured.
# NOTE(review): recent transformers releases spell this keyword `token=`;
# `use_auth_token=` is kept here because it is what this file already uses --
# switch once the pinned transformers version is confirmed.
tokenizer = AutoTokenizer.from_pretrained(model_repo, use_auth_token=token)
base_model = AutoModel.from_pretrained(model_repo, use_auth_token=token)
|
|
|
|
|
# Adapter checkpoint, resolved relative to the current working directory.
lora_weights_path = "adapter_model.safetensors"
lora_weights = load_file(lora_weights_path)

# Add every adapter tensor whose key equals a base-model parameter name onto
# that parameter, in place.
# NOTE(review): if the checkpoint stores its tensors under keys that differ
# from the names yielded by `named_parameters()`, nothing matches and this loop
# silently does nothing -- confirm against the checkpoint's key list.
for name, param in base_model.named_parameters():
    delta = lora_weights.get(name)
    if delta is None:
        continue
    # The model is only moved to `device` after this loop, so the adapter
    # tensor must follow the parameter's *current* device; sending it to
    # `device` here would mix CPU and CUDA tensors on a GPU host.
    param.data += delta.to(param.device, dtype=param.dtype)

# Move the (possibly patched) model to the selected compute device.
base_model = base_model.to(device)
|
|
|
|
|
def infer(prompt, negative_prompt=None):
    """Run the model on *prompt* and return its pooled hidden state.

    The prompt is tokenized, moved to the module-level ``device``, and fed
    through ``base_model``. The model's ``last_hidden_state`` is then averaged
    over dimension 1 (presumably the token axis -- confirm for this model).

    Args:
        prompt: Text handed to the tokenizer.
        negative_prompt: Accepted for interface compatibility; not used.

    Returns:
        A NumPy array with the averaged hidden state, copied to the CPU.
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    # Inference only: disable autograd so no computation graph is built and
    # retained for each request.
    with torch.no_grad():
        outputs = base_model(**inputs)
        pooled = outputs.last_hidden_state.mean(dim=1)
    return pooled.detach().cpu().numpy()
|
|
|
|
|
# Custom stylesheet passed to gr.Blocks: a centered card container and a bold,
# centered header. Selectors match the element ids used in the layout.
css = """
#interface-container {
    margin: 0 auto;
    max-width: 700px;
    padding: 15px;
    border-radius: 10px;
    background-color: #f9f9f9;
    box-shadow: 0px 4px 10px rgba(0, 0, 0, 0.1);
}
#header {
    text-align: center;
    font-size: 1.5em;
    font-weight: bold;
    margin-bottom: 20px;
    color: #333;
}
"""
|
|
|
# Build the UI: a single styled container holding the header, the prompt box,
# the run button and the output box.
with gr.Blocks(css=css) as demo:
    # gr.Column is used as the container because gr.Box is not available in
    # newer Gradio releases; the card look comes from the #interface-container
    # CSS rule, which is attached through elem_id.
    with gr.Column(elem_id="interface-container"):
        gr.Markdown("<div id='header'>LoRA Model Inference</div>")

        prompt = gr.Textbox(label="Prompt", placeholder="Enter your prompt here...")
        run_button = gr.Button("Generate Output", variant="primary")

        output = gr.Textbox(label="Output")

    # Clicking the button sends the prompt text to infer() and shows its
    # return value in the output box.
    run_button.click(fn=infer, inputs=[prompt], outputs=[output])


# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|
|