File size: 2,272 Bytes
40b886d
7cb8b0c
a534797
40b886d
 
7cb8b0c
40b886d
5e8b7be
 
40b886d
a534797
40b886d
a534797
7cb8b0c
40b886d
 
 
 
7cb8b0c
40b886d
 
 
7cb8b0c
40b886d
 
 
 
7cb8b0c
40b886d
 
7cb8b0c
40b886d
 
 
 
 
7cb8b0c
40b886d
7cb8b0c
1f2775d
7cb8b0c
1f2775d
40b886d
1f2775d
 
40b886d
1f2775d
 
 
 
40b886d
1f2775d
 
 
7cb8b0c
 
 
1f2775d
40b886d
 
 
 
 
7cb8b0c
40b886d
 
1f2775d
40b886d
 
7cb8b0c
40b886d
7cb8b0c
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import os
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModel
from safetensors.torch import load_file

# Load the Hugging Face API token (needed for gated/private repositories).
token = os.getenv("HUGGINGFACE_API_TOKEN")
if not token:
    raise ValueError("HUGGINGFACE_API_TOKEN is not set. Please add it in the Secrets section of your Space.")

# Configure device
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the tokenizer and model, authenticating with the token read above.
# Fix: the original validated `token` but never passed it — `use_auth_token=True`
# relies on a cached CLI login, which a fresh Space container does not have.
model_repo = "Grandediw/lora_model"
tokenizer = AutoTokenizer.from_pretrained(model_repo, token=token)
base_model = AutoModel.from_pretrained(model_repo, token=token)

# Move the model to the target device BEFORE merging the adapter weights so
# both operands of the in-place add live on the same device. (The original
# added `.to(device)` tensors into parameters still on CPU, which raises a
# device-mismatch RuntimeError on CUDA machines.)
base_model = base_model.to(device)

# Load LoRA adapter weights
lora_weights_path = "adapter_model.safetensors"  # Ensure this file is present in the same directory
lora_weights = load_file(lora_weights_path)

# Merge adapter tensors into base parameters with matching names.
# NOTE(review): this assumes the safetensors file stores full-size deltas keyed
# by base-parameter name. A standard PEFT adapter stores low-rank
# lora_A/lora_B factors under prefixed names that will NOT match
# `named_parameters()` — verify the adapter format, or load it via peft.
with torch.no_grad():  # in-place parameter edits must not build an autograd graph
    for name, param in base_model.named_parameters():
        if name in lora_weights:
            param.add_(lora_weights[name].to(device=param.device, dtype=param.dtype))

# Inference function
def infer(prompt, negative_prompt=None):
    """Encode *prompt* with the model and return a pooled embedding.

    Args:
        prompt: Input text to tokenize and feed through the model.
        negative_prompt: Accepted for interface compatibility but currently
            unused by this placeholder implementation.

    Returns:
        numpy.ndarray: Mean of the last hidden state over the sequence axis,
        shape (1, hidden_dim). This is a placeholder result, not generated text.
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    # Fix: run under no_grad — inference was building an autograd graph,
    # wasting memory on every request.
    with torch.no_grad():
        outputs = base_model(**inputs)
    return outputs.last_hidden_state.mean(dim=1).cpu().numpy()

# Gradio Interface
# Custom CSS injected into gr.Blocks below: centers the interface container
# (max 700px) and styles the header banner. Keep selectors in sync with the
# elem_id / markdown ids used when building the UI.
css = """
#interface-container {
    margin: 0 auto;
    max-width: 700px;
    padding: 15px;
    border-radius: 10px;
    background-color: #f9f9f9;
    box-shadow: 0px 4px 10px rgba(0, 0, 0, 0.1);
}
#header {
    text-align: center;
    font-size: 1.5em;
    font-weight: bold;
    margin-bottom: 20px;
    color: #333;
}
"""

# Build the UI: a single column (prompt -> button -> output) inside a styled box.
# NOTE(review): gr.Box was removed in Gradio 4.x (use gr.Group/gr.Column there);
# this code assumes a Gradio 3.x runtime — confirm the pinned version.
with gr.Blocks(css=css) as demo:
    with gr.Box(elem_id="interface-container"):
        gr.Markdown("<div id='header'>LoRA Model Inference</div>")
        
        # Input for prompt and run button
        prompt = gr.Textbox(label="Prompt", placeholder="Enter your prompt here...")
        run_button = gr.Button("Generate Output", variant="primary")

        # Display output
        # NOTE(review): infer() returns a numpy array, which Gradio will just
        # stringify into this Textbox — presumably a placeholder; verify.
        output = gr.Textbox(label="Output")

        # Connect button with inference
        run_button.click(fn=infer, inputs=[prompt], outputs=[output])

# Launch the app
if __name__ == "__main__":
    demo.launch()