Spaces:
Runtime error
Runtime error
import torch | |
from transformers import AutoModelForCausalLM, AutoTokenizer | |
from peft import PeftModel | |
import gradio as gr | |
# Step 1: Load base model
# Hub identifiers: the base checkpoint and the LoRA adapter applied on top of it.
base_model_name = "meta-llama/Llama-3.3-70B-Instruct"
adapter_repo = "daresearch/Llama-3.3-70B-ft-exec-roles"

# Weights are requested in float16; device_map="auto" leaves device placement
# to the library rather than pinning the model to one device.
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    device_map="auto",
    torch_dtype=torch.float16,
)

# Step 2: Load LoRA adapter
# Wraps the already-loaded base model with the adapter weights from adapter_repo.
model_with_adapter = PeftModel.from_pretrained(
    base_model,
    adapter_repo,
    device_map="auto",
)
print(f"Loaded LoRA adapter from {adapter_repo}")

# Verify adapter configuration
# `.config` on the wrapped model is the *base* model's config; the adapter
# settings are exposed through `peft_config`, so print that instead.
print(model_with_adapter.peft_config)

# Step 3: Load tokenizer
# The tokenizer comes from the base checkpoint, not from the adapter repo.
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
# Step 4: Define inference function
def generate_text(prompt, max_length=1024):
    """Generate text for *prompt* with the adapter-wrapped model.

    Args:
        prompt: Text to feed to the model. It is tokenized and truncated to
            at most 1024 tokens.
        max_length: Cap on the total sequence length (prompt tokens plus
            generated tokens) passed to ``generate()``. UI widgets may hand
            this over as a float, so it is coerced to ``int``.

    Returns:
        The decoded output sequence with special tokens removed. For a causal
        LM the sequence returned by ``generate()`` normally starts with the
        prompt tokens, so the prompt is expected to be part of the result.
        If the prompt alone already reaches ``max_length`` tokens, the decoded
        (possibly truncated) prompt is returned without calling the model.
    """
    # generate() expects an integer length; a slider can deliver e.g. 256.0.
    max_length = int(max_length)

    # Follow the model's own device instead of hard-coding "cuda".
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True,
        max_length=1024,
    ).to(model_with_adapter.device)

    prompt_len = inputs["input_ids"].shape[-1]
    if prompt_len >= max_length:
        # No room left for new tokens under this cap; newer transformers
        # releases reject such a call, so return the prompt text instead.
        return tokenizer.decode(inputs["input_ids"][0], skip_special_tokens=True)

    outputs = model_with_adapter.generate(**inputs, max_length=max_length)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
# Step 5: Create Gradio interface
# Input widgets, listed in the same order as generate_text's parameters.
_prompt_box = gr.Textbox(
    label="Prompt",
    placeholder="Enter your text prompt here...",
)
_length_slider = gr.Slider(
    label="Max Length",
    minimum=50,
    maximum=1024,
    step=10,
    value=256,
)

# Single-function UI: the two inputs above go in, plain text comes out.
iface = gr.Interface(
    fn=generate_text,
    inputs=[_prompt_box, _length_slider],
    outputs="text",
    title="LLaMA + LoRA Text Generator",
    description="Generate text using a LLaMA model with LoRA adapters.",
)

# Step 6: Launch Gradio app
# Only start the server when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()