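"""Gradio Space that compares pre-trained and fine-tuned text generation.

For a selected model (GPT-2, GPT-Neo, or GPT-J), the app loads both the
pre-trained checkpoint and a fine-tuned version, generates a response to the
same prompt with each, and shows the two outputs side by side. Generation runs
on ZeroGPU hardware via the `spaces.GPU` decorator. Note that the fine-tuned
model paths in `load_pipeline` are placeholders and must point at real
checkpoints before the comparison will work.
"""
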
import spaces
import os
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import torch
import gradio as gr
import wandb

# Retrieve Hugging Face and W&B API keys from environment variables
hf_api_key = os.getenv("HF_TOKEN")
wandb_api_key = os.getenv("WANDB_API_KEY")

# Validate API keys
if not hf_api_key:
    raise ValueError("Hugging Face API key not found. Please ensure it's set as a secret in the Space.")
if not wandb_api_key:
    raise ValueError("Weights & Biases API key not found. Please ensure it's set as a secret in the Space.")

# Configure W&B (if using for tracking)
wandb.login(key=wandb_api_key)


# Define function to load model and pipeline dynamically
def load_pipeline(model_name, fine_tuned=False):
    # Set model paths for pre-trained and fine-tuned versions
    paths = {
        "gpt2": ("gpt2-medium", "path/to/finetuned_gpt2"),
        "gpt_neo": ("EleutherAI/gpt-neo-1.3B", "path/to/finetuned_gpt_neo"),
        "gpt_j": ("EleutherAI/gpt-j-6B", "path/to/finetuned_gpt_j")
    }
    pretrained_model_name, finetuned_model_path = paths[model_name]
    model_path = finetuned_model_path if fine_tuned else pretrained_model_name

    # Load model and tokenizer
    model = AutoModelForCausalLM.from_pretrained(model_path)
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    tokenizer.pad_token = tokenizer.eos_token

    # Set up pipeline with GPU
    return pipeline("text-generation", model=model, tokenizer=tokenizer, device=0)
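

# On a ZeroGPU Space the app runs on CPU until a function decorated with
# @spaces.GPU is called; the decorator attaches a GPU for up to `duration`
# seconds, which is why the pipelines are loaded inside the handler below
# rather than at import time.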
# Add the GPU decorator to the generation function
@spaces.GPU(duration=120)  # Specify duration as needed
def compare_single_model(prompt, model_choice, temperature, top_p, max_length):
    pretrained_pipeline = load_pipeline(model_choice, fine_tuned=False)
    finetuned_pipeline = load_pipeline(model_choice, fine_tuned=True)

    # Generate responses (sampling must be enabled for temperature/top_p to take effect)
    pretrained_response = pretrained_pipeline(
        prompt, do_sample=True, temperature=temperature, top_p=top_p, max_length=int(max_length)
    )[0]["generated_text"]
    finetuned_response = finetuned_pipeline(
        prompt, do_sample=True, temperature=temperature, top_p=top_p, max_length=int(max_length)
    )[0]["generated_text"]

    # Free up memory after use
    del pretrained_pipeline, finetuned_pipeline
    torch.cuda.empty_cache()

    return pretrained_response, finetuned_response

# Gradio interface
interface = gr.Interface(
    fn=compare_single_model,
    inputs=[
        gr.Textbox(label="Prompt"),
        gr.Dropdown(choices=["gpt2", "gpt_neo", "gpt_j"], label="Select Model"),
        gr.Slider(0.1, 1.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(0.1, 1.0, value=0.9, step=0.1, label="Top-p"),
        gr.Slider(10, 100, value=50, step=10, label="Max Length")
    ],
    outputs=[
        gr.Textbox(label="Pre-trained Response"),
        gr.Textbox(label="Fine-tuned Response")
    ],
    title="Single Model Comparison: Pre-trained vs Fine-tuned",
    description="Enter a prompt and select a model to compare responses from the pre-trained and fine-tuned versions. Adjust generation parameters with the sliders."
)

# Launch the interface
interface.launch()
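
# When deployed as a Space, Hugging Face runs this file automatically; to try
# it locally, run `python app.py` and open the local Gradio URL it prints
# (http://127.0.0.1:7860 by default).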