trying to update to use Zero GPU
app.py CHANGED

@@ -1,3 +1,4 @@
+import spaces
 import os
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 import gradio as gr
@@ -18,27 +19,27 @@ wandb.login(key=wandb_api_key)
 
 # Define function to load model and pipeline dynamically
 def load_pipeline(model_name, fine_tuned=False):
-    #
+    # Set model paths for pre-trained and fine-tuned versions
     paths = {
-        "gpt2": ("gpt2-medium", "
-        "gpt_neo": ("EleutherAI/gpt-neo-1.3B", "
-        "gpt_j": ("EleutherAI/gpt-j-6B", "
+        "gpt2": ("gpt2-medium", "path/to/finetuned_gpt2"),
+        "gpt_neo": ("EleutherAI/gpt-neo-1.3B", "path/to/finetuned_gpt_neo"),
+        "gpt_j": ("EleutherAI/gpt-j-6B", "path/to/finetuned_gpt_j")
     }
 
     pretrained_model_name, finetuned_model_path = paths[model_name]
     model_path = finetuned_model_path if fine_tuned else pretrained_model_name
 
     # Load model and tokenizer
-    model = AutoModelForCausalLM.from_pretrained(model_path
-    tokenizer = AutoTokenizer.from_pretrained(model_path
+    model = AutoModelForCausalLM.from_pretrained(model_path)
+    tokenizer = AutoTokenizer.from_pretrained(model_path)
     tokenizer.pad_token = tokenizer.eos_token
 
-    # Set up pipeline with
-    return pipeline("text-generation", model=model, tokenizer=tokenizer)
+    # Set up pipeline with GPU
+    return pipeline("text-generation", model=model, tokenizer=tokenizer, device=0)
 
-#
+# Add the GPU decorator to the generate function
+@spaces.GPU(duration=120)  # Specify duration as needed
 def compare_single_model(prompt, model_choice, temperature, top_p, max_length):
-    # Load pre-trained and fine-tuned pipelines
     pretrained_pipeline = load_pipeline(model_choice, fine_tuned=False)
     finetuned_pipeline = load_pipeline(model_choice, fine_tuned=True)
 
@@ -46,9 +47,13 @@ def compare_single_model(prompt, model_choice, temperature, top_p, max_length):
     pretrained_response = pretrained_pipeline(prompt, temperature=temperature, top_p=top_p, max_length=int(max_length))[0]["generated_text"]
     finetuned_response = finetuned_pipeline(prompt, temperature=temperature, top_p=top_p, max_length=int(max_length))[0]["generated_text"]
 
+    # Free up memory after use
+    del pretrained_pipeline, finetuned_pipeline
+    torch.cuda.empty_cache()
+
     return pretrained_response, finetuned_response
 
-# Gradio interface
+# Gradio interface
 interface = gr.Interface(
     fn=compare_single_model,
     inputs=[
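
For context, the ZeroGPU pattern this commit adopts is: import the `spaces` package, decorate each GPU-using function with `@spaces.GPU(duration=...)` (duration is roughly the number of seconds of GPU time requested per call), and do the CUDA work inside that function, since on ZeroGPU hardware the GPU is only attached while a decorated function runs. A minimal sketch of the same pattern follows; the `gpt2` model name and generation settings are illustrative, not taken from this Space. One thing to verify against the full app.py: the new `torch.cuda.empty_cache()` call needs `torch` to be imported, and this diff does not add that import, so it must already be present in the unshown lines (or still needs adding).

    import spaces
    import torch  # required by torch.cuda.empty_cache(); not added by the diff above
    import gradio as gr
    from transformers import pipeline

    @spaces.GPU(duration=120)  # seconds of GPU time requested per call
    def generate(prompt):
        # Build the pipeline inside the decorated function: on ZeroGPU,
        # CUDA is only available while this function is running.
        generator = pipeline("text-generation", model="gpt2", device=0)  # illustrative model
        text = generator(prompt, max_length=64)[0]["generated_text"]
        del generator
        torch.cuda.empty_cache()  # release cached GPU memory before the GPU detaches
        return text

    gr.Interface(fn=generate, inputs="text", outputs="text").launch()

Loading the pipelines inside the decorated function (as `compare_single_model` does via `load_pipeline`) matches this constraint, at the cost of reloading the models on every call.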