jcrissa committed on
Commit b80a390 · 1 Parent(s): 3c2f453

edit app.py

Files changed (1)
  1. app.py +14 -12
app.py CHANGED
@@ -7,11 +7,10 @@ from transformers import AutoTokenizer
 # Load your fine-tuned Phi-3 model from Hugging Face
 MODEL_NAME = "jcrissa/phi3-new-t2i"

-# Check if CUDA is available, otherwise fall back to CPU
-device = "cuda" if torch.cuda.is_available() else "cpu"
+# device = "cuda" if torch.cuda.is_available() else "cpu"
+
+device = "cuda"

-# Function to load the Phi-3 model and tokenizer
-@spaces.GPU  # Reintroduced spaces.GPU decorator for GPU setup
 def load_phi3_model():
     try:
         # Load the Phi-3 model and tokenizer from Hugging Face
@@ -22,6 +21,9 @@ def load_phi3_model():
         )
         model.to(device)

+        # Prepare the model for inference
+        model = FastLanguageModel.for_inference(model)  # This is the necessary line
+
         # Configure tokenizer settings
         tokenizer.pad_token = tokenizer.eos_token
         tokenizer.padding_side = "left"
@@ -37,6 +39,7 @@ phi3_model, phi3_tokenizer = load_phi3_model()
 if phi3_model is None or phi3_tokenizer is None:
     raise RuntimeError("Model and tokenizer could not be loaded. Please check the Hugging Face model path or network connection.")

+@spaces.GPU(duration=120)
 # Function to generate text using Phi-3
 def generate(plain_text):
     try:
@@ -44,12 +47,11 @@ def generate(plain_text):
         input_ids = phi3_tokenizer(plain_text.strip(), return_tensors="pt").input_ids.to(device)
         eos_id = phi3_tokenizer.eos_token_id

-        # Generate the output from the model
+        # Generate the output from the model using sampling instead of beam search
         outputs = phi3_model.generate(
             input_ids,
-            do_sample=True,
-            max_new_tokens=75,
-            num_beams=8,
+            do_sample=True,  # Use sampling instead of beam search
+            max_new_tokens=75,
             num_return_sequences=1,
             eos_token_id=eos_id,
             pad_token_id=eos_id,
@@ -62,18 +64,18 @@ def generate(plain_text):
     except Exception as e:
         return f"Error during text generation: {e}"

+
 # Setup Gradio Interface
 txt = grad.Textbox(lines=1, label="Input Text", placeholder="Enter your prompt")
 out = grad.Textbox(lines=1, label="Generated Text")

-# Launch Gradio Interface with ZeroGPU-compatible setup
-gr.Interface(
+grad.Interface(
     fn=generate,
     inputs=txt,
     outputs=out,
     title="Fine-Tuned Phi-3 Model",
     description="This demo uses a fine-tuned Phi-3 model to optimize text prompts.",
-    allow_flagging="never",
+    flagging_mode="never",  # Replace `allow_flagging` with `flagging_mode`
     cache_examples=False,
     theme="default"
-).launch(share=True)  # Use `queue=True` instead of `enable_queue`
+).launch(share=True)  # Use `queue=True` instead of `enable_queue`