zhiqiulin committed
Commit ef0de87 · verified · 1 Parent(s): 68d8a12

Update app.py

Files changed (1)
  1. app.py +10 -15
app.py CHANGED
@@ -1,39 +1,34 @@
 import gradio as gr
 import spaces
 
-
-# torch.autocast = lambda device_type, dtype: torch.autocast(device_type, torch.float)
-
-# Initialize the model only once
-# if torch.cuda.is_available():
-#     model_pipe = VQAScore(model="clip-flant5-xl", device="cpu") # our recommended scoring model
-#     print("Model initialized!")
+# Initialize the model only once, outside of any function
+# Ensure that CUDA initialization happens within the worker process
+model_pipe = None
 
 @spaces.GPU
 def generate(model_name, image, text):
+    global model_pipe
     import torch
     torch.jit.script = lambda f: f
 
     from t2v_metrics import VQAScore, list_all_vqascore_models
 
-    print(list_all_vqascore_models())
+    if model_pipe is None:
+        print("Initializing model...")
+        model_pipe = VQAScore(model="clip-flant5-xl", device="cuda") # our recommended scoring model
+        # model_pipe.to("cuda")
 
-    # print("Model_name:", model_name)
+    print(list_all_vqascore_models())
     print("Image:", image)
     print("Text:", text)
-    model_pipe = VQAScore(model="clip-flant5-xl") # our recommended scoring model
-    # print("Model initialized, now moving to cuda")
-    model_pipe.to("cuda")
+
     print("Generating!")
-    # with torch.autocast(device_type='cuda'):
-    #     with torch.autocast(device_type='cuda', dtype=torch.float):
     result = model_pipe(images=[image], texts=[text])
     return result
 
 iface = gr.Interface(
     fn=generate, # function to call
     inputs=[gr.Dropdown(["clip-flant5-xl", "clip-flant5-xxl"], label="Model Name"), gr.Image(type="filepath"), gr.Textbox(label="Prompt")], # define the types of inputs
-    # inputs=[gr.Image(type="filepath"), gr.Textbox(label="Prompt")], # define the types of inputs
     outputs="number", # define the type of output
     title="VQAScore", # title of the app
     description="This model evaluates the similarity between an image and a text prompt."
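
For reference, a minimal sketch of app.py as it stands after this commit, reconstructed from the new side of the diff. The hunk ends at the description= line, so the closing parenthesis of gr.Interface(...) and the final iface.launch() are assumptions, not part of the shown change; comments marked "note" or "assumed" are likewise not in the original.

import gradio as gr
import spaces

# Initialize the model only once, outside of any function.
# CUDA initialization must happen inside the @spaces.GPU worker process.
model_pipe = None


@spaces.GPU
def generate(model_name, image, text):
    global model_pipe
    import torch
    torch.jit.script = lambda f: f  # make torch.jit.script a no-op inside the worker

    from t2v_metrics import VQAScore, list_all_vqascore_models

    # Lazy initialization: build the scoring pipeline on the first request,
    # so the weights load once per worker instead of on every call.
    if model_pipe is None:
        print("Initializing model...")
        # Note: model_name from the dropdown is not used yet; clip-flant5-xl is hard-coded.
        model_pipe = VQAScore(model="clip-flant5-xl", device="cuda")

    print(list_all_vqascore_models())
    print("Image:", image)
    print("Text:", text)

    print("Generating!")
    result = model_pipe(images=[image], texts=[text])
    return result


iface = gr.Interface(
    fn=generate,
    inputs=[
        gr.Dropdown(["clip-flant5-xl", "clip-flant5-xxl"], label="Model Name"),
        gr.Image(type="filepath"),
        gr.Textbox(label="Prompt"),
    ],
    outputs="number",
    title="VQAScore",
    description="This model evaluates the similarity between an image and a text prompt.",
)

iface.launch()  # assumed; the launch call sits outside the diff hunk shown above

The module-level model_pipe = None plus the in-function check is what moves model loading off import time: per the commit's own comment, CUDA initialization has to happen within the worker process, so the VQAScore pipeline is only constructed once the @spaces.GPU-decorated function actually runs.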