import gradio as gr
import spaces
import os

# Global model variable; do not initialize it or move it to CUDA at import time.
# CUDA initialization must happen inside the worker process that @spaces.GPU provides.
model_pipe = None


@spaces.GPU
def generate(model_name, image, text):
    global model_pipe

    # Debugging lines to trace where CUDA gets initialized
    import torch
    print(f"PID: {os.getpid()}")
    print(f"Before import: CUDA available: {torch.cuda.is_available()}")

    torch.jit.script = lambda f: f  # Disable TorchScript compilation to avoid scripting errors
    from t2v_metrics import VQAScore, list_all_vqascore_models
    print(f"After import: CUDA available: {torch.cuda.is_available()}")

    # Worker process: perform all GPU-related initialization here.
    # The pipeline is cached after the first request, so the dropdown selection
    # only takes effect the first time a model is loaded.
    if model_pipe is None:
        print("Initializing model in PID:", os.getpid())
        model_pipe = VQAScore(model=model_name, device="cuda")  # e.g. "clip-flant5-xl", the recommended scoring model
        print(f"Model initialized: CUDA available: {torch.cuda.is_available()}")

    print(list_all_vqascore_models())  # Debug: list available models
    print("Image:", image)             # Debug: print image path
    print("Text:", text)               # Debug: print text input
    print("Generating!")

    # Wrap the model call in a try/except block to surface CUDA errors during inference
    try:
        result = model_pipe(images=[image], texts=[text])
    except RuntimeError as e:
        print(f"RuntimeError during model inference: {e}")
        raise

    return result


iface = gr.Interface(
    fn=generate,
    inputs=[
        gr.Dropdown(["clip-flant5-xl", "clip-flant5-xxl"], label="Model Name"),
        gr.Image(type="filepath"),
        gr.Textbox(label="Prompt"),
    ],
    outputs="number",
    title="VQAScore",
    description="This model evaluates the similarity between an image and a text prompt.",
)
iface.launch()