zhiqiulin committed
Commit ef0de87 · verified · 1 Parent(s): 68d8a12

Update app.py

Files changed (1)
  1. app.py +10 -15
app.py CHANGED
@@ -1,39 +1,34 @@
 import gradio as gr
 import spaces
 
-
-# torch.autocast = lambda device_type, dtype: torch.autocast(device_type, torch.float)
-
-# Initialize the model only once
-# if torch.cuda.is_available():
-#     model_pipe = VQAScore(model="clip-flant5-xl", device="cpu") # our recommended scoring model
-#     print("Model initialized!")
+# Initialize the model only once, outside of any function
+# Ensure that CUDA initialization happens within the worker process
+model_pipe = None
 
 @spaces.GPU
 def generate(model_name, image, text):
+    global model_pipe
     import torch
     torch.jit.script = lambda f: f
 
     from t2v_metrics import VQAScore, list_all_vqascore_models
 
-    print(list_all_vqascore_models())
+    if model_pipe is None:
+        print("Initializing model...")
+        model_pipe = VQAScore(model="clip-flant5-xl", device="cuda") # our recommended scoring model
+        # model_pipe.to("cuda")
 
-    # print("Model_name:", model_name)
+    print(list_all_vqascore_models())
     print("Image:", image)
     print("Text:", text)
-    model_pipe = VQAScore(model="clip-flant5-xl") # our recommended scoring model
-    # print("Model initialized, now moving to cuda")
-    model_pipe.to("cuda")
+
     print("Generating!")
-    # with torch.autocast(device_type='cuda'):
-    #     with torch.autocast(device_type='cuda', dtype=torch.float):
     result = model_pipe(images=[image], texts=[text])
     return result
 
 iface = gr.Interface(
     fn=generate, # function to call
     inputs=[gr.Dropdown(["clip-flant5-xl", "clip-flant5-xxl"], label="Model Name"), gr.Image(type="filepath"), gr.Textbox(label="Prompt")], # define the types of inputs
-    # inputs=[gr.Image(type="filepath"), gr.Textbox(label="Prompt")], # define the types of inputs
     outputs="number", # define the type of output
     title="VQAScore", # title of the app
     description="This model evaluates the similarity between an image and a text prompt."
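
For reference, a minimal sketch of app.py as it stands after this commit, reconstructed from the new side of the diff. The hunk ends at the description= line, so the closing parenthesis of gr.Interface(...) and the final iface.launch() are assumptions, not part of the shown change; comments marked "note" or "assumed" are likewise not in the original.

import gradio as gr
import spaces

# Initialize the model only once, outside of any function.
# CUDA initialization must happen inside the @spaces.GPU worker process.
model_pipe = None


@spaces.GPU
def generate(model_name, image, text):
    global model_pipe
    import torch
    torch.jit.script = lambda f: f  # make torch.jit.script a no-op inside the worker

    from t2v_metrics import VQAScore, list_all_vqascore_models

    # Lazy initialization: build the scoring pipeline on the first request,
    # so the weights load once per worker instead of on every call.
    if model_pipe is None:
        print("Initializing model...")
        # Note: model_name from the dropdown is not used yet; clip-flant5-xl is hard-coded.
        model_pipe = VQAScore(model="clip-flant5-xl", device="cuda")

    print(list_all_vqascore_models())
    print("Image:", image)
    print("Text:", text)

    print("Generating!")
    result = model_pipe(images=[image], texts=[text])
    return result


iface = gr.Interface(
    fn=generate,
    inputs=[
        gr.Dropdown(["clip-flant5-xl", "clip-flant5-xxl"], label="Model Name"),
        gr.Image(type="filepath"),
        gr.Textbox(label="Prompt"),
    ],
    outputs="number",
    title="VQAScore",
    description="This model evaluates the similarity between an image and a text prompt.",
)

iface.launch()  # assumed; the launch call sits outside the diff hunk shown above

The module-level model_pipe = None plus the in-function check is what moves model loading off import time: per the commit's own comment, CUDA initialization has to happen within the worker process, so the VQAScore pipeline is only constructed once the @spaces.GPU-decorated function actually runs.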