sflindrs committed · verified
Commit 6133d17 · 1 Parent(s): d404fc4

Update app.py

Files changed (1): app.py (+1 -6)
app.py CHANGED
@@ -30,7 +30,6 @@ def resolve_model(chosen, custom):
     return chosen
 
 # --- Main inference function ---
-# If you are using ZeroGPU on Hugging Face Spaces, make sure to set the environment variable USE_GPU=1.
 # The @spaces.GPU() decorator ensures that heavy inference runs on GPU in a ZeroGPU Space.
 @spaces.GPU()
 def compare_image_to_text_models(image, prompt, model1_choice, model1_custom, model2_choice, model2_custom):
@@ -42,18 +41,15 @@ def compare_image_to_text_models(image, prompt, model1_choice, model1_custom, model2_choice, model2_custom):
     device = 0 if os.environ.get("USE_GPU", "0") == "1" else -1
 
     # Create pipelines for image-to-text.
-    # Note: Many instruction-following image models (e.g. BLIP2) accept a text prompt along with an image.
-    # We use the "image-to-text" task here so that the prompt is taken into account.
+    # These models should support a call signature of (image, prompt)
     pipe1 = pipeline("image-to-text", model=model1_name, device=device)
     pipe2 = pipeline("image-to-text", model=model2_name, device=device)
 
     # Run inference on the image with the provided prompt.
-    # Depending on the model, the call signature may vary; here we assume a simple call with (image, prompt).
     output1 = pipe1(image, prompt)
     output2 = pipe2(image, prompt)
 
     # Extract the generated text.
-    # (Many pipelines return a list of dicts with key 'generated_text'; if not, we simply convert the output to a string.)
     def extract_text(output):
         if isinstance(output, list) and len(output) > 0 and isinstance(output[0], dict) and "generated_text" in output[0]:
             return output[0]["generated_text"]
@@ -70,7 +66,6 @@ def compare_image_to_text_models(image, prompt, model1_choice, model1_custom, model2_choice, model2_custom):
     return chat1, chat2
 
 # --- Build the Gradio interface ---
-# Pre-populated sample prompt.
 sample_prompt = "Describe the image in explicit detail. Return a nested JSON object in response."
 
 with gr.Blocks(title="Image Text-to-Text Comparison Tool") as demo:
 
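For reference, here is a minimal sketch of the (image, prompt) call pattern that the comment added in this commit assumes, using the transformers image-to-text pipeline. The model name and image path are illustrative placeholders, not values from this Space, and note that ImageToTextPipeline takes the prompt as a keyword argument, which is the safer spelling of pipe(image, prompt):

# Hedged sketch of the call pattern assumed in app.py.
# The model name and image path below are placeholders.
from transformers import pipeline

pipe = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base", device=-1)  # -1 = CPU

# The prompt conditions the generated caption; recent transformers versions
# expect it as a keyword argument rather than a second positional argument.
output = pipe("photo.jpg", prompt="Describe the image in explicit detail.")

# Pipelines for this task typically return a list of dicts with 'generated_text'.
print(output[0]["generated_text"])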