Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -30,7 +30,6 @@ def resolve_model(chosen, custom):
|
|
30 |
return chosen
|
31 |
|
32 |
# --- Main inference function ---
|
33 |
-
# If you are using ZeroGPU on Hugging Face Spaces, make sure to set the environment variable USE_GPU=1.
|
34 |
# The @spaces.GPU() decorator ensures that heavy inference runs on GPU in a ZeroGPU Space.
|
35 |
@spaces.GPU()
|
36 |
def compare_image_to_text_models(image, prompt, model1_choice, model1_custom, model2_choice, model2_custom):
|
@@ -42,18 +41,15 @@ def compare_image_to_text_models(image, prompt, model1_choice, model1_custom, mo
|
|
42 |
device = 0 if os.environ.get("USE_GPU", "0") == "1" else -1
|
43 |
|
44 |
# Create pipelines for image-to-text.
|
45 |
-
#
|
46 |
-
# We use the "image-to-text" task here so that the prompt is taken into account.
|
47 |
pipe1 = pipeline("image-to-text", model=model1_name, device=device)
|
48 |
pipe2 = pipeline("image-to-text", model=model2_name, device=device)
|
49 |
|
50 |
# Run inference on the image with the provided prompt.
|
51 |
-
# Depending on the model, the call signature may vary; here we assume a simple call with (image, prompt).
|
52 |
output1 = pipe1(image, prompt)
|
53 |
output2 = pipe2(image, prompt)
|
54 |
|
55 |
# Extract the generated text.
|
56 |
-
# (Many pipelines return a list of dicts with key 'generated_text'; if not, we simply convert the output to a string.)
|
57 |
def extract_text(output):
|
58 |
if isinstance(output, list) and len(output) > 0 and isinstance(output[0], dict) and "generated_text" in output[0]:
|
59 |
return output[0]["generated_text"]
|
@@ -70,7 +66,6 @@ def compare_image_to_text_models(image, prompt, model1_choice, model1_custom, mo
|
|
70 |
return chat1, chat2
|
71 |
|
72 |
# --- Build the Gradio interface ---
|
73 |
-
# Pre-populated sample prompt.
|
74 |
sample_prompt = "Describe the image in explicit detail. Return a nested JSON object in response."
|
75 |
|
76 |
with gr.Blocks(title="Image Text-to-Text Comparison Tool") as demo:
|
|
|
30 |
return chosen
|
31 |
|
32 |
# --- Main inference function ---
|
|
|
33 |
# The @spaces.GPU() decorator ensures that heavy inference runs on GPU in a ZeroGPU Space.
|
34 |
@spaces.GPU()
|
35 |
def compare_image_to_text_models(image, prompt, model1_choice, model1_custom, model2_choice, model2_custom):
|
|
|
41 |
device = 0 if os.environ.get("USE_GPU", "0") == "1" else -1
|
42 |
|
43 |
# Create pipelines for image-to-text.
|
44 |
+
# Both models must accept being called positionally as (image, prompt), which is how the pipelines are invoked below.
|
|
|
45 |
pipe1 = pipeline("image-to-text", model=model1_name, device=device)
|
46 |
pipe2 = pipeline("image-to-text", model=model2_name, device=device)
|
47 |
|
48 |
# Run inference on the image with the provided prompt.
|
|
|
49 |
output1 = pipe1(image, prompt)
|
50 |
output2 = pipe2(image, prompt)
|
51 |
|
52 |
# Extract the generated text.
|
|
|
53 |
def extract_text(output):
|
54 |
if isinstance(output, list) and len(output) > 0 and isinstance(output[0], dict) and "generated_text" in output[0]:
|
55 |
return output[0]["generated_text"]
|
|
|
66 |
return chat1, chat2
|
67 |
|
68 |
# --- Build the Gradio interface ---
|
|
|
69 |
sample_prompt = "Describe the image in explicit detail. Return a nested JSON object in response."
|
70 |
|
71 |
with gr.Blocks(title="Image Text-to-Text Comparison Tool") as demo:
|