Spaces:

martinsinnona
/

visdecode

Sleeping

martinsinnona committed on Jun 27, 2024

Commit

e8ba5e8

1 Parent(s): 7b0ea0f

a

Files changed (1) hide show

app.py CHANGED Viewed

@@ -14,12 +14,14 @@ processor.image_processor.is_vqa = False
 model = Pix2StructForConditionalGeneration.from_pretrained("martinsinnona/visdecode_B").to(device)
 model.eval()
-def generate_caption(image):
-    inputs = processor(images=image, return_tensors="pt", max_patches=1024).to(device)
-    generated_ids = model.generate(flattened_patches=inputs.flattened_patches, attention_mask=inputs.attention_mask, max_length=600)
-    generated_caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
     # Generate the Vega image
     vega = string_to_vega(generated_caption)
     vega_image = draw_vega(vega)
@@ -51,7 +53,7 @@ def string_to_vega(string):
 # Create the Gradio interface
 iface = gr.Interface(
-    fn=generate_caption,
     inputs=gr.Image(type="pil"),
     outputs=[gr.Textbox(), gr.Image(type="pil")],
     title="Image to Vega-Lite",
@@ -60,4 +62,4 @@ iface = gr.Interface(
 # Launch the interface
 if __name__ == "__main__":
-    iface.launch(share=True)

 model = Pix2StructForConditionalGeneration.from_pretrained("martinsinnona/visdecode_B").to(device)
 model.eval()
+def generate(image):
+    #inputs = processor(images=image, return_tensors="pt", max_patches=1024).to(device)
+    #generated_ids = model.generate(flattened_patches=inputs.flattened_patches, attention_mask=inputs.attention_mask, max_length=600)
+    #generated_caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+    generated_caption = "{'mark': 'bar', 'encoding': {'x': {'field': '', 'type': 'ordinal'}, 'y': {'field': '', 'type': 'quantitative'}}, 'data': {'values': [{'x': 0, 'y': 5.6}, {'x': 1, 'y': 6.7}, {'x': 2, 'y': 5.0}, {'x': 3, 'y': 18.7}]}}"
     # Generate the Vega image
     vega = string_to_vega(generated_caption)
     vega_image = draw_vega(vega)
 # Create the Gradio interface
 iface = gr.Interface(
+    fn=generate,
     inputs=gr.Image(type="pil"),
     outputs=[gr.Textbox(), gr.Image(type="pil")],
     title="Image to Vega-Lite",
 # Launch the interface
 if __name__ == "__main__":
+    iface.launch()