Matthias Minderer committed

Commit: dea4744
Parent(s): 5d6f895

Initial commit.
This view is limited to 50 files because it contains too many changes.
- README.md +42 -6
- app.py +251 -0
- examples/barsik.jpg +0 -0
- examples/barsik.json +7 -0
- examples/biennale.jpg +0 -0
- examples/biennale.json +7 -0
- examples/billard1.jpg +0 -0
- examples/billard1.json +7 -0
- examples/billard2.jpg +0 -0
- examples/billard2.json +7 -0
- examples/bowie.jpg +0 -0
- examples/bowie.json +7 -0
- examples/branch.jpg +0 -0
- examples/branch.json +7 -0
- examples/cc_fox.jpg +0 -0
- examples/cc_fox.json +7 -0
- examples/cc_landscape.jpg +0 -0
- examples/cc_landscape.json +7 -0
- examples/cc_puffin.jpg +0 -0
- examples/cc_puffin.json +7 -0
- examples/couch.jpg +0 -0
- examples/couch.json +7 -0
- examples/couch_.json +7 -0
- examples/cups.jpg +0 -0
- examples/cups.json +7 -0
- examples/dice.jpg +0 -0
- examples/dice.json +7 -0
- examples/emu.jpg +0 -0
- examples/emu.json +7 -0
- examples/fridge.jpg +0 -0
- examples/fridge.json +7 -0
- examples/givt.jpg +0 -0
- examples/givt.json +7 -0
- examples/greenlake.jpg +0 -0
- examples/greenlake.json +7 -0
- examples/howto.jpg +0 -0
- examples/howto.json +7 -0
- examples/markers.jpg +0 -0
- examples/markers.json +7 -0
- examples/mcair.jpg +0 -0
- examples/mcair.json +7 -0
- examples/mcair_.json +7 -0
- examples/minergie.jpg +0 -0
- examples/minergie.json +7 -0
- examples/morel.jpg +0 -0
- examples/morel.json +7 -0
- examples/motorcyclists.jpg +0 -0
- examples/motorcyclists.json +7 -0
- examples/parking.jpg +0 -0
- examples/parking.json +7 -0
README.md
CHANGED
---
title: PaliGemma Demo
emoji: 🤲
colorFrom: green
colorTo: yellow
sdk: gradio
sdk_version: 4.22.0
app_file: app.py
pinned: false
license: apache-2.0
---

# PaliGemma Demo

See [Blogpost] and [`big_vision README.md`] for details about the model.

[Blogpost]: https://huggingface.co/blog/paligemma

[`big_vision README.md`]: https://github.com/google-research/big_vision/blob/main/big_vision/configs/proj/paligemma/README.md

## Development

Local testing (CPU, Python 3.12):

```bash
python -m venv env
. env/bin/activate
pip install -qr requirements-cpu.txt
python app.py
```

Environment variables:

- `MOCK_MODEL=yes`: For quick UI testing.
- `RAM_CACHE_GB=18`: Enables caching of 3 bf16 models in memory; a single bf16
  model is about 5860 MB. Use with care on Spaces with little RAM. For example,
  on an `A10G large` Space you can cache five models in RAM, so you would set
  `RAM_CACHE_GB=30`.
- `HOST_COLOCATION=4`: If host RAM/disk is shared between 4 processes (e.g. on
  the Huggingface `A10G large` Spaces).

Loading models:

- The set of models loaded is defined in `./models.py`.
- You must first acknowledge usage conditions to access models.
- When testing locally, you'll have to run `huggingface-cli login`.
- When running in a Huggingface Space, you'll have to set a `HF_TOKEN` secret.
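For illustration, combining the commands and variables documented above, a quick UI-only smoke test and a full local run might look like the following sketch (not a prescribed workflow):

```bash
# UI-only smoke test: responses are mocked, no checkpoints are downloaded.
MOCK_MODEL=yes python app.py

# Full local run: log in once to access the gated checkpoints, then cache
# up to three bf16 models (~18 GB) in RAM.
huggingface-cli login
RAM_CACHE_GB=18 python app.py
```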
app.py
ADDED
```python
"""PaliGemma demo gradio app."""

import datetime
import functools
import glob
import json
import logging
import os
import time

import gradio as gr
import jax
import PIL.Image

import gradio_helpers
import models
import paligemma_parse

INTRO_TEXT = """🤲 PaliGemma demo\n\n
| [GitHub](https://github.com/google-research/big_vision/blob/main/big_vision/configs/proj/paligemma/README.md)
| [HF blog post](https://huggingface.co/blog/paligemma)
| [Google blog post](https://developers.googleblog.com/en/gemma-family-and-toolkit-expansion-io-2024)
| [Vertex AI Model Garden](https://console.cloud.google.com/vertex-ai/publishers/google/model-garden/363)
| [Demo](https://huggingface.co/spaces/google/paligemma)
|\n\n
[PaliGemma](https://ai.google.dev/gemma/docs/paligemma) is an open vision-language model by Google,
inspired by [PaLI-3](https://arxiv.org/abs/2310.09199) and
built with open components such as the [SigLIP](https://arxiv.org/abs/2303.15343)
vision model and the [Gemma](https://arxiv.org/abs/2403.08295) language model. PaliGemma is designed as a versatile
model for transfer to a wide range of vision-language tasks such as image and short video caption, visual question
answering, text reading, object detection and object segmentation.
\n\n
This space includes models fine-tuned on a mix of downstream tasks.
See the [blog post](https://huggingface.co/blog/paligemma) and
[README](https://github.com/google-research/big_vision/blob/main/big_vision/configs/proj/paligemma/README.md)
for detailed information on how to use and fine-tune PaliGemma models.
\n\n
**This is an experimental research model.** Make sure to add appropriate guardrails when using the model for applications.
"""


make_image = lambda value, visible: gr.Image(
    value, label='Image', type='filepath', visible=visible)
make_annotated_image = functools.partial(gr.AnnotatedImage, label='Image')
make_highlighted_text = functools.partial(gr.HighlightedText, label='Output')


# https://coolors.co/4285f4-db4437-f4b400-0f9d58-e48ef1
COLORS = ['#4285f4', '#db4437', '#f4b400', '#0f9d58', '#e48ef1']


@gradio_helpers.synced
def compute(image, prompt, model_name, sampler):
  """Runs model inference."""
  if image is None:
    raise gr.Error('Image required')

  logging.info('prompt="%s"', prompt)

  if isinstance(image, str):
    image = PIL.Image.open(image)
  if gradio_helpers.should_mock():
    logging.warning('Mocking response')
    time.sleep(2.)
    output = paligemma_parse.EXAMPLE_STRING
  else:
    if not model_name:
      raise gr.Error('Models not loaded yet')
    output = models.generate(model_name, sampler, image, prompt)
    logging.info('output="%s"', output)

  width, height = image.size
  objs = paligemma_parse.extract_objs(output, width, height, unique_labels=True)
  labels = set(obj.get('name') for obj in objs if obj.get('name'))
  color_map = {l: COLORS[i % len(COLORS)] for i, l in enumerate(labels)}
  highlighted_text = [(obj['content'], obj.get('name')) for obj in objs]
  annotated_image = (
      image,
      [
          (
              obj['mask'] if obj.get('mask') is not None else obj['xyxy'],
              obj['name'] or '',
          )
          for obj in objs
          if 'mask' in obj or 'xyxy' in obj
      ],
  )
  has_annotations = bool(annotated_image[1])
  return (
      make_highlighted_text(
          highlighted_text, visible=True, color_map=color_map),
      make_image(image, visible=not has_annotations),
      make_annotated_image(
          annotated_image, visible=has_annotations, width=width, height=height,
          color_map=color_map),
  )


def warmup(model_name):
  image = PIL.Image.new('RGB', [1, 1])
  _ = compute(image, '', model_name, 'greedy')


def reset():
  return (
      '', make_highlighted_text('', visible=False),
      make_image(None, visible=True), make_annotated_image(None, visible=False),
  )


def create_app():
  """Creates demo UI."""

  make_model = lambda choices: gr.Dropdown(
      value=(choices + [''])[0],
      choices=choices,
      label='Model',
      visible=bool(choices),
  )
  make_prompt = lambda value, visible=True: gr.Textbox(
      value, label='Prompt', visible=visible)

  with gr.Blocks() as demo:

    ##### Main UI structure.

    gr.Markdown(INTRO_TEXT)
    with gr.Row():
      image = make_image(None, visible=True)  # input
      annotated_image = make_annotated_image(None, visible=False)  # output
      with gr.Column():
        with gr.Row():
          prompt = make_prompt('', visible=True)
          model_info = gr.Markdown(label='Model Info')
        with gr.Row():
          model = make_model([])
          samplers = [
              'greedy', 'nucleus(0.1)', 'nucleus(0.3)', 'temperature(0.5)']
          sampler = gr.Dropdown(
              value=samplers[0], choices=samplers, label='Decoding'
          )
        with gr.Row():
          run = gr.Button('Run', variant='primary')
          clear = gr.Button('Clear')
        highlighted_text = make_highlighted_text('', visible=False)

    ##### UI logic.

    def update_ui(model, prompt):
      prompt = make_prompt(prompt, visible=True)
      model_info = f'Model `{model}` – {models.MODELS_INFO.get(model, "No info.")}'
      return [prompt, model_info]

    gr.on(
        [model.change],
        update_ui,
        [model, prompt],
        [prompt, model_info],
    )

    gr.on(
        [run.click, prompt.submit],
        compute,
        [image, prompt, model, sampler],
        [highlighted_text, image, annotated_image],
    )
    clear.click(
        reset, None, [prompt, highlighted_text, image, annotated_image]
    )

    ##### Examples.

    gr.set_static_paths(['examples/'])
    all_examples = [json.load(open(p)) for p in glob.glob('examples/*.json')]
    logging.info('loaded %d examples', len(all_examples))
    example_image = gr.Image(
        label='Image', visible=False)  # proxy, never visible
    example_model = gr.Text(
        label='Model', visible=False)  # proxy, never visible
    example_prompt = gr.Text(
        label='Prompt', visible=False)  # proxy, never visible
    example_license = gr.Markdown(
        label='Image License', visible=False)  # placeholder, never visible
    gr.Examples(
        examples=[
            [
                f'examples/{ex["name"]}.jpg',
                ex['prompt'],
                ex['model'],
                ex['license'],
            ]
            for ex in all_examples
            if ex['model'] in models.MODELS
        ],
        inputs=[example_image, example_prompt, example_model, example_license],
    )

    ##### Examples UI logic.

    example_image.change(
        lambda image_path: (
            make_image(image_path, visible=True),
            make_annotated_image(None, visible=False),
            make_highlighted_text('', visible=False),
        ),
        example_image,
        [image, annotated_image, highlighted_text],
    )
    def example_model_changed(model):
      if model not in gradio_helpers.get_paths():
        raise gr.Error(f'Model "{model}" not loaded!')
      return model
    example_model.change(example_model_changed, example_model, model)
    example_prompt.change(make_prompt, example_prompt, prompt)

    ##### Status.

    status = gr.Markdown(f'Startup: {datetime.datetime.now()}')
    gpu_kind = gr.Markdown('GPU=?')
    demo.load(
        lambda: [
            gradio_helpers.get_status(),
            make_model(list(gradio_helpers.get_paths())),
        ],
        None,
        [status, model],
    )
    def get_gpu_kind():
      device = jax.devices()[0]
      if not gradio_helpers.should_mock() and device.platform != 'gpu':
        raise gr.Error('GPU not visible to JAX!')
      return f'GPU={device.device_kind}'
    demo.load(get_gpu_kind, None, gpu_kind)

  return demo


if __name__ == '__main__':

  logging.basicConfig(level=logging.INFO,
                      format='%(asctime)s - %(levelname)s - %(message)s')

  logging.info('JAX devices: %s', jax.devices())

  for k, v in os.environ.items():
    logging.info('environ["%s"] = %r', k, v)

  gradio_helpers.set_warmup_function(warmup)
  for name, (repo, filename, revision) in models.MODELS.items():
    gradio_helpers.register_download(name, repo, filename, revision)

  create_app().queue().launch()
```
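Note that `app.py` relies on a small surface from `models.py` (not shown in this commit): `MODELS` maps a model name to a `(repo, filename, revision)` tuple used for downloads, `MODELS_INFO` maps a name to a short description shown by `update_ui`, and `generate(model_name, sampler, image, prompt)` returns the raw PaliGemma output string. A minimal, purely illustrative stand-in that satisfies those call sites — with placeholder repo/filename values that are assumptions, not the real checkpoint registry — could look like:

```python
"""Illustrative stand-in for the models.py interface that app.py expects."""

# Registry consumed by app.py: name -> (HF repo, checkpoint filename, revision).
# The values below are placeholders, not the actual PaliGemma checkpoints.
MODELS = {
    'paligemma-3b-mix-224': ('<hf-repo>', '<checkpoint-filename>', None),
}

# Short description shown next to the model dropdown via update_ui().
MODELS_INFO = {
    'paligemma-3b-mix-224': 'Placeholder description of the mix-224 checkpoint.',
}


def generate(model_name, sampler, image, prompt):
  """Returns the raw output string; a real implementation runs inference here."""
  del model_name, sampler, image  # Unused in this stand-in.
  return f'(placeholder output for prompt: {prompt!r})'
```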
examples/barsik.jpg
ADDED
examples/barsik.json
ADDED
```json
{
  "name": "barsik",
  "comment": "",
  "model": "paligemma-3b-mix-224",
  "prompt": "segment cat",
  "license": "CC0 by [maximneumann@](https://github.com/maximneumann)"
}
```
examples/biennale.jpg
ADDED
examples/biennale.json
ADDED
```json
{
  "name": "biennale",
  "comment": "",
  "model": "paligemma-3b-mix-224",
  "prompt": "In which city is this?",
  "license": "CC0 by [andsteing@](https://huggingface.co/andsteing)"
}
```
examples/billard1.jpg
ADDED
examples/billard1.json
ADDED
```json
{
  "name": "billard1",
  "comment": "",
  "model": "paligemma-3b-mix-224",
  "prompt": "How many red balls are there?",
  "license": "CC0 by [mbosnjak@](https://github.com/mbosnjak)"
}
```
examples/billard2.jpg
ADDED
examples/billard2.json
ADDED
```json
{
  "name": "billard2",
  "comment": "",
  "model": "paligemma-3b-mix-224",
  "prompt": "How many balls are there?",
  "license": "CC0 by [mbosnjak@](https://github.com/mbosnjak)"
}
```
examples/bowie.jpg
ADDED
examples/bowie.json
ADDED
```json
{
  "name": "bowie",
  "comment": "",
  "model": "paligemma-3b-mix-224",
  "prompt": "Who is this?",
  "license": "CC0 by [akolesnikoff@](https://github.com/akolesnikoff)"
}
```
examples/branch.jpg
ADDED
examples/branch.json
ADDED
```json
{
  "name": "branch",
  "comment": "",
  "model": "paligemma-3b-mix-224",
  "prompt": "What caused this?",
  "license": "CC0 by [andsteing@](https://huggingface.co/andsteing)"
}
```
examples/cc_fox.jpg
ADDED
examples/cc_fox.json
ADDED
```json
{
  "name": "cc_fox",
  "comment": "",
  "model": "paligemma-3b-mix-448",
  "prompt": "Which breed is this fox?",
  "license": "CC0 by [XiaohuaZhai@](https://sites.google.com/view/xzhai)"
}
```
examples/cc_landscape.jpg
ADDED
examples/cc_landscape.json
ADDED
```json
{
  "name": "cc_landscape",
  "comment": "",
  "model": "paligemma-3b-mix-448",
  "prompt": "What does the image show?",
  "license": "CC0 by [XiaohuaZhai@](https://sites.google.com/view/xzhai)"
}
```
examples/cc_puffin.jpg
ADDED
examples/cc_puffin.json
ADDED
```json
{
  "name": "cc_puffin",
  "comment": "",
  "model": "paligemma-3b-mix-448",
  "prompt": "detect puffin in the back ; puffin in front",
  "license": "CC0 by [XiaohuaZhai@](https://sites.google.com/view/xzhai)"
}
```
examples/couch.jpg
ADDED
examples/couch.json
ADDED
```json
{
  "name": "couch",
  "comment": "",
  "model": "paligemma-3b-mix-224",
  "prompt": "How many yellow cushions are on the couch?",
  "license": "CC0"
}
```
examples/couch_.json
ADDED
```json
{
  "name": "couch",
  "comment": "",
  "model": "paligemma-3b-mix-224",
  "prompt": "How many painting do you see in the image?",
  "license": "CC0"
}
```
examples/cups.jpg
ADDED
examples/cups.json
ADDED
```json
{
  "name": "cups",
  "comment": "",
  "model": "paligemma-3b-mix-224",
  "prompt": "how many cups?",
  "license": "CC0 by [mbosnjak@](https://github.com/mbosnjak)"
}
```
examples/dice.jpg
ADDED
examples/dice.json
ADDED
```json
{
  "name": "dice",
  "comment": "",
  "model": "paligemma-3b-mix-224",
  "prompt": "segment dice ; dice",
  "license": "CC0 by [andresusanopinto@](https://github.com/andresusanopinto)"
}
```
examples/emu.jpg
ADDED
examples/emu.json
ADDED
```json
{
  "name": "emu",
  "comment": "",
  "model": "paligemma-3b-mix-224",
  "prompt": "What animal is this?",
  "license": "CC0 by [akolesnikoff@](https://github.com/akolesnikoff)"
}
```
examples/fridge.jpg
ADDED
examples/fridge.json
ADDED
```json
{
  "name": "fridge",
  "comment": "",
  "model": "paligemma-3b-mix-224",
  "prompt": "Describe the image.",
  "license": "CC0 by [andresusanopinto@](https://github.com/andresusanopinto)"
}
```
examples/givt.jpg
ADDED
examples/givt.json
ADDED
```json
{
  "name": "givt",
  "comment": "",
  "model": "paligemma-3b-mix-224",
  "prompt": "What does the image show?",
  "license": "CC-BY [GIVT paper](https://arxiv.org/abs/2312.02116)"
}
```
examples/greenlake.jpg
ADDED
examples/greenlake.json
ADDED
```json
{
  "name": "greenlake",
  "comment": "",
  "model": "paligemma-3b-mix-224",
  "prompt": "Describe the image.",
  "license": "CC0 by [akolesnikoff@](https://github.com/akolesnikoff)"
}
```
examples/howto.jpg
ADDED
examples/howto.json
ADDED
```json
{
  "name": "howto",
  "comment": "",
  "model": "paligemma-3b-mix-224",
  "prompt": "What does this image show?",
  "license": "CC-BY [How to train your ViT?](https://arxiv.org/abs/2106.10270)"
}
```
examples/markers.jpg
ADDED
examples/markers.json
ADDED
```json
{
  "name": "markers",
  "comment": "answer en How many cups are there?",
  "model": "paligemma-3b-mix-224",
  "prompt": "How many cups are there?",
  "license": "CC0"
}
```
examples/mcair.jpg
ADDED
examples/mcair.json
ADDED
```json
{
  "name": "mcair",
  "comment": "",
  "model": "paligemma-3b-mix-224",
  "prompt": "Can you board this airplane?",
  "license": "CC0 by [akolesnikoff@](https://github.com/akolesnikoff)"
}
```
examples/mcair_.json
ADDED
```json
{
  "name": "mcair",
  "comment": "",
  "model": "paligemma-3b-mix-224",
  "prompt": "Is this a restaurant?",
  "license": "CC0 by [akolesnikoff@](https://github.com/akolesnikoff)"
}
```
examples/minergie.jpg
ADDED
examples/minergie.json
ADDED
```json
{
  "name": "minergie",
  "comment": "",
  "model": "paligemma-3b-mix-224",
  "prompt": "ocr",
  "license": "CC0 by [andsteing@](https://huggingface.co/andsteing)"
}
```
examples/morel.jpg
ADDED
examples/morel.json
ADDED
```json
{
  "name": "morel",
  "comment": "",
  "model": "paligemma-3b-mix-224",
  "prompt": "detect morel",
  "license": "CC0 by [andsteing@](https://huggingface.co/andsteing)"
}
```
examples/motorcyclists.jpg
ADDED
examples/motorcyclists.json
ADDED
```json
{
  "name": "motorcyclists",
  "comment": "",
  "model": "paligemma-3b-mix-224",
  "prompt": "What does the image show?",
  "license": "CC0 by [akolesnikoff@](https://github.com/akolesnikoff)"
}
```
examples/parking.jpg
ADDED
examples/parking.json
ADDED
```json
{
  "name": "parking",
  "comment": "",
  "model": "paligemma-3b-mix-224",
  "prompt": "Describe the image.",
  "license": "CC0 by [xiaohuazhai@](https://huggingface.co/xiaohuazhai)"
}
```