smellslikeml commited on
Commit
22fc8c6
·
1 Parent(s): 013eddf

Add application file

Browse files
Files changed (2) hide show
  1. app.py +110 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """SpaceLlama3.1 demo gradio app."""
2
+
3
+ """SpaceLlama3.1 demo gradio app."""
4
+
5
+ import datetime
6
+ import logging
7
+ import os
8
+
9
+ import gradio as gr
10
+ import requests
11
+ import torch
12
+ import PIL.Image
13
+ from prismatic import load
14
+
15
# Markdown rendered at the top of the demo UI: links to the model, code,
# hosted demo and Discord, plus a research-use disclaimer.
INTRO_TEXT = """SpaceLlama3.1 demo\n\n
| [Model](https://huggingface.co/remyxai/SpaceLlama3.1)
| [GitHub](https://github.com/remyxai/VQASynth/tree/main)
| [Demo](https://huggingface.co/spaces/remyxai/SpaceLlama3.1)
| [Discord](https://discord.gg/DAy3P5wYJk)
\n\n
**This is an experimental research model.** Make sure to add appropriate guardrails when using the model for applications.
"""
23
+
24
# Cache of loaded models keyed by model location, so repeated requests do not
# reload weights from disk (the original implementation re-loaded the model on
# every click).
_VLM_CACHE = {}


def _get_vlm(model_location):
    """Load the VLM for `model_location` once, move it to the best available
    device, and cache it for subsequent requests."""
    if model_location not in _VLM_CACHE:
        device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
        vlm = load(model_location)
        # NOTE(review): bfloat16 kept from the original; on a CPU-only host
        # this may be slow or unsupported — confirm the Space has a GPU.
        vlm.to(device, dtype=torch.bfloat16)
        _VLM_CACHE[model_location] = vlm
    return _VLM_CACHE[model_location]


def compute(image, prompt, model_location):
    """Run VLM inference on an image/prompt pair.

    Args:
        image: Filepath to the uploaded image (str, as produced by
            `gr.Image(type="filepath")`) or an already-opened PIL image.
        prompt: The user's question/instruction for the model.
        model_location: HF hub id or local path accepted by `prismatic.load`.

    Returns:
        The generated answer with the trailing `</s>` marker (and anything
        after it) stripped.

    Raises:
        gr.Error: If no image was provided.
    """
    if image is None:
        raise gr.Error("Image required")

    logging.info('prompt="%s"', prompt)

    # Gradio hands us a filepath; normalize to an RGB PIL image.
    if isinstance(image, str):
        image = PIL.Image.open(image).convert("RGB")

    # Loaded once and cached — see _get_vlm above.
    vlm = _get_vlm(model_location)

    # Build the chat-formatted prompt expected by the model.
    prompt_builder = vlm.get_prompt_builder()
    prompt_builder.add_turn(role="human", message=prompt)
    prompt_text = prompt_builder.get_prompt()

    # Generate text conditioned on the image and the formatted prompt.
    generated_text = vlm.generate(
        image,
        prompt_text,
        do_sample=True,
        temperature=0.1,
        max_new_tokens=512,
        min_length=1,
    )
    # Keep only the content before the end-of-sequence marker.
    output = generated_text.split("</s>")[0]

    logging.info('output="%s"', output)

    return output
59
+
60
def reset():
    """Clear the prompt textbox and the image input back to their defaults."""
    cleared_prompt = ""
    cleared_image = None
    return cleared_prompt, cleared_image
63
+
64
def create_app():
    """Build and return the Gradio Blocks demo UI.

    Returns:
        A `gr.Blocks` instance wiring the image/prompt inputs to `compute`.
    """
    # Model location (HF hub id). Wrapped in gr.State below because plain
    # Python values are not valid Gradio event inputs.
    model_location = "remyxai/SpaceLlama3.1"  # Update as needed

    with gr.Blocks() as demo:
        # Main UI structure.
        gr.Markdown(INTRO_TEXT)
        with gr.Row():
            image = gr.Image(value=None, label="Image", type="filepath", visible=True)  # input
            with gr.Column():
                prompt = gr.Textbox(value="", label="Prompt", visible=True)
                model_info = gr.Markdown(label="Model Info")
                run = gr.Button("Run", variant="primary")
                clear = gr.Button("Clear")
                # BUG FIX: compute() returns a plain string, which
                # gr.HighlightedText cannot render (it expects a list of
                # (token, label) pairs) — use a Textbox for the output.
                output_text = gr.Textbox(value="", label="Output", visible=True)

        # BUG FIX: the original passed the raw string `model_location` inside
        # the `inputs` list; Gradio requires components there, so the value is
        # wrapped in gr.State.
        model_state = gr.State(model_location)

        # Button event handlers.
        run.click(
            compute,
            [image, prompt, model_state],
            output_text,
        )
        clear.click(reset, None, [prompt, image])

        # Status lines so the Space shows when it (re)started.
        gr.Markdown(f"Startup: {datetime.datetime.now()}")
        gr.Markdown("GPU=?")  # placeholder; the original used an f-string with no fields

        # BUG FIX: with a single (non-list) output component the handler must
        # return the value itself — the original returned a one-element list,
        # which would render as the list's repr.
        demo.load(
            lambda: f"Model `{model_location}` loaded.",
            None,
            model_info,
        )

    return demo
100
+
101
+ if __name__ == "__main__":
102
+
103
+ logging.basicConfig(
104
+ level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
105
+ )
106
+
107
+ for k, v in os.environ.items():
108
+ logging.info('environ["%s"] = %r', k, v)
109
+
110
+ create_app().queue().launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio
2
+ Pillow
3
+ torch
4
+ requests
5
+ git+https://github.com/remyxai/prismatic-vlms.git