Spaces:

selfDotOsman
/

BobVLM-demo

Sleeping

App Files Files Community

selfDotOsman committed on Feb 12

Commit

3a40bac

verified ·

1 Parent(s): 399be4f

Create app.py

Browse files

Files changed (1) hide show

app.py +94 -0

app.py ADDED Viewed

	@@ -0,0 +1,94 @@

+from pathlib import Path
+import gradio as gr
+from BobVLM import BobVLMProcessor, load_model, pipeline
+import torch
+# Load model and processor
+model = load_model()
+processor = BobVLMProcessor()
+# Create pipeline
+pipe = pipeline(model, processor)
+def analyze_image(image):
+    """Process the image and return BobVLM's analysis."""
+    response = pipe(
+        chat=[
+            {"role": "system", "content": "You are an image understanding assistant. You can see and interpret images in fine detail. Provide clear, engaging descriptions that highlight the key elements and atmosphere of the image."},
+            {"role": "user", "content": "Describe the image"},
+        ],
+        images=image
+    )
+    return response[0] if response else "I couldn't analyze this image."
+# Create the Gradio interface
+with gr.Blocks(theme=gr.themes.Soft(
+    primary_hue="blue",
+    secondary_hue="indigo",
+    neutral_hue="slate",
+)) as demo:
+    gr.Markdown(
+        """
+        # 🤖 BobVLM Image Analyzer
+        Upload an image and let BobVLM describe what it sees. BobVLM combines CLIP's vision capabilities
+        with LLaMA's language understanding to provide detailed, natural descriptions of images.
+        """
+    )
+    with gr.Row():
+        with gr.Column(scale=1):
+            input_image = gr.Image(
+                label="Upload Image",
+                type="pil",
+                height=400,
+            )
+            analyze_btn = gr.Button(
+                "🔍 Analyze Image",
+                variant="primary",
+                size="lg",
+            )
+        with gr.Column(scale=1):
+            output_text = gr.Textbox(
+                label="BobVLM's Analysis",
+                placeholder="Analysis will appear here...",
+                lines=16,
+                show_copy_button=True,
+            )
+    # Add examples
+    gr.Examples(
+        examples=[
+            ["path/to/example1.jpg"],
+            ["path/to/example2.jpg"],
+        ],
+        inputs=input_image,
+        outputs=output_text,
+        fn=analyze_image,
+        cache_examples=True,
+    )
+    # Set up the click event
+    analyze_btn.click(
+        fn=analyze_image,
+        inputs=input_image,
+        outputs=output_text,
+    )
+    gr.Markdown(
+        """
+        ### About BobVLM
+        BobVLM is a Vision Language Model that combines CLIP's visual understanding with LLaMA's language capabilities.
+        It uses a specialized adapter layer to bridge the gap between vision and language, enabling detailed and natural
+        image descriptions.
+        [View on GitHub](https://github.com/yourusername/BobVLM) | [Hugging Face Model](https://huggingface.co/selfDotOsman/BobVLM-1.5b)
+        """
+    )
+# Launch the app
+if __name__ == "__main__":
+    demo.launch(
+        share=True,
+        enable_queue=True,
+        show_error=True,
+    )