Spaces:

linm1
/

vlm

Sleeping

App Files Files Community

linm1 committed on Oct 3, 2024

Commit

1e3ede4

verified ·

1 Parent(s): 1a7b044

Upload 2 files

Browse files

Files changed (2) hide show

app.py +163 -0
requirements.txt +15 -0

app.py ADDED Viewed

	@@ -0,0 +1,163 @@

+import gradio as gr
+import os
+from dotenv import load_dotenv
+import base64
+from io import BytesIO
+from mistralai import Mistral
+from pydantic import BaseModel, Field
+from datasets import load_dataset
+from PIL import Image
+import json
+import sqlite3
+from datetime import datetime
+# Load the dataset
+ds = load_dataset("svjack/pokemon-blip-captions-en-zh")
+ds = ds["train"]
+# Load environment variables
+api_key = os.environ.get('MISTRAL_API_KEY')
+if not api_key:
+    raise ValueError("MISTRAL_API_KEY is not set in the environment variables.")
+# Create sample history
+hist = [str({"en": ds[i]["en_text"], "zh": ds[i]["zh_text"]}) for i in range(8)]
+hist_str = "\n".join(hist)
+# Define the Caption model
+class Caption(BaseModel):
+    en: str = Field(...,
+        description="English caption of image",
+        max_length=84)
+    zh: str = Field(...,
+        description="Chinese caption of image",
+        max_length=64)
+# Initialize the Mistral client
+client = Mistral(api_key=api_key)
+def generate_caption(image):
+    # Convert image to base64
+    buffered = BytesIO()
+    image.save(buffered, format="JPEG")
+    base64_image = base64.b64encode(buffered.getvalue()).decode('utf-8')
+    messages = [
+        {
+            "role": "system",
+            "content": f'''
+            You are a highly accurate image to caption transformer.
+            Describe the image content in English and Chinese respectively. Make sure to FOCUS on item CATEGORY and COLOR!
+            Do NOT provide NAMES! KEEP it SHORT!
+            While adhering to the following JSON schema: {Caption.model_json_schema()}
+            Following are some samples you should adhere to for style and tone:
+            {hist_str}
+            '''
+        },
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "text",
+                    "text": "Describe the image in English and Chinese"
+                },
+                {
+                    "type": "image_url",
+                    "image_url": f"data:image/jpeg;base64,{base64_image}"
+                }
+            ]
+        }
+    ]
+    chat_response = client.chat.complete(
+        model="pixtral-12b-2409",
+        messages=messages,
+        response_format = {
+          "type": "json_object",
+        }
+    )
+    response_content = chat_response.choices[0].message.content
+    try:
+        caption_dict = json.loads(response_content)
+        return Caption(**caption_dict)
+    except json.JSONDecodeError as e:
+        print(f"Error decoding JSON: {e}")
+        return None
+# Initialize SQLite database
+def init_db():
+    conn = sqlite3.connect('feedback.db')
+    c = conn.cursor()
+    c.execute('''CREATE TABLE IF NOT EXISTS thumbs_up
+                 (id INTEGER PRIMARY KEY AUTOINCREMENT,
+                  timestamp TEXT,
+                  input_data TEXT,
+                  output_data TEXT)''')
+    conn.commit()
+    conn.close()
+init_db()
+def process_image(image):
+    if image is None:
+        return "Please upload an image first."
+    result = generate_caption(image)
+    if result:
+        return f"English caption: {result.en}\nChinese caption: {result.zh}"
+    else:
+        return "Failed to generate caption. Please check the API call or network connectivity."
+def thumbs_up(image, caption):
+    # Convert image to base64 string for storage
+    buffered = BytesIO()
+    image.save(buffered, format="JPEG")
+    img_str = base64.b64encode(buffered.getvalue()).decode()
+    conn = sqlite3.connect('feedback.db')
+    c = conn.cursor()
+    c.execute("INSERT INTO thumbs_up (timestamp, input_data, output_data) VALUES (?, ?, ?)",
+              (datetime.now().isoformat(), img_str, caption))
+    conn.commit()
+    conn.close()
+    print(f"Thumbs up data saved to database.")
+    return gr.Notification("Thank you for your feedback!", type="success")
+# Create Gradio interface
+custom_css = """
+    .highlight-btn {
+        background-color: #3498db !important;
+        border-color: #3498db !important;
+        color: white !important;
+    }
+    .highlight-btn:hover {
+        background-color: #2980b9 !important;
+        border-color: #2980b9 !important;
+    }
+"""
+with gr.Blocks() as iface:
+    gr.Markdown("# Image Captioner")
+    gr.Markdown("Upload an image to generate captions in English and Chinese. Use the 'Thumbs Up' button if you like the result!")
+    with gr.Row():
+        with gr.Column(scale=1):
+            input_image = gr.Image(type="pil")
+            with gr.Row():
+                clear_btn = gr.Button("Clear")
+                submit_btn = gr.Button("Submit", elem_classes=["highlight-btn"])
+        with gr.Column(scale=1):
+            output_text = gr.Textbox()
+            thumbs_up_btn = gr.Button("Thumbs Up")
+    clear_btn.click(fn=lambda: None, inputs=None, outputs=input_image)
+    submit_btn.click(fn=process_image, inputs=input_image, outputs=output_text)
+    thumbs_up_btn.click(fn=thumbs_up, inputs=[input_image, output_text], outputs=None)
+# Launch the interface
+iface.launch(share=True)

requirements.txt ADDED Viewed

	@@ -0,0 +1,15 @@

+anthropic
+openai>=1.1.0
+mistralai
+pydantic
+docstring-parser
+rich
+aiohttp
+ruff==0.1.7
+pre-commit==3.5.0
+pyright==1.1.360
+typer
+cohere
+datasets
+gradio
+Pillow
+# app.py imports `dotenv`, which is provided by the python-dotenv package.
+python-dotenv