kendrickfff committed
Commit 603c4d7 · verified · 1 Parent(s): b05e484

Update app.py

Files changed (1):
  1. app.py +116 -49
app.py CHANGED
@@ -1,12 +1,35 @@
 import os
 import gradio as gr
-import torch
 from transformers import DetrImageProcessor, DetrForObjectDetection
 from PIL import Image
-import requests
 import json

-# Custom Object Labels
 COCO_CLASSES = [
     'airplane', 'apple', 'backpack', 'banana', 'baseball hat', 'baseball glove', 'bear', 'bed', 'bench', 'bicycle',
     'bird', 'boat', 'book', 'bottle', 'bowl', 'broccoli', 'bus', 'cake', 'car', 'carrot', 'cat', 'cell phone', 'chair',
@@ -18,64 +41,108 @@ COCO_CLASSES = [
     'traffic light', 'train', 'truck', 'tv', 'umbrella', 'vase', 'wine glass'
 ]

-# Load the DETR model and processor
-model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")
-processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")

-# Initialize Gradio interface
 def analyze_image(image_path):
     try:
-        # Open the image
-        image = Image.open(image_path)
-
-        # Preprocess the image
         inputs = processor(images=image, return_tensors="pt")

-        # Perform object detection
-        outputs = model(**inputs)
-
-        # Get the logits (class predictions) and boxes (bounding boxes)
-        logits = outputs.logits
-        boxes = outputs.pred_boxes
-
-        # Get the predicted labels (class IDs)
-        class_ids = logits.argmax(-1)
-
-        # Filter out detections with low confidence and map to custom labels
-        results = []
-        for idx, class_id in enumerate(class_ids[0]):
-            confidence = logits[0, idx, class_id].item()
-            if confidence > 0.5: # Confidence threshold
-                label = COCO_CLASSES[class_id]
-                box = boxes[0, idx].tolist()
-                results.append({
-                    'label': label,
-                    'confidence': confidence,
-                    'box': box
-                })
-
-        if len(results) == 0:
-            return "No objects detected."
-
-        # Generate a response with the detected objects
-        detected_objects = "\n".join([f"{result['label']} (confidence: {result['confidence']:.2f})" for result in results])
-        return f"Detected Objects:\n{detected_objects}"

-    except Exception as e:
-        return f"Error processing the image: {str(e)}"


-# Gradio Interface Setup
 with gr.Blocks() as demo:
-    gr.Markdown("## Object Detection with Custom Labels")
-    gr.Markdown("Upload an image for analysis!")

     # User input components
     img_upload = gr.Image(type="filepath", label="Upload an image for analysis")
-    output_text = gr.Textbox(label="Detection Results", interactive=False)

-    # Define the interaction
-    img_upload.change(analyze_image, img_upload, output_text)

-# Launch the interface
 demo.launch()
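The removed code above thresholds DETR's raw, unnormalized logits and then indexes the alphabetically ordered COCO_CLASSES list with the predicted class id, which is not the id-to-name mapping facebook/detr-resnet-50 actually uses. The updated file, shown next with its added lines marked +, switches to the processor's post_process_object_detection helper instead. As a minimal standalone sketch of that decoding path (not part of the commit), using the model's built-in id2label map rather than a hand-maintained list, with "example.jpg" as a hypothetical test image:

```python
# Minimal sketch (not part of the commit): decode DETR outputs with the
# processor's post-processing helper and the model's own id2label map.
# "example.jpg" is a hypothetical test image.
import torch
from PIL import Image
from transformers import DetrForObjectDetection, DetrImageProcessor

processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")

image = Image.open("example.jpg").convert("RGB")
inputs = processor(images=image, return_tensors="pt")
with torch.no_grad():
    outputs = model(**inputs)

# Rescale boxes to the original image size and keep detections above 0.5 confidence.
target_sizes = torch.tensor([image.size[::-1]])  # (height, width)
detections = processor.post_process_object_detection(
    outputs, threshold=0.5, target_sizes=target_sizes
)[0]

for score, label, box in zip(detections["scores"], detections["labels"], detections["boxes"]):
    # id2label maps DETR's label ids directly to COCO category names.
    print(f"{model.config.id2label[label.item()]}: {score:.2f} at {box.tolist()}")
```

post_process_object_detection converts the logits to per-class probabilities, drops the "no object" class, and rescales the boxes to pixel coordinates, so only the score threshold is left to choose.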
 
 import os
 import gradio as gr
 from transformers import DetrImageProcessor, DetrForObjectDetection
+from langchain_google_genai.chat_models import ChatGoogleGenerativeAI # Import Gemini
 from PIL import Image
+import torch
 import json
+import requests
+
+# Load credentials (stringified JSON) from environment variable for Gemini
+credentials_string = os.environ.get("GOOGLE_APPLICATION_CREDENTIALS")
+if not credentials_string:
+    raise ValueError("GOOGLE_APPLICATION_CREDENTIALS is not set in the environment!")
+
+# Parse the stringified JSON back to a Python dictionary
+credentials = json.loads(credentials_string)
+
+# Save the credentials to a temporary JSON file (required by Google SDKs)
+with open("service_account.json", "w") as f:
+    json.dump(credentials, f)

+# Set the GOOGLE_APPLICATION_CREDENTIALS environment variable to the temporary file
+os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "service_account.json"
+
+# Initialize Gemini model (chatbot)
+llm = ChatGoogleGenerativeAI(model='gemini-1.5-pro')
+
+# Initialize DETR model and processor for object detection
+processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
+model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")
+
+# Load COCO class labels (from the official COCO dataset)
 COCO_CLASSES = [
     'airplane', 'apple', 'backpack', 'banana', 'baseball hat', 'baseball glove', 'bear', 'bed', 'bench', 'bicycle',
     'bird', 'boat', 'book', 'bottle', 'bowl', 'broccoli', 'bus', 'cake', 'car', 'carrot', 'cat', 'cell phone', 'chair',
     'traffic light', 'train', 'truck', 'tv', 'umbrella', 'vase', 'wine glass'
 ]

+# Global chat history variable
+chat_history = []
+
+# Function for chatting with Gemini
+def chat_with_gemini(message):
+    global chat_history
+    bot_response = llm.predict(message) # This will interact with the Gemini model
+    chat_history.append((message, bot_response))
+    return chat_history

+# Function for analyzing the uploaded image
 def analyze_image(image_path):
+    global chat_history
     try:
+        # Open and preprocess the image
+        image = Image.open(image_path).convert("RGB")
         inputs = processor(images=image, return_tensors="pt")

+        # Perform inference
+        with torch.no_grad():
+            outputs = model(**inputs)

+        # Set a target size for post-processing
+        target_sizes = torch.tensor([image.size[::-1]]) # (height, width)
+        results = processor.post_process_object_detection(outputs, target_sizes=target_sizes)[0]
+
+        # Collect detected objects (with no minimum confidence filter)
+        detected_objects = []
+        for idx, label in enumerate(results["labels"]):
+            # Get the object label based on label index
+            object_name = COCO_CLASSES[label.item()] # Assuming COCO_CLASSES is available
+            score = results["scores"][idx].item() # Confidence score for this detection
+
+            # Store only objects with a score higher than a threshold (e.g., 0.1)
+            if score > 0.1:
+                detected_objects.append(f"{object_name} (score: {score:.2f})")

+        if detected_objects:
+            bot_response = f"Objects detected: {', '.join(detected_objects)}."
+        else:
+            bot_response = "No objects detected."

+        chat_history.append(("Uploaded an image for analysis", bot_response))
+        return chat_history
+    except Exception as e:
+        error_msg = f"Error processing the image: {str(e)}"
+        chat_history.append(("Error during image analysis", error_msg))
+        return chat_history
+
+# Build the Gradio interface
 with gr.Blocks() as demo:
+    gr.Markdown("# Ken Chatbot")
+    gr.Markdown("Ask me anything or upload an image for analysis!")
+
+    # Chatbot display without "User" or "Bot" labels
+    chatbot = gr.Chatbot(elem_id="chatbot")

     # User input components
+    msg = gr.Textbox(label="Type your message here...", placeholder="Enter your message...", show_label=False)
+    send_btn = gr.Button("Send")
     img_upload = gr.Image(type="filepath", label="Upload an image for analysis")

+    # Define interactions
+    def handle_text_message(message):
+        return chat_with_gemini(message)
+
+    def handle_image_upload(image_path):
+        return analyze_image(image_path)
+
+    # Set up Gradio components with Enter key for sending
+    msg.submit(handle_text_message, msg, chatbot)
+    send_btn.click(handle_text_message, msg, chatbot)
+    send_btn.click(lambda: "", None, msg) # Clear input field
+    img_upload.change(handle_image_upload, img_upload, chatbot)
+
+    # Custom CSS for styling without usernames
+    gr.HTML("""
+    <style>
+    #chatbot .message-container {
+        display: flex;
+        flex-direction: column;
+        margin-bottom: 10px;
+        max-width: 70%;
+    }
+    #chatbot .message {
+        border-radius: 15px;
+        padding: 10px;
+        margin: 5px 0;
+        word-wrap: break-word;
+    }
+    #chatbot .message.user {
+        background-color: #DCF8C6;
+        margin-left: auto;
+        text-align: right;
+    }
+    #chatbot .message.bot {
+        background-color: #E1E1E1;
+        margin-right: auto;
+        text-align: left;
+    }
+    </style>
+    """)

+# Launch the Gradio interface
 demo.launch()
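Note that the updated script expects GOOGLE_APPLICATION_CREDENTIALS to hold the service-account JSON itself as a single string (as a Space secret would), not a file path; it then rewrites the variable to point at the service_account.json file it writes. A minimal sketch of reproducing that setup for a local run, with "key.json" as a hypothetical local key file:

```python
# Minimal local sketch (not part of the commit): expose a downloaded Google
# service-account key the way the Space secret does, i.e. as one JSON string.
# "key.json" is a hypothetical path to a local service-account key file.
import json
import os
import subprocess

with open("key.json") as f:
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = json.dumps(json.load(f))

# app.py inherits the variable, parses the string, and re-points it to the
# temporary service_account.json file it writes for the Google SDKs.
subprocess.run(["python", "app.py"], check=True)
```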