Spaces:

kendrickfff
/

ask-me-anything

Running

App Files Files Community

kendrickfff committed on Mar 2

Commit

2101247

verified ·

1 Parent(s): 603c4d7

use yolo to handle multiple detection

Browse files

Files changed (1) hide show

app.py +35 -111

app.py CHANGED Viewed

@@ -1,148 +1,72 @@
 import os
 import gradio as gr
-from transformers import DetrImageProcessor, DetrForObjectDetection
-from langchain_google_genai.chat_models import ChatGoogleGenerativeAI  # Import Gemini
-from PIL import Image
 import torch
-import json
-import requests
-# Read the Gemini service-account credentials (stringified JSON) from the environment
-credentials_string = os.environ.get("GOOGLE_APPLICATION_CREDENTIALS")
-if not credentials_string:
-    raise ValueError("GOOGLE_APPLICATION_CREDENTIALS is not set in the environment!")
-# Parse the stringified JSON back into a Python object
-credentials = json.loads(credentials_string)
-# Write the credentials to service_account.json in the working directory (the file is never removed here; presumably the Google SDK needs a file path - confirm)
-with open("service_account.json", "w") as f:
-    json.dump(credentials, f)
-# Re-point GOOGLE_APPLICATION_CREDENTIALS at that file instead of the raw JSON string
-os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "service_account.json"
-# Initialize Gemini model (chatbot)
-llm = ChatGoogleGenerativeAI(model='gemini-1.5-pro')
-# Initialize DETR model and processor for object detection
-processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
-model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")
-# Hard-coded object class names, listed roughly alphabetically. NOTE(review): analyze_image indexes this list with the model's label ids - confirm the order matches the model's id-to-label mapping
-COCO_CLASSES = [
-    'airplane', 'apple', 'backpack', 'banana', 'baseball hat', 'baseball glove', 'bear', 'bed', 'bench', 'bicycle',
-    'bird', 'boat', 'book', 'bottle', 'bowl', 'broccoli', 'bus', 'cake', 'car', 'carrot', 'cat', 'cell phone', 'chair',
-    'clock', 'couch', 'cow', 'cup', 'dining table', 'dog', 'donut', 'elephant', 'fire hydrant', 'fork', 'frisbee',
-    'giraffe', 'hair drier', 'handbag', 'horse', 'hot dog', 'keyboard', 'kite', 'knife', 'laptop', 'microwave',
-    'motorcycle', 'mouse', 'orange', 'oven', 'parking meter', 'person', 'pizza', 'potted plant', 'refrigerator',
-    'remote', 'sandwich', 'scissors', 'sheep', 'sink', 'skateboard', 'skis', 'snowboard', 'spoon', 'sports ball',
-    'stop sign', 'suitcase', 'surfboard', 'teddy bear', 'tennis racket', 'tie', 'toaster', 'toilet', 'toothbrush',
-    'traffic light', 'train', 'truck', 'tv', 'umbrella', 'vase', 'wine glass'
-]
-# Module-level chat history: a list of (user_message, bot_response) tuples appended to by both handlers
 chat_history = []
-# Send `message` to the Gemini model, append the (message, response) pair to chat_history, and return the whole history
-def chat_with_gemini(message):
     global chat_history
-    bot_response = llm.predict(message)  # This will interact with the Gemini model
     chat_history.append((message, bot_response))
     return chat_history
-# Run DETR object detection on the image at image_path, append a summary (or the error text) to chat_history, and return the history
 def analyze_image(image_path):
     global chat_history
     try:
-        # Open the image as RGB and convert it to model inputs
         image = Image.open(image_path).convert("RGB")
-        inputs = processor(images=image, return_tensors="pt")
-        # Perform inference without tracking gradients
-        with torch.no_grad():
-            outputs = model(**inputs)
-        # Post-process against the original image size. NOTE(review): no threshold is passed, so the library default presumably applies first - confirm
-        target_sizes = torch.tensor([image.size[::-1]])  # (height, width)
-        results = processor.post_process_object_detection(outputs, target_sizes=target_sizes)[0]
-        # Collect detected objects; the score check below keeps only detections scoring above 0.1
         detected_objects = []
-        for idx, label in enumerate(results["labels"]):
-            # Get the object label based on label index
-            object_name = COCO_CLASSES[label.item()]  # NOTE(review): assumes label ids index directly into COCO_CLASSES - confirm
-            score = results["scores"][idx].item()  # Confidence score for this detection
-            # Keep only objects whose score is above 0.1
-            if score > 0.1:
-                detected_objects.append(f"{object_name} (score: {score:.2f})")
         if detected_objects:
             bot_response = f"Objects detected: {', '.join(detected_objects)}."
         else:
             bot_response = "No objects detected."
         chat_history.append(("Uploaded an image for analysis", bot_response))
-        return chat_history
     except Exception as e:
         error_msg = f"Error processing the image: {str(e)}"
         chat_history.append(("Error during image analysis", error_msg))
-        return chat_history
-# Build the Gradio interface: chat panel, text box with Send button, and an image upload
 with gr.Blocks() as demo:
     gr.Markdown("# Ken Chatbot")
     gr.Markdown("Ask me anything or upload an image for analysis!")
-    # Chatbot display without "User" or "Bot" labels
     chatbot = gr.Chatbot(elem_id="chatbot")
-    # User input components
     msg = gr.Textbox(label="Type your message here...", placeholder="Enter your message...", show_label=False)
     send_btn = gr.Button("Send")
     img_upload = gr.Image(type="filepath", label="Upload an image for analysis")
-    # Thin wrappers that forward to the module-level handlers
-    def handle_text_message(message):
-        return chat_with_gemini(message)
-    def handle_image_upload(image_path):
-        return analyze_image(image_path)
-    # Wire events: submitting the text box or clicking Send both call handle_text_message and update the chatbot
-    msg.submit(handle_text_message, msg, chatbot)
-    send_btn.click(handle_text_message, msg, chatbot)
     send_btn.click(lambda: "", None, msg)  # Clear input field
-    img_upload.change(handle_image_upload, img_upload, chatbot)
-    # Custom chat-bubble CSS, injected through an HTML <style> block
-    gr.HTML("""
-    <style>
-    #chatbot .message-container {
-        display: flex;
-        flex-direction: column;
-        margin-bottom: 10px;
-        max-width: 70%;
-    }
-    #chatbot .message {
-        border-radius: 15px;
-        padding: 10px;
-        margin: 5px 0;
-        word-wrap: break-word;
-    }
-    #chatbot .message.user {
-        background-color: #DCF8C6;
-        margin-left: auto;
-        text-align: right;
-    }
-    #chatbot .message.bot {
-        background-color: #E1E1E1;
-        margin-right: auto;
-        text-align: left;
-    }
-    </style>
-    """)
-# Launch the Gradio interface
 demo.launch()

 import os
 import gradio as gr
+from ultralytics import YOLO  # Use YOLOv8 for object detection
+from PIL import Image, ImageDraw
 import torch
+# Load the YOLOv8 model (make sure the weights have been downloaded)
+model = YOLO("yolov8n.pt")  # Can be swapped for a larger model if needed
+# Global chat history: a list of (user_message, bot_response) tuples appended to by both handlers
 chat_history = []
+# Chat handler: builds a placeholder reply that echoes `message`, appends the (message, reply) pair to chat_history, and returns the history
+def chat_with_bot(message):
     global chat_history
+    bot_response = f"Bot: Saya menerima pesan Anda: '{message}'"  # Placeholder response
     chat_history.append((message, bot_response))
     return chat_history
+# Analyze the image at image_path with YOLO. Returns (annotated image, chat_history); on any exception returns (None, chat_history) with the error recorded in the history
 def analyze_image(image_path):
     global chat_history
     try:
+        # Load the image as RGB. NOTE(review): image_path is presumably None when the upload is cleared, which would end up in the except branch - confirm
         image = Image.open(image_path).convert("RGB")
+        # Run object detection on the image
+        results = model(image)
+        # Collect the detections and draw them on a copy of the image
         detected_objects = []
+        image_draw = image.copy()
+        draw = ImageDraw.Draw(image_draw)
+        for result in results:
+            for box in result.boxes.data:
+                x1, y1, x2, y2, score, class_id = box.tolist()  # assumes each row holds exactly six values (corners, score, class id) - confirm
+                if score > 0.5:  # Only keep objects with a confidence score > 0.5
+                    class_name = model.names[int(class_id)]
+                    detected_objects.append(f"{class_name} (score: {score:.2f})")
+                    draw.rectangle([x1, y1, x2, y2], outline="red", width=3)
+                    draw.text((x1, y1), class_name, fill="red")
         if detected_objects:
             bot_response = f"Objects detected: {', '.join(detected_objects)}."
         else:
             bot_response = "No objects detected."
         chat_history.append(("Uploaded an image for analysis", bot_response))
+        return image_draw, chat_history
     except Exception as e:
         error_msg = f"Error processing the image: {str(e)}"
         chat_history.append(("Error during image analysis", error_msg))
+        return None, chat_history
+# Build the Gradio interface: chat panel, text box with Send button, image upload, and an output image for the detections
 with gr.Blocks() as demo:
     gr.Markdown("# Ken Chatbot")
     gr.Markdown("Ask me anything or upload an image for analysis!")
     chatbot = gr.Chatbot(elem_id="chatbot")
     msg = gr.Textbox(label="Type your message here...", placeholder="Enter your message...", show_label=False)
     send_btn = gr.Button("Send")
     img_upload = gr.Image(type="filepath", label="Upload an image for analysis")
+    img_output = gr.Image(label="Detected Objects")  # shows the annotated image returned by analyze_image
+    msg.submit(chat_with_bot, msg, chatbot)  # note: only the Send button click below clears the text box
+    send_btn.click(chat_with_bot, msg, chatbot)
     send_btn.click(lambda: "", None, msg)  # Clear input field
+    img_upload.change(analyze_image, img_upload, [img_output, chatbot])  # the two return values fill the output image and the chat
 demo.launch()