Spaces:

vsaez
/

object-detection-app

Sleeping

App Files Files Community

Víctor Sáez committed on about 1 month ago

Commit

c2f455f

1 Parent(s): 45307ff

error

Browse files

Files changed (1) hide show

app.py +20 -99

app.py CHANGED Viewed

@@ -2,49 +2,41 @@ import gradio as gr
 import torch
 from PIL import Image, ImageDraw, ImageFont
 from transformers import DetrImageProcessor, DetrForObjectDetection
-import transformers
-# Global variables to cache models
 current_model = None
 current_processor = None
 current_model_name = None
-# Available models with better selection
 available_models = {
-    # DETR Models
     "DETR ResNet-50": "facebook/detr-resnet-50",
     "DETR ResNet-101": "facebook/detr-resnet-101",
     "DETR DC5": "facebook/detr-resnet-50-dc5",
     "DETR ResNet-50 Face Only": "esraakh/detr_fine_tune_face_detection_final"
 }
 def load_model(model_key):
-    """Load model and processor based on selected model key"""
     global current_model, current_processor, current_model_name
     model_name = available_models[model_key]
-    # Only load if it's a different model
     if current_model_name != model_name:
         print(f"Loading model: {model_name}")
         current_processor = DetrImageProcessor.from_pretrained(model_name)
         current_model = DetrForObjectDetection.from_pretrained(model_name)
         current_model_name = model_name
-        print(f"Model loaded: {model_name}")
-        print(f"Available labels: {list(current_model.config.id2label.values())}")
     return current_model, current_processor
-# Fixed font loading - this was the main issue
 def get_font(size=12):
     try:
         return ImageFont.truetype("arial.ttf", size=size)
     except:
         return ImageFont.load_default()
-# Set up translations for the app
 translations = {
     "English": {
         "title": "## Enhanced Object Detection App\nUpload an image to detect objects using various DETR models.",
@@ -90,127 +82,75 @@ translations = {
     }
 }
 def t(language, key):
     return translations.get(language, translations["English"]).get(key, key)
 def get_translated_model_choices(language):
-    """Get model choices translated to the selected language"""
     model_mapping = {
         "DETR ResNet-50": "model_fast",
         "DETR ResNet-101": "model_precision",
         "DETR DC5": "model_small",
         "DETR ResNet-50 Face Only": "model_faces"
     }
     translated_choices = []
     for model_key in available_models.keys():
         if model_key in model_mapping:
             translation_key = model_mapping[model_key]
             translated_name = t(language, translation_key)
         else:
-            translated_name = model_key  # Fallback to original name
         translated_choices.append(translated_name)
     return translated_choices
 def get_model_key_from_translation(translated_name, language):
-    """Get the original model key from translated name"""
     model_mapping = {
         "DETR ResNet-50": "model_fast",
         "DETR ResNet-101": "model_precision",
         "DETR DC5": "model_small",
         "DETR ResNet-50 Face Only": "model_faces"
     }
-    # Reverse lookup
     for model_key, translation_key in model_mapping.items():
         if t(language, translation_key) == translated_name:
             return model_key
-    # If not found, try direct match
     if translated_name in available_models:
         return translated_name
-    # Default fallback
     return "DETR ResNet-50"
-def get_helsinki_model(language_label):
-    """Returns the Helsinki-NLP model name for translating from English to the selected language."""
-    lang_map = {
-        "Spanish": "es",
-        "French": "fr",
-        "English": "en"
-    }
-    target = lang_map.get(language_label)
-    if not target or target == "en":
-        return None
-    return f"Helsinki-NLP/opus-mt-en-{target}"
-# add cache for translations
 translation_cache = {}
 def translate_label(language_label, label):
-    """Translates the given label to the target language."""
-    # Check cache first
     cache_key = f"{language_label}_{label}"
     if cache_key in translation_cache:
         return translation_cache[cache_key]
-    model_name = get_helsinki_model(language_label)
-    if not model_name:
-        return label
-    try:
-        translator = transformers.pipeline("translation", model=model_name)
-        result = translator(label, max_length=40)
-        translated = result[0]['translation_text']
-        # Cache the result
-        translation_cache[cache_key] = translated
-        return translated
-    except Exception as e:
-        print(f"Translation error (429 or other): {e}")
-        return label  # Return original if translation fails
 def detect_objects(image, language_selector, translated_model_selector, threshold):
-    """Enhanced object detection with adjustable threshold and better info"""
     try:
         if image is None:
-            return None, "Por favor, sube una imagen antes de detectar objetos."
         model_selector = get_model_key_from_translation(translated_model_selector, language_selector)
-        print(f"Processing image. Language: {language_selector}, Model: {model_selector}, Threshold: {threshold}")
         model, processor = load_model(model_selector)
         inputs = processor(images=image, return_tensors="pt")
         outputs = model(**inputs)
         target_sizes = torch.tensor([image.size[::-1]])
         results = processor.post_process_object_detection(
             outputs, threshold=threshold, target_sizes=target_sizes
         )[0]
         image_with_boxes = image.copy()
         draw = ImageDraw.Draw(image_with_boxes)
         detection_info = f"Detected {len(results['scores'])} objects with threshold {threshold}\n"
         detection_info += f"Model: {translated_model_selector} ({model_selector})\n\n"
         colors = {
-            'high': 'red',      # > 0.8
-            'medium': 'orange', # 0.5-0.8
-            'low': 'yellow'     # < 0.5
         }
         detected_objects = []
         for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
             confidence = score.item()
             box = [round(x, 2) for x in box.tolist()]
@@ -220,7 +160,6 @@ def detect_objects(image, language_selector, translated_model_selector, threshol
                 color = colors['medium']
             else:
                 color = colors['low']
             draw.rectangle(box, outline=color, width=3)
             label_text = model.config.id2label[label.item()]
             translated_label = translate_label(language_selector, label_text)
@@ -231,7 +170,6 @@ def detect_objects(image, language_selector, translated_model_selector, threshol
                 'confidence': confidence,
                 'box': box
             })
             try:
                 image_width = image.size[0]
                 font_size = max(image_width // 40, 12)
@@ -243,21 +181,18 @@ def detect_objects(image, language_selector, translated_model_selector, threshol
                 font = get_font(12)
                 text_width = 50
                 text_height = 20
             text_bg = [
                 box[0], box[1] - text_height - 4,
                         box[0] + text_width + 4, box[1]
             ]
             draw.rectangle(text_bg, fill="black")
             draw.text((box[0] + 2, box[1] - text_height - 2), display_text, fill="white", font=font)
         if detected_objects:
             detection_info += "Objects found:\n"
             for obj in sorted(detected_objects, key=lambda x: x['confidence'], reverse=True):
                 detection_info += f"- {obj['translated']} ({obj['label']}): {obj['confidence']:.3f}\n"
         else:
             detection_info += "No objects detected. Try lowering the threshold."
         return image_with_boxes, detection_info
     except Exception as e:
         import traceback
@@ -265,12 +200,10 @@ def detect_objects(image, language_selector, translated_model_selector, threshol
         traceback.print_exc()
         return None, f"Error detecting objects: {e}"
 def build_app():
     with gr.Blocks(theme=gr.themes.Soft()) as app:
         with gr.Row():
             title = gr.Markdown(t("English", "title"))
         with gr.Row():
             with gr.Column(scale=1):
                 language_selector = gr.Dropdown(
@@ -281,18 +214,17 @@ def build_app():
             with gr.Column(scale=1):
                 model_selector = gr.Dropdown(
                     choices=get_translated_model_choices("English"),
-                    value=t("English", "model_fast"),  # Default to translated "fast" option
                     label=t("English", "dropdown_detection_model_label")
                 )
             with gr.Column(scale=1):
                 threshold_slider = gr.Slider(
                     minimum=0.1,
                     maximum=0.95,
-                    value=0.5,  # Lowered default threshold
                     step=0.05,
                     label=t("English", "threshold_label")
                 )
         with gr.Row():
             with gr.Column(scale=1):
                 input_image = gr.Image(type="pil", label=t("English", "input_label"))
@@ -304,12 +236,9 @@ def build_app():
                     lines=10,
                     max_lines=15
                 )
-        # Function to update interface when language changes
         def update_interface(selected_language):
             translated_choices = get_translated_model_choices(selected_language)
             default_model = t(selected_language, "model_fast")
             return [
                 gr.update(value=t(selected_language, "title")),
                 gr.update(label=t(selected_language, "dropdown_label")),
@@ -324,8 +253,6 @@ def build_app():
                 gr.update(label=t(selected_language, "output_label")),
                 gr.update(label=t(selected_language, "info_label"))
             ]
-        # Connect language change event
         language_selector.change(
             fn=update_interface,
             inputs=language_selector,
@@ -333,21 +260,15 @@ def build_app():
                      input_image, button, output_image, detection_info],
             queue=False
         )
-        # Connect detection button click event
         button.click(
             fn=detect_objects,
             inputs=[input_image, language_selector, model_selector, threshold_slider],
             outputs=[output_image, detection_info]
         )
     return app
-# Initialize with default model
 load_model("DETR ResNet-50")
-# Launch the application
 if __name__ == "__main__":
     app = build_app()
     app.launch()

 import torch
 from PIL import Image, ImageDraw, ImageFont
 from transformers import DetrImageProcessor, DetrForObjectDetection
+# Only import pipeline if translation is enabled
+ENABLE_TRANSLATION = False  # Set to True only if you can load the Helsinki models locally
+if ENABLE_TRANSLATION:
+    from transformers import pipeline
+# Global variables
 current_model = None
 current_processor = None
 current_model_name = None
 available_models = {
     "DETR ResNet-50": "facebook/detr-resnet-50",
     "DETR ResNet-101": "facebook/detr-resnet-101",
     "DETR DC5": "facebook/detr-resnet-50-dc5",
     "DETR ResNet-50 Face Only": "esraakh/detr_fine_tune_face_detection_final"
 }
 def load_model(model_key):
     global current_model, current_processor, current_model_name
     model_name = available_models[model_key]
     if current_model_name != model_name:
         print(f"Loading model: {model_name}")
         current_processor = DetrImageProcessor.from_pretrained(model_name)
         current_model = DetrForObjectDetection.from_pretrained(model_name)
         current_model_name = model_name
     return current_model, current_processor
 def get_font(size=12):
     try:
         return ImageFont.truetype("arial.ttf", size=size)
     except:
         return ImageFont.load_default()
 translations = {
     "English": {
         "title": "## Enhanced Object Detection App\nUpload an image to detect objects using various DETR models.",
     }
 }
 def t(language, key):
     return translations.get(language, translations["English"]).get(key, key)
 def get_translated_model_choices(language):
     model_mapping = {
         "DETR ResNet-50": "model_fast",
         "DETR ResNet-101": "model_precision",
         "DETR DC5": "model_small",
         "DETR ResNet-50 Face Only": "model_faces"
     }
     translated_choices = []
     for model_key in available_models.keys():
         if model_key in model_mapping:
             translation_key = model_mapping[model_key]
             translated_name = t(language, translation_key)
         else:
+            translated_name = model_key
         translated_choices.append(translated_name)
     return translated_choices
 def get_model_key_from_translation(translated_name, language):
     model_mapping = {
         "DETR ResNet-50": "model_fast",
         "DETR ResNet-101": "model_precision",
         "DETR DC5": "model_small",
         "DETR ResNet-50 Face Only": "model_faces"
     }
     for model_key, translation_key in model_mapping.items():
         if t(language, translation_key) == translated_name:
             return model_key
     if translated_name in available_models:
         return translated_name
     return "DETR ResNet-50"
+# Translation logic (only if ENABLE_TRANSLATION and model is local)
 translation_cache = {}
 def translate_label(language_label, label):
+    if language_label == "English" or not ENABLE_TRANSLATION:
+        return label
     cache_key = f"{language_label}_{label}"
     if cache_key in translation_cache:
         return translation_cache[cache_key]
+    # Dummy fallback in Spaces, or if not preloaded, just warn
+    translation_cache[cache_key] = f"{label} (no translation)"
+    return translation_cache[cache_key]
 def detect_objects(image, language_selector, translated_model_selector, threshold):
     try:
         if image is None:
+            return None, "Please upload an image before detecting objects."
         model_selector = get_model_key_from_translation(translated_model_selector, language_selector)
         model, processor = load_model(model_selector)
         inputs = processor(images=image, return_tensors="pt")
         outputs = model(**inputs)
         target_sizes = torch.tensor([image.size[::-1]])
         results = processor.post_process_object_detection(
             outputs, threshold=threshold, target_sizes=target_sizes
         )[0]
         image_with_boxes = image.copy()
         draw = ImageDraw.Draw(image_with_boxes)
         detection_info = f"Detected {len(results['scores'])} objects with threshold {threshold}\n"
         detection_info += f"Model: {translated_model_selector} ({model_selector})\n\n"
         colors = {
+            'high': 'red',
+            'medium': 'orange',
+            'low': 'yellow'
         }
         detected_objects = []
         for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
             confidence = score.item()
             box = [round(x, 2) for x in box.tolist()]
                 color = colors['medium']
             else:
                 color = colors['low']
             draw.rectangle(box, outline=color, width=3)
             label_text = model.config.id2label[label.item()]
             translated_label = translate_label(language_selector, label_text)
                 'confidence': confidence,
                 'box': box
             })
             try:
                 image_width = image.size[0]
                 font_size = max(image_width // 40, 12)
                 font = get_font(12)
                 text_width = 50
                 text_height = 20
             text_bg = [
                 box[0], box[1] - text_height - 4,
                         box[0] + text_width + 4, box[1]
             ]
             draw.rectangle(text_bg, fill="black")
             draw.text((box[0] + 2, box[1] - text_height - 2), display_text, fill="white", font=font)
         if detected_objects:
             detection_info += "Objects found:\n"
             for obj in sorted(detected_objects, key=lambda x: x['confidence'], reverse=True):
                 detection_info += f"- {obj['translated']} ({obj['label']}): {obj['confidence']:.3f}\n"
         else:
             detection_info += "No objects detected. Try lowering the threshold."
         return image_with_boxes, detection_info
     except Exception as e:
         import traceback
         traceback.print_exc()
         return None, f"Error detecting objects: {e}"
 def build_app():
     with gr.Blocks(theme=gr.themes.Soft()) as app:
         with gr.Row():
             title = gr.Markdown(t("English", "title"))
         with gr.Row():
             with gr.Column(scale=1):
                 language_selector = gr.Dropdown(
             with gr.Column(scale=1):
                 model_selector = gr.Dropdown(
                     choices=get_translated_model_choices("English"),
+                    value=t("English", "model_fast"),
                     label=t("English", "dropdown_detection_model_label")
                 )
             with gr.Column(scale=1):
                 threshold_slider = gr.Slider(
                     minimum=0.1,
                     maximum=0.95,
+                    value=0.5,
                     step=0.05,
                     label=t("English", "threshold_label")
                 )
         with gr.Row():
             with gr.Column(scale=1):
                 input_image = gr.Image(type="pil", label=t("English", "input_label"))
                     lines=10,
                     max_lines=15
                 )
         def update_interface(selected_language):
             translated_choices = get_translated_model_choices(selected_language)
             default_model = t(selected_language, "model_fast")
             return [
                 gr.update(value=t(selected_language, "title")),
                 gr.update(label=t(selected_language, "dropdown_label")),
                 gr.update(label=t(selected_language, "output_label")),
                 gr.update(label=t(selected_language, "info_label"))
             ]
         language_selector.change(
             fn=update_interface,
             inputs=language_selector,
                      input_image, button, output_image, detection_info],
             queue=False
         )
         button.click(
             fn=detect_objects,
             inputs=[input_image, language_selector, model_selector, threshold_slider],
             outputs=[output_image, detection_info]
         )
     return app
 load_model("DETR ResNet-50")
 if __name__ == "__main__":
     app = build_app()
     app.launch()