Revrse committed on
Commit
4c7840d
·
verified ·
1 Parent(s): 60602f0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +138 -128
app.py CHANGED
@@ -7,11 +7,30 @@ import os
7
  import spaces
8
  import json
9
  import re
 
 
10
 
11
- # Initialize object detection using proven working YOLO models
12
- class AdvancedYOLODetector:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  def __init__(self):
14
- # Using proven working YOLO models on Hugging Face Inference API
15
  self.api_url = "https://api-inference.huggingface.co/models/hustvl/yolos-small"
16
  # Fallback models in order of preference (all tested and working):
17
  self.fallback_models = [
@@ -93,7 +112,7 @@ class AdvancedYOLODetector:
93
  "2. High API traffic - try again in a few minutes\n" +
94
  "3. Check your HF token is valid and has sufficient quota")
95
 
96
- object_detector = AdvancedYOLODetector()
97
 
98
  # Extended object class names including common variations and synonyms
99
  COMMON_OBJECTS = [
@@ -189,13 +208,13 @@ def fuzzy_match_object(user_input, detected_labels):
189
 
190
  def detect_objects(image, target_object, confidence_threshold, hf_token=None):
191
  """
192
- Detect any object in the image using advanced YOLO and return bounding boxes
193
  """
194
  try:
195
  if not target_object or not target_object.strip():
196
  raise gr.Error("Please enter an object name to detect and remove")
197
 
198
- # Use advanced YOLO for object detection
199
  results = object_detector.detect(image, hf_token)
200
 
201
  if not results or not isinstance(results, list):
@@ -280,9 +299,56 @@ def create_mask_from_detections(image, detections, mask_expansion=10):
280
  return mask
281
 
282
  @spaces.GPU
283
- def remove_objects(image, object_name, confidence_threshold, mask_expansion, inpaint_prompt, hf_token):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
284
  """
285
- Main function to remove any specified object from image using advanced YOLO + SDXL
286
  """
287
  try:
288
  if image is None:
@@ -296,7 +362,7 @@ def remove_objects(image, object_name, confidence_threshold, mask_expansion, inp
296
  if not token:
297
  raise gr.Error("Please provide your Hugging Face token or set HF_TOKEN in Space secrets")
298
 
299
- # Step 1: Detect objects using advanced YOLO
300
  detections = detect_objects(image, object_name, confidence_threshold, token)
301
 
302
  if not detections:
@@ -308,119 +374,49 @@ def remove_objects(image, object_name, confidence_threshold, mask_expansion, inp
308
  suggestion_msg += "• Checking if the object is clearly visible in the image"
309
  return image, None, suggestion_msg
310
 
311
- # Step 2: Create mask with adaptive expansion
312
  mask = create_mask_from_detections(image, detections, mask_expansion)
313
 
314
- # Step 3: Use reliable inpainting model
315
- # Try multiple inpainting models in order of preference
316
- inpaint_models = [
317
- "https://api-inference.huggingface.co/models/runwayml/stable-diffusion-inpainting",
318
- "https://api-inference.huggingface.co/models/stabilityai/stable-diffusion-2-inpainting",
319
- "https://api-inference.huggingface.co/models/diffusers/stable-diffusion-xl-1.0-inpainting-0.1"
320
- ]
321
-
322
- headers = {"Authorization": f"Bearer {token}"}
323
-
324
- # Convert images to bytes
325
- img_buffer = io.BytesIO()
326
- image.save(img_buffer, format='PNG')
327
- img_bytes = img_buffer.getvalue()
328
-
329
- mask_buffer = io.BytesIO()
330
- mask.save(mask_buffer, format='PNG')
331
- mask_bytes = mask_buffer.getvalue()
332
-
333
- # Prepare multipart form data
334
- files = {
335
- 'image': ('image.png', img_bytes, 'image/png'),
336
- 'mask': ('mask.png', mask_bytes, 'image/png')
337
- }
338
-
339
- # Enhanced inpainting prompt
340
- enhanced_prompt = f"{inpaint_prompt}, photorealistic, high quality, detailed, natural lighting"
341
-
342
- data = {
343
- 'prompt': enhanced_prompt,
344
- 'negative_prompt': 'blurry, low quality, distorted, artifacts, unrealistic, pixelated, noise',
345
- 'num_inference_steps': 25,
346
- 'guidance_scale': 7.5,
347
- 'strength': 0.99
348
- }
349
-
350
- # Try multiple inpainting models
351
- inpainting_success = False
352
- last_error = ""
353
-
354
- for i, inpaint_api_url in enumerate(inpaint_models):
355
- try:
356
- print(f"Trying inpainting model {i+1}/{len(inpaint_models)}: {inpaint_api_url.split('/')[-1]}")
357
-
358
- response = requests.post(inpaint_api_url, headers=headers, files=files, data=data, timeout=120)
359
-
360
- if response.status_code == 503:
361
- # Model is loading, wait and retry once
362
- import time
363
- time.sleep(10)
364
- response = requests.post(inpaint_api_url, headers=headers, files=files, data=data, timeout=120)
365
-
366
- if response.status_code == 200:
367
- result_image = Image.open(io.BytesIO(response.content))
368
- detected_labels = [d.get('label', 'unknown') for d in detections]
369
- status_msg = f"✅ Successfully removed {len(detections)} '{object_name}' object(s)\n"
370
- status_msg += f"🎯 Detected as: {', '.join(detected_labels)}\n"
371
- status_msg += f"🔧 Used: {inpaint_api_url.split('/')[-1]} for inpainting"
372
- inpainting_success = True
373
- break
374
- else:
375
- last_error = f"HTTP {response.status_code}: {response.text[:200]}"
376
- print(f"Model {i+1} failed: {last_error}")
377
- continue
378
-
379
- except Exception as e:
380
- last_error = str(e)
381
- print(f"Model {i+1} error: {last_error}")
382
- continue
383
-
384
- if not inpainting_success:
385
- # Fallback: return original with mask overlay for debugging
386
  result_image = create_mask_overlay(image, mask)
387
- status_msg = f"⚠️ All inpainting models failed. Last error: {last_error}\n"
388
- status_msg += f"🎯 Found {len(detections)} '{object_name}' object(s) - detection was successful\n"
389
- status_msg += f"📍 Showing detected areas in red overlay"
390
-
391
- return result_image, mask, status_msg
392
 
393
  except Exception as e:
394
  return image, None, f"❌ Error: {str(e)}"
395
 
396
- def create_mask_overlay(image, mask):
397
- """Create a visualization showing the mask overlay on the original image"""
398
- result_image = image.copy()
399
- overlay = Image.new('RGBA', image.size, (255, 0, 0, 100))
400
- mask_rgba = mask.convert('RGBA')
401
- result_image = Image.alpha_composite(
402
- result_image.convert('RGBA'),
403
- Image.composite(overlay, Image.new('RGBA', image.size, (0,0,0,0)), mask)
404
- )
405
- return result_image.convert('RGB')
406
-
407
  # Create Gradio interface
408
  with gr.Blocks(
409
  fill_height=True,
410
- title="Advanced Object Removal with YOLOv8",
411
  theme=gr.themes.Soft()
412
  ) as demo:
413
 
414
  gr.Markdown("""
415
- # 🚀 Advanced Object Removal using Proven Detection Models + SDXL Inpainting
416
 
417
- Upload an image and specify **ANY object** you want to remove - no limitations!
418
 
419
- **How it works:**
420
  1. 🔍 **Multi-Model Detection**: Uses multiple proven object detection models with auto-fallback
421
  2. 🧠 **Smart Matching**: Handles synonyms, plurals, and fuzzy object name matching
422
- 3. 🎭 **Adaptive Masking**: Creates intelligent removal masks
423
- 4. 🎨 **SDXL Inpainting**: Uses state-of-the-art AI to fill removed areas seamlessly
424
  """)
425
 
426
  with gr.Row():
@@ -468,18 +464,29 @@ with gr.Blocks(
468
  value=20,
469
  step=5,
470
  label="📏 Mask Expansion (pixels)",
471
- info="Expand mask around detected objects for better removal"
 
 
 
 
 
 
 
 
 
472
  )
473
 
474
- inpaint_prompt = gr.Textbox(
475
- label="✨ Inpainting Prompt",
476
- value="natural background, seamless, realistic environment",
477
- placeholder="Describe what should replace the removed object",
478
- info="Be specific about the desired background/replacement"
 
 
479
  )
480
 
481
  hf_token = gr.Textbox(
482
- label="🔑 Hugging Face Token (Optional)",
483
  type="password",
484
  placeholder="hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
485
  info="Get token from https://huggingface.co/settings/tokens (or set HF_TOKEN in Space secrets)"
@@ -498,7 +505,7 @@ with gr.Blocks(
498
  height=300
499
  )
500
  mask_image = gr.Image(
501
- label="🎭 Generated Mask",
502
  type="pil",
503
  height=300
504
  )
@@ -506,7 +513,7 @@ with gr.Blocks(
506
  status_text = gr.Textbox(
507
  label="📊 Status & Detection Info",
508
  interactive=False,
509
- max_lines=4
510
  )
511
 
512
  # Event handlers
@@ -517,7 +524,8 @@ with gr.Blocks(
517
  object_name,
518
  confidence_threshold,
519
  mask_expansion,
520
- inpaint_prompt,
 
521
  hf_token
522
  ],
523
  outputs=[output_image, mask_image, status_text]
@@ -531,11 +539,13 @@ with gr.Blocks(
531
 
532
  1. **Upload an image** containing objects you want to remove
533
  2. **Enter ANY object name** in the text box - no restrictions!
534
- 3. **Adjust settings** if needed:
535
  - **Confidence**: Start with 0.3, increase if too many false detections
536
- - **Mask expansion**: Larger values ensure complete object removal
537
- - **Inpainting prompt**: Describe the desired replacement scene
538
- 4. **Click "Remove Objects"** and wait for AI processing
 
 
539
 
540
  ### 💡 Smart Object Recognition:
541
  - **Handles variations**: "car" = "vehicle" = "automobile"
@@ -558,17 +568,17 @@ with gr.Blocks(
558
  - **Nature**: tree, flower, rock, cloud, mountain
559
  - **And literally thousands more!**
560
 
561
- ### ⚠️ System Info:
562
- - **🔍 Detection**: YOLOS-Small + DETR + OWL-ViT (auto-fallback)
563
- - **🎨 Inpainting**: Multiple SD models with auto-fallback
564
- - **⏱️ Processing**: 30-120 seconds (first request may be slower)
565
- - **🔧 Auto-retry**: Tries multiple models if one is busy
566
- - **Token Required**: HF token needed for API access
567
 
568
- **If you get "models unavailable" error:**
569
- - Wait 2-3 minutes and try again (models loading)
570
- - Check your HF token is valid and has API quota
571
- - Try during off-peak hours for better performance
572
  """)
573
 
574
  if __name__ == "__main__":
 
7
  import spaces
8
  import json
9
  import re
10
+ import torch
11
+ from diffusers import FluxKontextPipeline
12
 
13
+ # Initialize FLUX model for advanced inpainting
14
@spaces.GPU
def load_flux_model():
    """Build the FLUX.1 Kontext pipeline in bfloat16 and move it to CUDA.

    Returns:
        The loaded ``FluxKontextPipeline`` placed on the ``"cuda"`` device,
        or ``None`` when loading fails for any reason (the error is printed
        rather than raised, so callers must check for ``None``).
    """
    # NOTE(review): this loader is itself wrapped in @spaces.GPU and its
    # result is cached by the caller in a module global - confirm that the
    # returned pipeline is still usable in the caller's context on ZeroGPU.
    model_id = "black-forest-labs/FLUX.1-Kontext-dev"
    try:
        pipeline = FluxKontextPipeline.from_pretrained(
            model_id,
            torch_dtype=torch.bfloat16,
        )
        return pipeline.to("cuda")
    except Exception as e:
        # Best-effort by design: report the failure and signal it with None.
        print(f"Failed to load FLUX model: {e}")
        return None
26
+
27
+ # Global variable to store the model (loaded once)
28
+ flux_pipe = None
29
+
30
+ # Initialize object detection using proven working models
31
+ class AdvancedObjectDetector:
32
  def __init__(self):
33
+ # Using proven working object detection models on Hugging Face Inference API
34
  self.api_url = "https://api-inference.huggingface.co/models/hustvl/yolos-small"
35
  # Fallback models in order of preference (all tested and working):
36
  self.fallback_models = [
 
112
  "2. High API traffic - try again in a few minutes\n" +
113
  "3. Check your HF token is valid and has sufficient quota")
114
 
115
+ object_detector = AdvancedObjectDetector()
116
 
117
  # Extended object class names including common variations and synonyms
118
  COMMON_OBJECTS = [
 
208
 
209
  def detect_objects(image, target_object, confidence_threshold, hf_token=None):
210
  """
211
+ Detect any object in the image using advanced detection models and return bounding boxes
212
  """
213
  try:
214
  if not target_object or not target_object.strip():
215
  raise gr.Error("Please enter an object name to detect and remove")
216
 
217
+ # Use advanced detection for object detection
218
  results = object_detector.detect(image, hf_token)
219
 
220
  if not results or not isinstance(results, list):
 
299
  return mask
300
 
301
  @spaces.GPU
302
def flux_inpainting(image, object_name, guidance_scale=2.5, steps=28):
    """Remove ``object_name`` from ``image`` with the FLUX.1 Kontext pipeline.

    The pipeline is cached in the module-level ``flux_pipe`` and is loaded
    through ``load_flux_model()`` the first time it is needed.

    Args:
        image: Source image; must provide ``convert()`` and a two-item
            ``size`` (presumably a PIL image - confirm against the caller).
        object_name: Name of the object, interpolated into the edit prompt.
        guidance_scale: Guidance strength forwarded to the pipeline.
        steps: Number of inference steps. Coerced to ``int`` because UI
            sliders may deliver the value as a float.

    Returns:
        ``(edited_image, True)`` on success, ``(None, False)`` on any
        failure. Failures are printed and never raised.
    """
    global flux_pipe

    try:
        # Lazily load the model on first use.
        if flux_pipe is None:
            print("Loading FLUX.1 Kontext model...")
            flux_pipe = load_flux_model()

        if flux_pipe is None:
            # Report the load failure directly instead of raising a bare
            # Exception only to catch it a few lines below; the printed
            # message is identical to what the handler would have produced.
            print("FLUX inpainting error: Failed to load FLUX model")
            return None, False

        removal_prompt = (
            f"Remove the {object_name} from this image, "
            "fill with natural background that matches the surrounding environment, "
            "photorealistic, seamless, high quality"
        )

        width, height = image.size
        output = flux_pipe(
            image=image.convert("RGB"),
            prompt=removal_prompt,
            guidance_scale=guidance_scale,
            width=width,
            height=height,
            num_inference_steps=int(steps),
            # Fixed seed keeps results reproducible between runs.
            generator=torch.Generator().manual_seed(42),
        )
        return output.images[0], True

    except Exception as e:
        # Deliberate catch-all: the caller falls back to a mask overlay.
        print(f"FLUX inpainting error: {str(e)}")
        return None, False
336
+
337
def create_mask_overlay(image, mask):
    """Return an RGB copy of ``image`` with the masked area tinted red.

    Args:
        image: Original image; it is converted to RGBA internally and is
            not modified.
        mask: Mask image selecting where the semi-transparent red tint is
            applied. It is expected to match ``image.size``.

    Returns:
        A new RGB image with the tint composited over the original.
    """
    # Image.composite only accepts masks in mode "1", "L" or "RGBA";
    # normalise anything else to 8-bit greyscale instead of failing.
    if mask.mode not in ("1", "L", "RGBA"):
        mask = mask.convert("L")

    red_tint = Image.new('RGBA', image.size, (255, 0, 0, 100))
    transparent = Image.new('RGBA', image.size, (0, 0, 0, 0))
    # Red where the mask selects, fully transparent everywhere else.
    highlight = Image.composite(red_tint, transparent, mask)

    # convert() already returns a new image, so no explicit copy is needed.
    blended = Image.alpha_composite(image.convert('RGBA'), highlight)
    return blended.convert('RGB')
347
+
348
+ @spaces.GPU
349
+ def remove_objects(image, object_name, confidence_threshold, mask_expansion, guidance_scale, steps, hf_token):
350
  """
351
+ Main function to remove any specified object using advanced detection + FLUX inpainting
352
  """
353
  try:
354
  if image is None:
 
362
  if not token:
363
  raise gr.Error("Please provide your Hugging Face token or set HF_TOKEN in Space secrets")
364
 
365
+ # Step 1: Detect objects
366
  detections = detect_objects(image, object_name, confidence_threshold, token)
367
 
368
  if not detections:
 
374
  suggestion_msg += "• Checking if the object is clearly visible in the image"
375
  return image, None, suggestion_msg
376
 
377
+ # Step 2: Create mask for debugging/visualization
378
  mask = create_mask_from_detections(image, detections, mask_expansion)
379
 
380
+ # Step 3: Use FLUX.1 Kontext for intelligent object removal
381
+ print("Using FLUX.1 Kontext for advanced object removal...")
382
+ result_image, flux_success = flux_inpainting(image, object_name, guidance_scale, steps)
383
+
384
+ if flux_success and result_image:
385
+ detected_labels = [d.get('label', 'unknown') for d in detections]
386
+ status_msg = f"✅ Successfully removed {len(detections)} '{object_name}' object(s)\n"
387
+ status_msg += f"🎯 Detected as: {', '.join(detected_labels)}\n"
388
+ status_msg += f"🚀 Used: FLUX.1 Kontext for professional-quality removal\n"
389
+ status_msg += f"⚙️ Settings: Guidance={guidance_scale}, Steps={steps}"
390
+ return result_image, mask, status_msg
391
+ else:
392
+ # Fallback: show detection areas
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
393
  result_image = create_mask_overlay(image, mask)
394
+ status_msg = f"⚠️ FLUX inpainting failed, but detection was successful\n"
395
+ status_msg += f"🎯 Found {len(detections)} '{object_name}' object(s)\n"
396
+ status_msg += f"📍 Showing detected areas in red overlay\n"
397
+ status_msg += f"💡 Try adjusting guidance scale or steps, or check GPU availability"
398
+ return result_image, mask, status_msg
399
 
400
  except Exception as e:
401
  return image, None, f"❌ Error: {str(e)}"
402
 
 
 
 
 
 
 
 
 
 
 
 
403
  # Create Gradio interface
404
  with gr.Blocks(
405
  fill_height=True,
406
+ title="Professional Object Removal with FLUX",
407
  theme=gr.themes.Soft()
408
  ) as demo:
409
 
410
  gr.Markdown("""
411
+ # 🚀 Professional Object Removal using Advanced AI
412
 
413
+ Upload an image and specify **ANY object** you want to remove with professional results!
414
 
415
+ **Powered by cutting-edge AI:**
416
  1. 🔍 **Multi-Model Detection**: Uses multiple proven object detection models with auto-fallback
417
  2. 🧠 **Smart Matching**: Handles synonyms, plurals, and fuzzy object name matching
418
+ 3. 🎭 **Intelligent Masking**: Creates precise removal masks
419
+ 4. 🚀 **FLUX.1 Kontext**: Uses state-of-the-art contextual editing for professional results
420
  """)
421
 
422
  with gr.Row():
 
464
  value=20,
465
  step=5,
466
  label="📏 Mask Expansion (pixels)",
467
+ info="Expand mask around detected objects for debugging"
468
+ )
469
+
470
+ guidance_scale = gr.Slider(
471
+ minimum=1.0,
472
+ maximum=10.0,
473
+ value=2.5,
474
+ step=0.1,
475
+ label="🎯 FLUX Guidance Scale",
476
+ info="Higher = more faithful to prompt, lower = more creative"
477
  )
478
 
479
+ steps = gr.Slider(
480
+ minimum=10,
481
+ maximum=50,
482
+ value=28,
483
+ step=2,
484
+ label="🔄 FLUX Steps",
485
+ info="More steps = higher quality but slower processing"
486
  )
487
 
488
  hf_token = gr.Textbox(
489
+ label="🔑 Hugging Face Token (Required)",
490
  type="password",
491
  placeholder="hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
492
  info="Get token from https://huggingface.co/settings/tokens (or set HF_TOKEN in Space secrets)"
 
505
  height=300
506
  )
507
  mask_image = gr.Image(
508
+ label="🎭 Detection Mask (Debug)",
509
  type="pil",
510
  height=300
511
  )
 
513
  status_text = gr.Textbox(
514
  label="📊 Status & Detection Info",
515
  interactive=False,
516
+ max_lines=5
517
  )
518
 
519
  # Event handlers
 
524
  object_name,
525
  confidence_threshold,
526
  mask_expansion,
527
+ guidance_scale,
528
+ steps,
529
  hf_token
530
  ],
531
  outputs=[output_image, mask_image, status_text]
 
539
 
540
  1. **Upload an image** containing objects you want to remove
541
  2. **Enter ANY object name** in the text box - no restrictions!
542
+ 3. **Adjust detection settings** if needed:
543
  - **Confidence**: Start with 0.3, increase if too many false detections
544
+ - **Mask expansion**: For debugging - shows detection areas
545
+ 4. **Fine-tune FLUX settings**:
546
+ - **Guidance Scale**: 2.5 is optimal for most cases
547
+ - **Steps**: 28 gives good quality/speed balance
548
+ 5. **Click "Remove Objects"** and wait for professional AI processing
549
 
550
  ### 💡 Smart Object Recognition:
551
  - **Handles variations**: "car" = "vehicle" = "automobile"
 
568
  - **Nature**: tree, flower, rock, cloud, mountain
569
  - **And literally thousands more!**
570
 
571
+ ### FLUX.1 Kontext Advantages:
572
+ - **🎨 Professional Quality**: State-of-the-art contextual editing
573
+ - **🧠 Intelligent Fill**: Understands scene context and lighting
574
+ - **⚡ GPU Accelerated**: Fast processing with high quality
575
+ - **🎯 Precise Control**: Fine-tunable guidance and steps
576
+ - **🔧 No API Limits**: Runs locally without external dependencies
577
 
578
+ **System Requirements:**
579
+ - GPU-enabled environment (automatically handled in Spaces)
580
+ - HF token for object detection API access
581
+ - Processing time: 30-90 seconds depending on image size
582
  """)
583
 
584
  if __name__ == "__main__":