Update app.py
Browse files
app.py
CHANGED
@@ -7,11 +7,30 @@ import os
|
|
7 |
import spaces
|
8 |
import json
|
9 |
import re
|
|
|
|
|
10 |
|
11 |
-
# Initialize
|
12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
def __init__(self):
|
14 |
-
# Using proven working
|
15 |
self.api_url = "https://api-inference.huggingface.co/models/hustvl/yolos-small"
|
16 |
# Fallback models in order of preference (all tested and working):
|
17 |
self.fallback_models = [
|
@@ -93,7 +112,7 @@ class AdvancedYOLODetector:
|
|
93 |
"2. High API traffic - try again in a few minutes\n" +
|
94 |
"3. Check your HF token is valid and has sufficient quota")
|
95 |
|
96 |
-
object_detector =
|
97 |
|
98 |
# Extended object class names including common variations and synonyms
|
99 |
COMMON_OBJECTS = [
|
@@ -189,13 +208,13 @@ def fuzzy_match_object(user_input, detected_labels):
|
|
189 |
|
190 |
def detect_objects(image, target_object, confidence_threshold, hf_token=None):
|
191 |
"""
|
192 |
-
Detect any object in the image using advanced
|
193 |
"""
|
194 |
try:
|
195 |
if not target_object or not target_object.strip():
|
196 |
raise gr.Error("Please enter an object name to detect and remove")
|
197 |
|
198 |
-
# Use advanced
|
199 |
results = object_detector.detect(image, hf_token)
|
200 |
|
201 |
if not results or not isinstance(results, list):
|
@@ -280,9 +299,56 @@ def create_mask_from_detections(image, detections, mask_expansion=10):
|
|
280 |
return mask
|
281 |
|
282 |
@spaces.GPU
|
283 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
284 |
"""
|
285 |
-
Main function to remove any specified object
|
286 |
"""
|
287 |
try:
|
288 |
if image is None:
|
@@ -296,7 +362,7 @@ def remove_objects(image, object_name, confidence_threshold, mask_expansion, inp
|
|
296 |
if not token:
|
297 |
raise gr.Error("Please provide your Hugging Face token or set HF_TOKEN in Space secrets")
|
298 |
|
299 |
-
# Step 1: Detect objects
|
300 |
detections = detect_objects(image, object_name, confidence_threshold, token)
|
301 |
|
302 |
if not detections:
|
@@ -308,119 +374,49 @@ def remove_objects(image, object_name, confidence_threshold, mask_expansion, inp
|
|
308 |
suggestion_msg += "• Checking if the object is clearly visible in the image"
|
309 |
return image, None, suggestion_msg
|
310 |
|
311 |
-
# Step 2: Create mask
|
312 |
mask = create_mask_from_detections(image, detections, mask_expansion)
|
313 |
|
314 |
-
# Step 3: Use
|
315 |
-
|
316 |
-
|
317 |
-
|
318 |
-
|
319 |
-
|
320 |
-
|
321 |
-
|
322 |
-
|
323 |
-
|
324 |
-
|
325 |
-
|
326 |
-
|
327 |
-
img_bytes = img_buffer.getvalue()
|
328 |
-
|
329 |
-
mask_buffer = io.BytesIO()
|
330 |
-
mask.save(mask_buffer, format='PNG')
|
331 |
-
mask_bytes = mask_buffer.getvalue()
|
332 |
-
|
333 |
-
# Prepare multipart form data
|
334 |
-
files = {
|
335 |
-
'image': ('image.png', img_bytes, 'image/png'),
|
336 |
-
'mask': ('mask.png', mask_bytes, 'image/png')
|
337 |
-
}
|
338 |
-
|
339 |
-
# Enhanced inpainting prompt
|
340 |
-
enhanced_prompt = f"{inpaint_prompt}, photorealistic, high quality, detailed, natural lighting"
|
341 |
-
|
342 |
-
data = {
|
343 |
-
'prompt': enhanced_prompt,
|
344 |
-
'negative_prompt': 'blurry, low quality, distorted, artifacts, unrealistic, pixelated, noise',
|
345 |
-
'num_inference_steps': 25,
|
346 |
-
'guidance_scale': 7.5,
|
347 |
-
'strength': 0.99
|
348 |
-
}
|
349 |
-
|
350 |
-
# Try multiple inpainting models
|
351 |
-
inpainting_success = False
|
352 |
-
last_error = ""
|
353 |
-
|
354 |
-
for i, inpaint_api_url in enumerate(inpaint_models):
|
355 |
-
try:
|
356 |
-
print(f"Trying inpainting model {i+1}/{len(inpaint_models)}: {inpaint_api_url.split('/')[-1]}")
|
357 |
-
|
358 |
-
response = requests.post(inpaint_api_url, headers=headers, files=files, data=data, timeout=120)
|
359 |
-
|
360 |
-
if response.status_code == 503:
|
361 |
-
# Model is loading, wait and retry once
|
362 |
-
import time
|
363 |
-
time.sleep(10)
|
364 |
-
response = requests.post(inpaint_api_url, headers=headers, files=files, data=data, timeout=120)
|
365 |
-
|
366 |
-
if response.status_code == 200:
|
367 |
-
result_image = Image.open(io.BytesIO(response.content))
|
368 |
-
detected_labels = [d.get('label', 'unknown') for d in detections]
|
369 |
-
status_msg = f"✅ Successfully removed {len(detections)} '{object_name}' object(s)\n"
|
370 |
-
status_msg += f"🎯 Detected as: {', '.join(detected_labels)}\n"
|
371 |
-
status_msg += f"🔧 Used: {inpaint_api_url.split('/')[-1]} for inpainting"
|
372 |
-
inpainting_success = True
|
373 |
-
break
|
374 |
-
else:
|
375 |
-
last_error = f"HTTP {response.status_code}: {response.text[:200]}"
|
376 |
-
print(f"Model {i+1} failed: {last_error}")
|
377 |
-
continue
|
378 |
-
|
379 |
-
except Exception as e:
|
380 |
-
last_error = str(e)
|
381 |
-
print(f"Model {i+1} error: {last_error}")
|
382 |
-
continue
|
383 |
-
|
384 |
-
if not inpainting_success:
|
385 |
-
# Fallback: return original with mask overlay for debugging
|
386 |
result_image = create_mask_overlay(image, mask)
|
387 |
-
status_msg = f"⚠️
|
388 |
-
status_msg += f"🎯 Found {len(detections)} '{object_name}' object(s)
|
389 |
-
status_msg += f"📍 Showing detected areas in red overlay"
|
390 |
-
|
391 |
-
|
392 |
|
393 |
except Exception as e:
|
394 |
return image, None, f"❌ Error: {str(e)}"
|
395 |
|
396 |
-
def create_mask_overlay(image, mask):
|
397 |
-
"""Create a visualization showing the mask overlay on the original image"""
|
398 |
-
result_image = image.copy()
|
399 |
-
overlay = Image.new('RGBA', image.size, (255, 0, 0, 100))
|
400 |
-
mask_rgba = mask.convert('RGBA')
|
401 |
-
result_image = Image.alpha_composite(
|
402 |
-
result_image.convert('RGBA'),
|
403 |
-
Image.composite(overlay, Image.new('RGBA', image.size, (0,0,0,0)), mask)
|
404 |
-
)
|
405 |
-
return result_image.convert('RGB')
|
406 |
-
|
407 |
# Create Gradio interface
|
408 |
with gr.Blocks(
|
409 |
fill_height=True,
|
410 |
-
title="
|
411 |
theme=gr.themes.Soft()
|
412 |
) as demo:
|
413 |
|
414 |
gr.Markdown("""
|
415 |
-
# 🚀
|
416 |
|
417 |
-
Upload an image and specify **ANY object** you want to remove
|
418 |
|
419 |
-
**
|
420 |
1. 🔍 **Multi-Model Detection**: Uses multiple proven object detection models with auto-fallback
|
421 |
2. 🧠 **Smart Matching**: Handles synonyms, plurals, and fuzzy object name matching
|
422 |
-
3. 🎭 **
|
423 |
-
4.
|
424 |
""")
|
425 |
|
426 |
with gr.Row():
|
@@ -468,18 +464,29 @@ with gr.Blocks(
|
|
468 |
value=20,
|
469 |
step=5,
|
470 |
label="📏 Mask Expansion (pixels)",
|
471 |
-
info="Expand mask around detected objects for
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
472 |
)
|
473 |
|
474 |
-
|
475 |
-
|
476 |
-
|
477 |
-
|
478 |
-
|
|
|
|
|
479 |
)
|
480 |
|
481 |
hf_token = gr.Textbox(
|
482 |
-
label="🔑 Hugging Face Token (
|
483 |
type="password",
|
484 |
placeholder="hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
|
485 |
info="Get token from https://huggingface.co/settings/tokens (or set HF_TOKEN in Space secrets)"
|
@@ -498,7 +505,7 @@ with gr.Blocks(
|
|
498 |
height=300
|
499 |
)
|
500 |
mask_image = gr.Image(
|
501 |
-
label="🎭
|
502 |
type="pil",
|
503 |
height=300
|
504 |
)
|
@@ -506,7 +513,7 @@ with gr.Blocks(
|
|
506 |
status_text = gr.Textbox(
|
507 |
label="📊 Status & Detection Info",
|
508 |
interactive=False,
|
509 |
-
max_lines=
|
510 |
)
|
511 |
|
512 |
# Event handlers
|
@@ -517,7 +524,8 @@ with gr.Blocks(
|
|
517 |
object_name,
|
518 |
confidence_threshold,
|
519 |
mask_expansion,
|
520 |
-
|
|
|
521 |
hf_token
|
522 |
],
|
523 |
outputs=[output_image, mask_image, status_text]
|
@@ -531,11 +539,13 @@ with gr.Blocks(
|
|
531 |
|
532 |
1. **Upload an image** containing objects you want to remove
|
533 |
2. **Enter ANY object name** in the text box - no restrictions!
|
534 |
-
3. **Adjust settings** if needed:
|
535 |
- **Confidence**: Start with 0.3, increase if too many false detections
|
536 |
-
- **Mask expansion**:
|
537 |
-
|
538 |
-
|
|
|
|
|
539 |
|
540 |
### 💡 Smart Object Recognition:
|
541 |
- **Handles variations**: "car" = "vehicle" = "automobile"
|
@@ -558,17 +568,17 @@ with gr.Blocks(
|
|
558 |
- **Nature**: tree, flower, rock, cloud, mountain
|
559 |
- **And literally thousands more!**
|
560 |
|
561 |
-
###
|
562 |
-
-
|
563 |
-
-
|
564 |
-
-
|
565 |
-
-
|
566 |
-
-
|
567 |
|
568 |
-
**
|
569 |
-
-
|
570 |
-
-
|
571 |
-
-
|
572 |
""")
|
573 |
|
574 |
if __name__ == "__main__":
|
|
|
7 |
import spaces
|
8 |
import json
|
9 |
import re
|
10 |
+
import torch
|
11 |
+
from diffusers import FluxKontextPipeline
|
12 |
|
13 |
+
# Initialize FLUX model for advanced inpainting
|
14 |
+
@spaces.GPU
def load_flux_model():
    """Build the FLUX.1 Kontext pipeline used for object removal.

    The "black-forest-labs/FLUX.1-Kontext-dev" weights are loaded in
    bfloat16 and the pipeline is moved to the "cuda" device.

    Returns:
        The ready-to-use pipeline, or ``None`` when loading or the device
        transfer raised. The failure is printed rather than propagated, so
        callers must check for ``None``.
    """
    try:
        pipeline = FluxKontextPipeline.from_pretrained(
            "black-forest-labs/FLUX.1-Kontext-dev",
            torch_dtype=torch.bfloat16,
        )
        pipeline_on_gpu = pipeline.to("cuda")
    except Exception as e:
        # Best-effort loader: report the problem and signal failure with None.
        print(f"Failed to load FLUX model: {e}")
        return None
    return pipeline_on_gpu
|
26 |
+
|
27 |
+
# Global variable to store the model (loaded once)
|
28 |
+
flux_pipe = None
|
29 |
+
|
30 |
+
# Initialize object detection using proven working models
|
31 |
+
class AdvancedObjectDetector:
|
32 |
def __init__(self):
|
33 |
+
# Using proven working object detection models on Hugging Face Inference API
|
34 |
self.api_url = "https://api-inference.huggingface.co/models/hustvl/yolos-small"
|
35 |
# Fallback models in order of preference (all tested and working):
|
36 |
self.fallback_models = [
|
|
|
112 |
"2. High API traffic - try again in a few minutes\n" +
|
113 |
"3. Check your HF token is valid and has sufficient quota")
|
114 |
|
115 |
+
object_detector = AdvancedObjectDetector()
|
116 |
|
117 |
# Extended object class names including common variations and synonyms
|
118 |
COMMON_OBJECTS = [
|
|
|
208 |
|
209 |
def detect_objects(image, target_object, confidence_threshold, hf_token=None):
|
210 |
"""
|
211 |
+
Detect any object in the image using advanced detection models and return bounding boxes
|
212 |
"""
|
213 |
try:
|
214 |
if not target_object or not target_object.strip():
|
215 |
raise gr.Error("Please enter an object name to detect and remove")
|
216 |
|
217 |
+
# Use advanced detection for object detection
|
218 |
results = object_detector.detect(image, hf_token)
|
219 |
|
220 |
if not results or not isinstance(results, list):
|
|
|
299 |
return mask
|
300 |
|
301 |
@spaces.GPU
def flux_inpainting(image, object_name, guidance_scale=2.5, steps=28):
    """
    Ask the FLUX.1 Kontext pipeline to edit ``object_name`` out of ``image``.

    The pipeline is created lazily through ``load_flux_model()`` the first
    time it is needed and stored in the module-level ``flux_pipe``.

    Args:
        image: Source picture. Only ``.convert("RGB")`` and ``.size`` are
            used here (presumably a PIL image; confirm against the caller).
        object_name: Text inserted into the removal prompt.
        guidance_scale: Passed to the pipeline as ``guidance_scale``.
        steps: Passed to the pipeline as ``num_inference_steps``.

    Returns:
        ``(edited_image, True)`` on success, or ``(None, False)`` when any
        step raised. The error is printed, never propagated.
    """
    global flux_pipe

    try:
        # Lazy one-time initialisation of the shared pipeline.
        if flux_pipe is None:
            print("Loading FLUX.1 Kontext model...")
            flux_pipe = load_flux_model()
            if flux_pipe is None:
                raise Exception("Failed to load FLUX model")

        # Natural-language instruction that drives the contextual edit.
        removal_prompt = f"Remove the {object_name} from this image, fill with natural background that matches the surrounding environment, photorealistic, seamless, high quality"

        rgb_input = image.convert("RGB")
        target_width, target_height = image.size
        # Fixed seed so repeated runs start from the same random state.
        seeded_generator = torch.Generator().manual_seed(42)

        output = flux_pipe(
            image=rgb_input,
            prompt=removal_prompt,
            guidance_scale=guidance_scale,
            width=target_width,
            height=target_height,
            num_inference_steps=steps,
            generator=seeded_generator,
        )
        return output.images[0], True

    except Exception as e:
        # Failure is reported via the boolean flag so the caller can fall back.
        print(f"FLUX inpainting error: {str(e)}")
        return None, False
|
336 |
+
|
337 |
+
def create_mask_overlay(image, mask):
    """Return an RGB copy of ``image`` with the masked area tinted red.

    Args:
        image: Original picture (PIL image). It is not modified.
        mask: PIL image marking the detected area; expected to have the same
            size as ``image``. It is reduced to a single 8-bit channel, so
            brighter mask pixels receive more of the red tint.

    Returns:
        A new RGB image: ``image`` with a semi-transparent red layer
        (RGBA ``(255, 0, 0, 100)``) blended in wherever ``mask`` is set.
    """
    # Image.composite only accepts "1", "L" or "RGBA" masks. Normalising to
    # "L" keeps those working by brightness and also lets RGB masks through.
    tint_mask = mask.convert('L')

    red_tint = Image.new('RGBA', image.size, (255, 0, 0, 100))
    transparent = Image.new('RGBA', image.size, (0, 0, 0, 0))
    tint_layer = Image.composite(red_tint, transparent, tint_mask)

    # convert() already returns a new image, so no explicit copy is needed to
    # leave the caller's image untouched.
    blended = Image.alpha_composite(image.convert('RGBA'), tint_layer)
    return blended.convert('RGB')
|
347 |
+
|
348 |
+
@spaces.GPU
|
349 |
+
def remove_objects(image, object_name, confidence_threshold, mask_expansion, guidance_scale, steps, hf_token):
|
350 |
"""
|
351 |
+
Main function to remove any specified object using advanced detection + FLUX inpainting
|
352 |
"""
|
353 |
try:
|
354 |
if image is None:
|
|
|
362 |
if not token:
|
363 |
raise gr.Error("Please provide your Hugging Face token or set HF_TOKEN in Space secrets")
|
364 |
|
365 |
+
# Step 1: Detect objects
|
366 |
detections = detect_objects(image, object_name, confidence_threshold, token)
|
367 |
|
368 |
if not detections:
|
|
|
374 |
suggestion_msg += "• Checking if the object is clearly visible in the image"
|
375 |
return image, None, suggestion_msg
|
376 |
|
377 |
+
# Step 2: Create mask for debugging/visualization
|
378 |
mask = create_mask_from_detections(image, detections, mask_expansion)
|
379 |
|
380 |
+
# Step 3: Use FLUX.1 Kontext for intelligent object removal
|
381 |
+
print("Using FLUX.1 Kontext for advanced object removal...")
|
382 |
+
result_image, flux_success = flux_inpainting(image, object_name, guidance_scale, steps)
|
383 |
+
|
384 |
+
if flux_success and result_image:
|
385 |
+
detected_labels = [d.get('label', 'unknown') for d in detections]
|
386 |
+
status_msg = f"✅ Successfully removed {len(detections)} '{object_name}' object(s)\n"
|
387 |
+
status_msg += f"🎯 Detected as: {', '.join(detected_labels)}\n"
|
388 |
+
status_msg += f"🚀 Used: FLUX.1 Kontext for professional-quality removal\n"
|
389 |
+
status_msg += f"⚙️ Settings: Guidance={guidance_scale}, Steps={steps}"
|
390 |
+
return result_image, mask, status_msg
|
391 |
+
else:
|
392 |
+
# Fallback: show detection areas
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
393 |
result_image = create_mask_overlay(image, mask)
|
394 |
+
status_msg = f"⚠️ FLUX inpainting failed, but detection was successful\n"
|
395 |
+
status_msg += f"🎯 Found {len(detections)} '{object_name}' object(s)\n"
|
396 |
+
status_msg += f"📍 Showing detected areas in red overlay\n"
|
397 |
+
status_msg += f"💡 Try adjusting guidance scale or steps, or check GPU availability"
|
398 |
+
return result_image, mask, status_msg
|
399 |
|
400 |
except Exception as e:
|
401 |
return image, None, f"❌ Error: {str(e)}"
|
402 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
403 |
# Create Gradio interface
|
404 |
with gr.Blocks(
|
405 |
fill_height=True,
|
406 |
+
title="Professional Object Removal with FLUX",
|
407 |
theme=gr.themes.Soft()
|
408 |
) as demo:
|
409 |
|
410 |
gr.Markdown("""
|
411 |
+
# 🚀 Professional Object Removal using Advanced AI
|
412 |
|
413 |
+
Upload an image and specify **ANY object** you want to remove with professional results!
|
414 |
|
415 |
+
**Powered by cutting-edge AI:**
|
416 |
1. 🔍 **Multi-Model Detection**: Uses multiple proven object detection models with auto-fallback
|
417 |
2. 🧠 **Smart Matching**: Handles synonyms, plurals, and fuzzy object name matching
|
418 |
+
3. 🎭 **Intelligent Masking**: Creates precise removal masks
|
419 |
+
4. 🚀 **FLUX.1 Kontext**: Uses state-of-the-art contextual editing for professional results
|
420 |
""")
|
421 |
|
422 |
with gr.Row():
|
|
|
464 |
value=20,
|
465 |
step=5,
|
466 |
label="📏 Mask Expansion (pixels)",
|
467 |
+
info="Expand mask around detected objects for debugging"
|
468 |
+
)
|
469 |
+
|
470 |
+
guidance_scale = gr.Slider(
|
471 |
+
minimum=1.0,
|
472 |
+
maximum=10.0,
|
473 |
+
value=2.5,
|
474 |
+
step=0.1,
|
475 |
+
label="🎯 FLUX Guidance Scale",
|
476 |
+
info="Higher = more faithful to prompt, lower = more creative"
|
477 |
)
|
478 |
|
479 |
+
steps = gr.Slider(
|
480 |
+
minimum=10,
|
481 |
+
maximum=50,
|
482 |
+
value=28,
|
483 |
+
step=2,
|
484 |
+
label="🔄 FLUX Steps",
|
485 |
+
info="More steps = higher quality but slower processing"
|
486 |
)
|
487 |
|
488 |
hf_token = gr.Textbox(
|
489 |
+
label="🔑 Hugging Face Token (Required)",
|
490 |
type="password",
|
491 |
placeholder="hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
|
492 |
info="Get token from https://huggingface.co/settings/tokens (or set HF_TOKEN in Space secrets)"
|
|
|
505 |
height=300
|
506 |
)
|
507 |
mask_image = gr.Image(
|
508 |
+
label="🎭 Detection Mask (Debug)",
|
509 |
type="pil",
|
510 |
height=300
|
511 |
)
|
|
|
513 |
status_text = gr.Textbox(
|
514 |
label="📊 Status & Detection Info",
|
515 |
interactive=False,
|
516 |
+
max_lines=5
|
517 |
)
|
518 |
|
519 |
# Event handlers
|
|
|
524 |
object_name,
|
525 |
confidence_threshold,
|
526 |
mask_expansion,
|
527 |
+
guidance_scale,
|
528 |
+
steps,
|
529 |
hf_token
|
530 |
],
|
531 |
outputs=[output_image, mask_image, status_text]
|
|
|
539 |
|
540 |
1. **Upload an image** containing objects you want to remove
|
541 |
2. **Enter ANY object name** in the text box - no restrictions!
|
542 |
+
3. **Adjust detection settings** if needed:
|
543 |
- **Confidence**: Start with 0.3, increase if too many false detections
|
544 |
+
- **Mask expansion**: For debugging - shows detection areas
|
545 |
+
4. **Fine-tune FLUX settings**:
|
546 |
+
- **Guidance Scale**: 2.5 is optimal for most cases
|
547 |
+
- **Steps**: 28 gives good quality/speed balance
|
548 |
+
5. **Click "Remove Objects"** and wait for professional AI processing
|
549 |
|
550 |
### 💡 Smart Object Recognition:
|
551 |
- **Handles variations**: "car" = "vehicle" = "automobile"
|
|
|
568 |
- **Nature**: tree, flower, rock, cloud, mountain
|
569 |
- **And literally thousands more!**
|
570 |
|
571 |
+
### ⚡ FLUX.1 Kontext Advantages:
|
572 |
+
- **🎨 Professional Quality**: State-of-the-art contextual editing
|
573 |
+
- **🧠 Intelligent Fill**: Understands scene context and lighting
|
574 |
+
- **⚡ GPU Accelerated**: Fast processing with high quality
|
575 |
+
- **🎯 Precise Control**: Fine-tunable guidance and steps
|
576 |
+
- **🔧 No API Limits**: Runs locally without external dependencies
|
577 |
|
578 |
+
**System Requirements:**
|
579 |
+
- GPU-enabled environment (automatically handled in Spaces)
|
580 |
+
- HF token for object detection API access
|
581 |
+
- Processing time: 30-90 seconds depending on image size
|
582 |
""")
|
583 |
|
584 |
if __name__ == "__main__":
|