Spaces:

jiten6555
/

Imagemodel

Runtime error

App Files Community

jiten6555 committed on Nov 30, 2024

Commit

3bfc3b0

verified ·

1 Parent(s): 347327e

Update app.py

Browse files

Files changed (1) hide show

app.py +135 -91

app.py CHANGED Viewed

@@ -1,144 +1,188 @@
 import torch
 import gradio as gr
 import numpy as np
 import open3d as o3d
 from PIL import Image
 import cv2
-class CPUFriendlyAIDepthTo3DConverter:
     def __init__(self):
-        # Load MiDaS depth estimation model with explicit CPU configuration
-        self.model = torch.hub.load('intel-isl/MiDaS', 'MiDaS_small', force_reload=False)
-        self.model.to('cpu')  # Ensure model runs on CPU
         self.model.eval()
-        # Preprocessing transforms
-        self.transform = torch.hub.load('intel-isl/MiDaS', 'transforms').small_transform
-    def estimate_depth(self, input_image):
         """
-        CPU-optimized depth estimation
         """
-        # Convert PIL Image to numpy
-        img = np.array(input_image)
-        # Ensure image is in RGB
-        if img.shape[-1] == 4:  # If RGBA, convert to RGB
-            img = cv2.cvtColor(img, cv2.COLOR_RGBA2RGB)
-        # Preprocess image
-        input_batch = self.transform(img).unsqueeze(0).to('cpu')
-        # Estimate depth with minimal memory usage
-        with torch.no_grad():
-            prediction = self.model(input_batch)
-            depth = prediction.squeeze().cpu().numpy()
-        # Free up memory
-        torch.cuda.empty_cache()
-        # Normalize depth
-        depth_normalized = cv2.normalize(depth, None, 0, 255,
-                                         norm_type=cv2.NORM_MINMAX,
-                                         dtype=cv2.CV_8U)
-        return depth_normalized
     def create_point_cloud(self, image, depth_map):
         """
-        Efficient point cloud creation
         """
-        img_array = np.array(image)
-        height, width = img_array.shape[:2]
-        # More aggressive downsampling for memory efficiency
-        step = 4
-        points = []
-        colors = []
-        for y in range(0, height, step):
-            for x in range(0, width, step):
-                # Use depth as Z coordinate
-                z = depth_map[y, x] / 255.0 * 5  # Scaled depth
-                points.append([x, y, z])
-                # Safely get color
-                try:
-                    color = img_array[y, x] / 255.0
                     colors.append(color)
-                except IndexError:
-                    colors.append([0.5, 0.5, 0.5])  # Default color if out of bounds
-        pcd = o3d.geometry.PointCloud()
-        pcd.points = o3d.utility.Vector3dVector(points)
-        pcd.colors = o3d.utility.Vector3dVector(colors)
-        return pcd
     def convert_to_mesh(self, point_cloud):
         """
-        Memory-efficient mesh conversion
         """
-        # Estimate and orient normals
-        point_cloud.estimate_normals()
-        point_cloud.orient_normals_consistent_tangent_plane(100)
-        # Lower depth for less memory consumption
-        mesh, _ = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(
-            point_cloud, depth=7  # Reduced from previous version
-        )
-        # Simplified color handling
-        mesh.vertex_colors = point_cloud.colors
-        return mesh
     def process_image(self, input_image):
         """
-        CPU-friendly full pipeline
         """
-        # Resize image to reduce memory usage
-        max_size = (800, 800)
-        input_image.thumbnail(max_size, Image.LANCZOS)
-        # Estimate depth using AI model
-        depth_map = self.estimate_depth(input_image)
-        # Create point cloud
-        point_cloud = self.create_point_cloud(np.array(input_image), depth_map)
-        # Convert to mesh
-        mesh = self.convert_to_mesh(point_cloud)
-        # Save mesh
-        output_path = "/tmp/cpu_optimized_3d_model.obj"
-        o3d.io.write_triangle_mesh(output_path, mesh)
-        return output_path
 def create_huggingface_space():
     # Initialize converter
-    converter = CPUFriendlyAIDepthTo3DConverter()
     def convert_image(input_image):
         try:
-            # Ensure image is in PIL format
-            if not isinstance(input_image, Image.Image):
-                input_image = Image.fromarray(input_image)
-            # Process image
             output_model = converter.process_image(input_image)
             return output_model
         except Exception as e:
-            return f"Error during conversion: {str(e)}"
     # Gradio Interface
     iface = gr.Interface(
         fn=convert_image,
         inputs=gr.Image(type="pil", label="Input Image"),
         outputs=gr.File(label="3D Model (OBJ)"),
-        title="CPU-Friendly AI Image to 3D Converter",
-        description="Convert images to 3D models using lightweight AI depth estimation."
     )
     return iface

 import torch
+import torchvision.transforms as transforms
 import gradio as gr
 import numpy as np
 import open3d as o3d
 from PIL import Image
 import cv2
+class RobustDepthTo3DConverter:
     def __init__(self):
+        # Load MiDaS model with explicit configuration
+        self.device = torch.device("cpu")
+        self.model = torch.hub.load("intel-isl/MiDaS", "MiDaS_small", pretrained=True)
+        self.model.to(self.device)
         self.model.eval()
+        # Create transformation pipeline
+        self.transform = transforms.Compose([
+            transforms.ToTensor(),
+            transforms.Normalize(
+                mean=[0.485, 0.456, 0.406],
+                std=[0.229, 0.224, 0.225]
+            )
+        ])
+    def preprocess_image(self, input_image):
         """
+        Standardize image input
         """
+        # Ensure input is PIL Image
+        if not isinstance(input_image, Image.Image):
+            input_image = Image.fromarray(input_image)
+        # Resize image
+        max_size = (800, 800)
+        input_image.thumbnail(max_size, Image.LANCZOS)
+        # Convert to RGB if needed
+        if input_image.mode != 'RGB':
+            input_image = input_image.convert('RGB')
+        return input_image
+    def estimate_depth(self, input_image):
+        """
+        Robust depth estimation
+        """
+        try:
+            # Preprocess image
+            img = self.preprocess_image(input_image)
+            # Convert to tensor
+            img_tensor = self.transform(img).unsqueeze(0).to(self.device)
+            # Estimate depth
+            with torch.no_grad():
+                prediction = self.model(img_tensor)
+                depth = prediction.squeeze().cpu().numpy()
+            # Normalize depth
+            depth_normalized = cv2.normalize(
+                depth, None, 0, 255,
+                norm_type=cv2.NORM_MINMAX,
+                dtype=cv2.CV_8U
+            )
+            return depth_normalized
+        except Exception as e:
+            print(f"Depth estimation error: {e}")
+            return None
     def create_point_cloud(self, image, depth_map):
         """
+        Create point cloud with error handling
         """
+        if depth_map is None:
+            return None
+        try:
+            img_array = np.array(image)
+            height, width = img_array.shape[:2]
+            # Adaptive sampling based on image size
+            step = max(1, min(height, width) // 200)
+            points = []
+            colors = []
+            for y in range(0, height, step):
+                for x in range(0, width, step):
+                    z = depth_map[y, x] / 255.0 * 5  # Scaled depth
+                    points.append([x, y, z])
+                    # Safe color extraction
+                    color = img_array[y, x][:3] / 255.0 if len(img_array[y, x]) >= 3 else [0.5, 0.5, 0.5]
                     colors.append(color)
+            pcd = o3d.geometry.PointCloud()
+            pcd.points = o3d.utility.Vector3dVector(points)
+            pcd.colors = o3d.utility.Vector3dVector(colors)
+            return pcd
+        except Exception as e:
+            print(f"Point cloud creation error: {e}")
+            return None
     def convert_to_mesh(self, point_cloud):
         """
+        Mesh conversion with error handling
         """
+        if point_cloud is None:
+            return None
+        try:
+            # Estimate normals
+            point_cloud.estimate_normals()
+            point_cloud.orient_normals_consistent_tangent_plane(100)
+            # Mesh reconstruction
+            mesh, _ = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(
+                point_cloud, depth=7
+            )
+            # Color the mesh
+            mesh.vertex_colors = point_cloud.colors
+            return mesh
+        except Exception as e:
+            print(f"Mesh conversion error: {e}")
+            return None
     def process_image(self, input_image):
         """
+        Full pipeline with comprehensive error handling
         """
+        try:
+            # Preprocess and validate input
+            input_image = self.preprocess_image(input_image)
+            # Estimate depth
+            depth_map = self.estimate_depth(input_image)
+            if depth_map is None:
+                raise ValueError("Depth estimation failed")
+            # Create point cloud
+            point_cloud = self.create_point_cloud(input_image, depth_map)
+            if point_cloud is None:
+                raise ValueError("Point cloud creation failed")
+            # Convert to mesh
+            mesh = self.convert_to_mesh(point_cloud)
+            if mesh is None:
+                raise ValueError("Mesh conversion failed")
+            # Save mesh
+            output_path = "/tmp/robust_3d_model.obj"
+            o3d.io.write_triangle_mesh(output_path, mesh)
+            return output_path
+        except Exception as e:
+            print(f"Full pipeline error: {e}")
+            return f"Error during conversion: {str(e)}"
 def create_huggingface_space():
     # Initialize converter
+    converter = RobustDepthTo3DConverter()
     def convert_image(input_image):
         try:
             output_model = converter.process_image(input_image)
             return output_model
         except Exception as e:
+            return f"Conversion failed: {str(e)}"
     # Gradio Interface
     iface = gr.Interface(
         fn=convert_image,
         inputs=gr.Image(type="pil", label="Input Image"),
         outputs=gr.File(label="3D Model (OBJ)"),
+        title="Robust AI Image to 3D Converter",
+        description="Convert images to 3D models with advanced error handling."
     )
     return iface