Spaces:

jiten6555
/

Imagemodel

Runtime error

App Files Files Community

jiten6555 committed on Nov 30, 2024

Commit

cf37aee

verified ·

1 Parent(s): ef357b7

Update app.py

Browse files

Files changed (1) hide show

app.py +63 -42

app.py CHANGED Viewed

@@ -1,51 +1,73 @@
 import torch
 import gradio as gr
-from PIL import Image
 import numpy as np
 import open3d as o3d
 import cv2
-class RobustImageTo3DConverter:
     def __init__(self):
-        # Use OpenCV for depth estimation
-        self.use_midas = False
-    def estimate_depth(self, image):
         """
-        Estimate depth using OpenCV's stereo algorithms
         """
-        # Convert PIL Image to OpenCV format
-        img_array = np.array(image)
-        # Convert to grayscale
-        gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)
-        # Create simple depth estimation using edge detection and blurring
-        edges = cv2.Canny(gray, 100, 200)
-        depth_map = cv2.distanceTransform(255 - edges, cv2.DIST_L2, 5)
-        # Normalize depth map
-        depth_map = cv2.normalize(depth_map, None, 0, 255, cv2.NORM_MINMAX)
-        return depth_map
     def create_point_cloud(self, image, depth_map):
         """
-        Convert depth map to 3D point cloud with reduced resolution
         """
         img_array = np.array(image)
         height, width = img_array.shape[:2]
-        # Downsample to reduce computational load
-        step = 5  # Reduce resolution
         points = []
         colors = []
         for y in range(0, height, step):
             for x in range(0, width, step):
-                z = depth_map[y, x] / 255.0  # Normalize depth
-                points.append([x, y, z * 10])  # Scale depth
-                colors.append(img_array[y, x] / 255.0)  # Normalize colors
         pcd = o3d.geometry.PointCloud()
         pcd.points = o3d.utility.Vector3dVector(points)
@@ -55,42 +77,48 @@ class RobustImageTo3DConverter:
     def convert_to_mesh(self, point_cloud):
         """
-        Simplified mesh reconstruction for CPU
         """
         point_cloud.estimate_normals()
         point_cloud.orient_normals_consistent_tangent_plane(100)
-        # Use simpler mesh reconstruction with lower depth
         mesh, _ = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(
-            point_cloud, depth=6  # Reduced depth for faster processing
         )
-        # Color the mesh
-        mesh.paint_uniform_color([0.7, 0.7, 0.7])
         return mesh
     def process_image(self, input_image):
         """
-        CPU-friendly full pipeline for 3D conversion
         """
-        # Estimate depth
         depth_map = self.estimate_depth(input_image)
         # Create point cloud
-        point_cloud = self.create_point_cloud(input_image, depth_map)
         # Convert to mesh
         mesh = self.convert_to_mesh(point_cloud)
         # Save mesh
-        output_path = "/tmp/converted_3d_model.obj"
         o3d.io.write_triangle_mesh(output_path, mesh)
         return output_path
 def create_huggingface_space():
-    converter = RobustImageTo3DConverter()
     def convert_image(input_image):
         try:
@@ -98,10 +126,6 @@ def create_huggingface_space():
             if not isinstance(input_image, Image.Image):
                 input_image = Image.fromarray(input_image)
-            # Resize image if too large
-            max_size = (800, 800)
-            input_image.thumbnail(max_size, Image.LANCZOS)
             # Process image
             output_model = converter.process_image(input_image)
             return output_model
@@ -112,12 +136,9 @@ def create_huggingface_space():
     iface = gr.Interface(
         fn=convert_image,
         inputs=gr.Image(type="pil", label="Input Image"),
-        outputs=[
-            gr.File(label="3D Model (OBJ)"),
-            gr.Textbox(label="Conversion Status")
-        ],
-        title="Robust Image to 3D Model Converter",
-        description="Convert images to 3D models using OpenCV depth estimation and point cloud reconstruction."
     )
     return iface

 import torch
 import gradio as gr
 import numpy as np
 import open3d as o3d
+from PIL import Image
 import cv2
+class CPUFriendlyAIDepthTo3DConverter:
     def __init__(self):
+        # Load MiDaS depth estimation model with explicit CPU configuration
+        self.model = torch.hub.load('intel-isl/MiDaS', 'MiDaS_small', force_reload=False)
+        self.model.to('cpu')  # Ensure model runs on CPU
+        self.model.eval()
+        # Preprocessing transforms
+        self.transform = torch.hub.load('intel-isl/MiDaS', 'transforms').small_transform
+    def estimate_depth(self, input_image):
         """
+        CPU-optimized depth estimation
         """
+        # Convert PIL Image to numpy
+        img = np.array(input_image)
+        # Ensure image is in RGB
+        if img.shape[-1] == 4:  # If RGBA, convert to RGB
+            img = cv2.cvtColor(img, cv2.COLOR_RGBA2RGB)
+        # Preprocess image
+        input_batch = self.transform(img).unsqueeze(0).to('cpu')
+        # Estimate depth with minimal memory usage
+        with torch.no_grad():
+            prediction = self.model(input_batch)
+            depth = prediction.squeeze().cpu().numpy()
+        # Free up memory
+        torch.cuda.empty_cache()
+        # Normalize depth
+        depth_normalized = cv2.normalize(depth, None, 0, 255,
+                                         norm_type=cv2.NORM_MINMAX,
+                                         dtype=cv2.CV_8U)
+        return depth_normalized
     def create_point_cloud(self, image, depth_map):
         """
+        Efficient point cloud creation
         """
         img_array = np.array(image)
         height, width = img_array.shape[:2]
+        # Downsample the pixel grid (every 4th pixel; the previous version used every 5th)
+        step = 4
         points = []
         colors = []
         for y in range(0, height, step):
             for x in range(0, width, step):
+                # Use depth as Z coordinate
+                z = depth_map[y, x] / 255.0 * 5  # Scaled depth
+                points.append([x, y, z])
+                # Safely get color
+                try:
+                    color = img_array[y, x] / 255.0
+                    colors.append(color)
+                except IndexError:
+                    colors.append([0.5, 0.5, 0.5])  # Default color if out of bounds
         pcd = o3d.geometry.PointCloud()
         pcd.points = o3d.utility.Vector3dVector(points)
     def convert_to_mesh(self, point_cloud):
         """
+        Memory-efficient mesh conversion
         """
+        # Estimate and orient normals
         point_cloud.estimate_normals()
         point_cloud.orient_normals_consistent_tangent_plane(100)
+        # Keep the Poisson octree depth modest to limit memory consumption
         mesh, _ = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(
+            point_cloud, depth=7  # Raised from 6 in the previous version
         )
+        # Simplified color handling
+        mesh.vertex_colors = point_cloud.colors
         return mesh
     def process_image(self, input_image):
         """
+        CPU-friendly full pipeline
         """
+        # Resize image to reduce memory usage
+        max_size = (800, 800)
+        input_image.thumbnail(max_size, Image.LANCZOS)
+        # Estimate depth using AI model
         depth_map = self.estimate_depth(input_image)
         # Create point cloud
+        point_cloud = self.create_point_cloud(np.array(input_image), depth_map)
         # Convert to mesh
         mesh = self.convert_to_mesh(point_cloud)
         # Save mesh
+        output_path = "/tmp/cpu_optimized_3d_model.obj"
         o3d.io.write_triangle_mesh(output_path, mesh)
         return output_path
 def create_huggingface_space():
+    # Initialize converter
+    converter = CPUFriendlyAIDepthTo3DConverter()
     def convert_image(input_image):
         try:
             if not isinstance(input_image, Image.Image):
                 input_image = Image.fromarray(input_image)
             # Process image
             output_model = converter.process_image(input_image)
             return output_model
     iface = gr.Interface(
         fn=convert_image,
         inputs=gr.Image(type="pil", label="Input Image"),
+        outputs=gr.File(label="3D Model (OBJ)"),
+        title="CPU-Friendly AI Image to 3D Converter",
+        description="Convert images to 3D models using lightweight AI depth estimation."
     )
     return iface