Final_Assignment_Template

Sleeping

App Files Files Community

huytofu92 committed on May 20

Commit

a9b6a0e

1 Parent(s): 4ae85c7

Enhance tools

Browse files

Files changed (2) hide show

tools.py +7 -1
vlm_tools.py +22 -14

tools.py CHANGED Viewed

@@ -155,7 +155,13 @@ def load_dataframe_from_excel(file_path: str)->pd.DataFrame:
     Returns:
         The pandas DataFrame
     """
-    return pd.read_excel(file_path)
 @tool
 def to_dataframe(data: List[dict], columns: List[str])->pd.DataFrame:

     Returns:
         The pandas DataFrame
     """
+    try:
+        df = pd.read_excel(file_path)
+    except Exception as e:
+        curr_dir = os.path.dirname(os.path.abspath(__file__))
+        file_path = os.path.join(curr_dir, file_path)
+        df = pd.read_excel(file_path)
+    return df
 @tool
 def to_dataframe(data: List[dict], columns: List[str])->pd.DataFrame:

vlm_tools.py CHANGED Viewed

@@ -49,10 +49,17 @@ def pre_processing(image: str, input_size=(416, 416))->tuple:
         # Convert BGR to RGB and normalize
         img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # More reliable than array slicing
-        img = img.transpose(2, 0, 1)  # HWC to CHW
-        img = np.expand_dims(img, axis=0)  # Add batch dimension
         img = img.astype(np.float32) / 255.0  # Normalize to [0, 1]
         return img, original_shape
     except Exception as e:
         raise ValueError(f"Error in pre_processing: {str(e)}")
@@ -344,22 +351,23 @@ class ObjectDetectionTool(Tool):
                     # Preprocess the image
                     img, original_shape = pre_processing(image)
-                    # Verify input shape
                     if len(img.shape) != 4:  # Should be NCHW
                         raise ValueError(f"Invalid input shape: {img.shape}, expected NCHW format")
-                    # Create blob and run inference
-                    blob = cv2.dnn.blobFromImage(
-                        img[0].transpose(1, 2, 0),  # Convert back to HWC for blobFromImage
-                        1/255.0,  # Scale factor
-                        (416, 416),  # Size
-                        (0, 0, 0),  # Mean
-                        True,  # SwapRB
-                        crop=False
-                    )
                     # Run inference
-                    onnx_input = {self.input_name: blob}
                     onnx_output = self.onnx_model.run(None, onnx_input)
                     # Handle shape mismatch by transposing if needed

         # Convert BGR to RGB and normalize
         img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # More reliable than array slicing
         img = img.astype(np.float32) / 255.0  # Normalize to [0, 1]
+        # Convert to NCHW format (batch, channels, height, width)
+        img = np.transpose(img, (2, 0, 1))  # HWC to CHW
+        img = np.expand_dims(img, axis=0)  # Add batch dimension
+        # Verify final shape
+        if img.shape != (1, 3, 416, 416):
+            print(f"Warning: Final shape is {img.shape}, expected (1, 3, 416, 416)")
+            img = np.reshape(img, (1, 3, 416, 416))
         return img, original_shape
     except Exception as e:
         raise ValueError(f"Error in pre_processing: {str(e)}")
                     # Preprocess the image
                     img, original_shape = pre_processing(image)
+                    # Verify input shape and convert to NCHW if needed
                     if len(img.shape) != 4:  # Should be NCHW
                         raise ValueError(f"Invalid input shape: {img.shape}, expected NCHW format")
+                    if img.shape[1] != 3:  # Should have 3 channels
+                        # If channels are last, transpose to NCHW
+                        if img.shape[3] == 3:
+                            img = np.transpose(img, (0, 3, 1, 2))
+                        else:
+                            raise ValueError(f"Invalid number of channels: {img.shape[1]}, expected 3")
+                    # Verify final shape
+                    if img.shape != (1, 3, 416, 416):
+                        print(f"Warning: Reshaping input from {img.shape} to (1, 3, 416, 416)")
+                        img = np.reshape(img, (1, 3, 416, 416))
                     # Run inference
+                    onnx_input = {self.input_name: img}
                     onnx_output = self.onnx_model.run(None, onnx_input)
                     # Handle shape mismatch by transposing if needed