Spaces:

mgbam
/

Healthapp

Sleeping

App Files Files Community

mgbam committed on Jan 29

Commit

211e3a6

verified ·

1 Parent(s): c5a8062

Update app.py

Browse files

Files changed (1) hide show

app.py +35 -1

app.py CHANGED Viewed

@@ -238,19 +238,52 @@ class HypothesisTester(DataAnalyzer):
         else:
             return "No significant evidence against H0"
 class LogisticRegressionTrainer(DataAnalyzer):
-    """Logistic Regression Model Trainer."""
     def invoke(self, data: pd.DataFrame, target_col: str, columns: List[str], **kwargs) -> Dict[str, Any]:
         try:
             X = data[columns]
             y = data[target_col]
             X_train, X_test, y_train, y_test = train_test_split(
                 X, y, test_size=0.2, random_state=42
             )
             model = LogisticRegression(max_iter=1000)
             model.fit(X_train, y_train)
             y_pred = model.predict(X_test)
             accuracy = accuracy_score(y_test, y_pred)
             return {
                 "model_type": "Logistic Regression",
                 "accuracy": accuracy,
@@ -260,6 +293,7 @@ class LogisticRegressionTrainer(DataAnalyzer):
             logger.error(f"Logistic Regression Model Error: {str(e)}")
             return {"error": f"Logistic Regression Model Error: {str(e)}"}
 # ---------------------- Business Logic Layer ---------------------------
 class ClinicalRule(BaseModel):

         else:
             return "No significant evidence against H0"
+from sklearn.impute import SimpleImputer
 class LogisticRegressionTrainer(DataAnalyzer):
+    """Logistic Regression Model Trainer with Missing Value Handling."""
     def invoke(self, data: pd.DataFrame, target_col: str, columns: List[str], **kwargs) -> Dict[str, Any]:
         try:
             X = data[columns]
             y = data[target_col]
+            # Check for missing values in X
+            if X.isnull().values.any():
+                logger.info("Missing values detected in feature variables. Applying imputation.")
+                imputer = SimpleImputer(strategy='mean')  # You can choose 'median', 'most_frequent', etc.
+                X_imputed = imputer.fit_transform(X)
+                X = pd.DataFrame(X_imputed, columns=columns)
+                logger.info("Imputation completed for feature variables.")
+            else:
+                logger.info("No missing values detected in feature variables.")
+            # Check for missing values in y
+            if y.isnull().values.any():
+                logger.info("Missing values detected in target variable. Applying imputation.")
+                # For classification, it's common to impute with the mode
+                y_imputer = SimpleImputer(strategy='most_frequent')
+                y_imputed = y_imputer.fit_transform(y.values.reshape(-1, 1))
+                y = pd.Series(y_imputer.ravel())
+                logger.info("Imputation completed for target variable.")
+            else:
+                logger.info("No missing values detected in target variable.")
+            # Split the data
             X_train, X_test, y_train, y_test = train_test_split(
                 X, y, test_size=0.2, random_state=42
             )
+            logger.info("Data split into training and testing sets.")
+            # Initialize and train the model
             model = LogisticRegression(max_iter=1000)
             model.fit(X_train, y_train)
+            logger.info("Logistic Regression model training completed.")
+            # Make predictions and evaluate
             y_pred = model.predict(X_test)
             accuracy = accuracy_score(y_test, y_pred)
+            logger.info(f"Model accuracy on test set: {accuracy:.2%}")
             return {
                 "model_type": "Logistic Regression",
                 "accuracy": accuracy,
             logger.error(f"Logistic Regression Model Error: {str(e)}")
             return {"error": f"Logistic Regression Model Error: {str(e)}"}
 # ---------------------- Business Logic Layer ---------------------------
 class ClinicalRule(BaseModel):