mgbam committed on
Commit
211e3a6
·
verified ·
1 Parent(s): c5a8062

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -1
app.py CHANGED
@@ -238,19 +238,52 @@ class HypothesisTester(DataAnalyzer):
238
  else:
239
  return "No significant evidence against H0"
240
 
 
 
241
  class LogisticRegressionTrainer(DataAnalyzer):
242
- """Logistic Regression Model Trainer."""
243
  def invoke(self, data: pd.DataFrame, target_col: str, columns: List[str], **kwargs) -> Dict[str, Any]:
244
  try:
245
  X = data[columns]
246
  y = data[target_col]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
247
  X_train, X_test, y_train, y_test = train_test_split(
248
  X, y, test_size=0.2, random_state=42
249
  )
 
 
 
250
  model = LogisticRegression(max_iter=1000)
251
  model.fit(X_train, y_train)
 
 
 
252
  y_pred = model.predict(X_test)
253
  accuracy = accuracy_score(y_test, y_pred)
 
 
254
  return {
255
  "model_type": "Logistic Regression",
256
  "accuracy": accuracy,
@@ -260,6 +293,7 @@ class LogisticRegressionTrainer(DataAnalyzer):
260
  logger.error(f"Logistic Regression Model Error: {str(e)}")
261
  return {"error": f"Logistic Regression Model Error: {str(e)}"}
262
 
 
263
  # ---------------------- Business Logic Layer ---------------------------
264
 
265
  class ClinicalRule(BaseModel):
 
238
  else:
239
  return "No significant evidence against H0"
240
 
241
+ from sklearn.impute import SimpleImputer
242
+
243
  class LogisticRegressionTrainer(DataAnalyzer):
244
+ """Logistic Regression Model Trainer with Missing Value Handling."""
245
  def invoke(self, data: pd.DataFrame, target_col: str, columns: List[str], **kwargs) -> Dict[str, Any]:
246
  try:
247
  X = data[columns]
248
  y = data[target_col]
249
+
250
+ # Check for missing values in X
251
+ if X.isnull().values.any():
252
+ logger.info("Missing values detected in feature variables. Applying imputation.")
253
+ imputer = SimpleImputer(strategy='mean') # You can choose 'median', 'most_frequent', etc.
254
+ X_imputed = imputer.fit_transform(X)
255
+ X = pd.DataFrame(X_imputed, columns=columns)
256
+ logger.info("Imputation completed for feature variables.")
257
+ else:
258
+ logger.info("No missing values detected in feature variables.")
259
+
260
+ # Check for missing values in y
261
+ if y.isnull().values.any():
262
+ logger.info("Missing values detected in target variable. Applying imputation.")
263
+ # For classification, it's common to impute with the mode
264
+ y_imputer = SimpleImputer(strategy='most_frequent')
265
+ y_imputed = y_imputer.fit_transform(y.values.reshape(-1, 1))
266
+ y = pd.Series(y_imputer.ravel())
267
+ logger.info("Imputation completed for target variable.")
268
+ else:
269
+ logger.info("No missing values detected in target variable.")
270
+
271
+ # Split the data
272
  X_train, X_test, y_train, y_test = train_test_split(
273
  X, y, test_size=0.2, random_state=42
274
  )
275
+ logger.info("Data split into training and testing sets.")
276
+
277
+ # Initialize and train the model
278
  model = LogisticRegression(max_iter=1000)
279
  model.fit(X_train, y_train)
280
+ logger.info("Logistic Regression model training completed.")
281
+
282
+ # Make predictions and evaluate
283
  y_pred = model.predict(X_test)
284
  accuracy = accuracy_score(y_test, y_pred)
285
+ logger.info(f"Model accuracy on test set: {accuracy:.2%}")
286
+
287
  return {
288
  "model_type": "Logistic Regression",
289
  "accuracy": accuracy,
 
293
  logger.error(f"Logistic Regression Model Error: {str(e)}")
294
  return {"error": f"Logistic Regression Model Error: {str(e)}"}
295
 
296
+
297
  # ---------------------- Business Logic Layer ---------------------------
298
 
299
  class ClinicalRule(BaseModel):