Hemg committed on
Commit
092001b
·
verified ·
1 Parent(s): 5b8842a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -2
app.py CHANGED
@@ -15,12 +15,18 @@ scaler = joblib.load(hf_hub_download(repo_id=REPO_ID, filename=SCALER_FILENAME))
15
 
16
  def encode_categorical_columns(df):
17
  label_encoder = LabelEncoder()
 
 
18
  ordinal_columns = df.select_dtypes(include=['object']).columns
19
-
 
20
  for col in ordinal_columns:
21
  df[col] = label_encoder.fit_transform(df[col])
22
 
 
23
  nominal_columns = df.select_dtypes(include=['object']).columns.difference(ordinal_columns)
 
 
24
  df = pd.get_dummies(df, columns=nominal_columns, drop_first=True)
25
 
26
  return df
@@ -44,7 +50,10 @@ def predict_performance(Location, Course, College, Faculty, Source, Event, Prese
44
 
45
  input_data = [Location, Course, College, Faculty, Source, Event, Presenter, Visited_Parent, Visited_College_for_Inquiry, Attended_Any_Event, College_Fee, GPA, Year]
46
 
47
- feature_names = ["Location", "Course", "College", "Faculty", "Source", "Event", "Presenter", "Visited_Parent", "Visited_College_for_Inquiry", "Attended_Any_Event", "College_Fee", "GPA", "Year"]
 
 
 
48
 
49
  input_df = pd.DataFrame([input_data], columns=feature_names)
50
 
@@ -52,12 +61,14 @@ def predict_performance(Location, Course, College, Faculty, Source, Event, Prese
52
  print("\nDataFrame before encoding:")
53
  print(input_df)
54
 
 
55
  df = encode_categorical_columns(input_df)
56
 
57
  # Debug print 3: Show DataFrame after encoding
58
  print("\nDataFrame after encoding:")
59
  print(df)
60
 
 
61
  scaled_input = scaler.transform(df)
62
 
63
  # Make the prediction
 
15
 
16
  def encode_categorical_columns(df):
17
  label_encoder = LabelEncoder()
18
+
19
+ # Identify categorical columns
20
  ordinal_columns = df.select_dtypes(include=['object']).columns
21
+
22
+ # Encode ordinal columns using LabelEncoder
23
  for col in ordinal_columns:
24
  df[col] = label_encoder.fit_transform(df[col])
25
 
26
+ # Get nominal columns for one-hot encoding
27
  nominal_columns = df.select_dtypes(include=['object']).columns.difference(ordinal_columns)
28
+
29
+ # Apply one-hot encoding to nominal columns (drop the first column to avoid multicollinearity)
30
  df = pd.get_dummies(df, columns=nominal_columns, drop_first=True)
31
 
32
  return df
 
50
 
51
  input_data = [Location, Course, College, Faculty, Source, Event, Presenter, Visited_Parent, Visited_College_for_Inquiry, Attended_Any_Event, College_Fee, GPA, Year]
52
 
53
+ feature_names = [
54
+ "Location", "Course", "College", "Faculty", "Source", "Event", "Presenter",
55
+ "Visited_Parent", "Visited_College_for_Inquiry", "Attended_Any_Event", "College_Fee", "GPA", "Year"
56
+ ]
57
 
58
  input_df = pd.DataFrame([input_data], columns=feature_names)
59
 
 
61
  print("\nDataFrame before encoding:")
62
  print(input_df)
63
 
64
+ # Encode categorical columns
65
  df = encode_categorical_columns(input_df)
66
 
67
  # Debug print 3: Show DataFrame after encoding
68
  print("\nDataFrame after encoding:")
69
  print(df)
70
 
71
+ # Scale input data using the loaded scaler
72
  scaled_input = scaler.transform(df)
73
 
74
  # Make the prediction