Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -15,12 +15,18 @@ scaler = joblib.load(hf_hub_download(repo_id=REPO_ID, filename=SCALER_FILENAME))
|
|
15 |
|
16 |
def encode_categorical_columns(df):
    """Return a copy of ``df`` with every object-dtype column label-encoded.

    Each object-dtype column is replaced by the integer codes produced by a
    ``LabelEncoder`` fitted on that column alone. Columns of any other dtype
    are passed through unchanged. The caller's DataFrame is not modified.

    NOTE(review): the encoder is fitted on the data passed in, so the codes
    depend only on the values present in ``df``. For a single-row frame every
    encoded column therefore becomes 0 -- confirm this matches the encoding
    used when the consumer of this output was fitted.

    Args:
        df: pandas DataFrame to encode.

    Returns:
        A new DataFrame with the same columns as ``df``.
    """
    # Work on a copy so the caller's frame keeps its original values.
    encoded = df.copy()

    categorical_columns = encoded.select_dtypes(include=['object']).columns
    for col in categorical_columns:
        # fit_transform refits on every call, so a fresh encoder per column
        # is equivalent to reusing one and avoids building an encoder when
        # there is nothing to encode.
        encoded[col] = LabelEncoder().fit_transform(encoded[col])

    # No one-hot step: every object column has just been label-encoded, so
    # the set of "remaining object columns minus encoded columns" is always
    # empty and a get_dummies call on it would return the frame unchanged.
    return encoded
|
@@ -44,7 +50,10 @@ def predict_performance(Location, Course, College, Faculty, Source, Event, Prese
|
|
44 |
|
45 |
input_data = [Location, Course, College, Faculty, Source, Event, Presenter, Visited_Parent, Visited_College_for_Inquiry, Attended_Any_Event, College_Fee, GPA, Year]
|
46 |
|
47 |
-
feature_names = [
|
|
|
|
|
|
|
48 |
|
49 |
input_df = pd.DataFrame([input_data], columns=feature_names)
|
50 |
|
@@ -52,12 +61,14 @@ def predict_performance(Location, Course, College, Faculty, Source, Event, Prese
|
|
52 |
print("\nDataFrame before encoding:")
|
53 |
print(input_df)
|
54 |
|
|
|
55 |
df = encode_categorical_columns(input_df)
|
56 |
|
57 |
# Debug print 3: Show DataFrame after encoding
|
58 |
print("\nDataFrame after encoding:")
|
59 |
print(df)
|
60 |
|
|
|
61 |
scaled_input = scaler.transform(df)
|
62 |
|
63 |
# Make the prediction
|
|
|
15 |
|
16 |
def encode_categorical_columns(df):
    """Return a copy of ``df`` with every object-dtype column label-encoded.

    Each object-dtype column is replaced by the integer codes produced by a
    ``LabelEncoder`` fitted on that column alone. Columns of any other dtype
    are passed through unchanged. The caller's DataFrame is not modified.

    NOTE(review): the encoder is fitted on the data passed in, so the codes
    depend only on the values present in ``df``. For a single-row frame every
    encoded column therefore becomes 0 -- confirm this matches the encoding
    used when the consumer of this output was fitted.

    Args:
        df: pandas DataFrame to encode.

    Returns:
        A new DataFrame with the same columns as ``df``.
    """
    # Work on a copy so the caller's frame keeps its original values.
    encoded = df.copy()

    # Identify categorical columns
    categorical_columns = encoded.select_dtypes(include=['object']).columns

    # Encode each categorical column using its own LabelEncoder.
    for col in categorical_columns:
        # fit_transform refits on every call, so a fresh encoder per column
        # is equivalent to reusing one and avoids building an encoder when
        # there is nothing to encode.
        encoded[col] = LabelEncoder().fit_transform(encoded[col])

    # No one-hot step: every object column has just been label-encoded, so
    # the set of "remaining object columns minus encoded columns" is always
    # empty and a get_dummies call on it would return the frame unchanged.
    return encoded
|
|
|
50 |
|
51 |
input_data = [Location, Course, College, Faculty, Source, Event, Presenter, Visited_Parent, Visited_College_for_Inquiry, Attended_Any_Event, College_Fee, GPA, Year]
|
52 |
|
53 |
+
feature_names = [
|
54 |
+
"Location", "Course", "College", "Faculty", "Source", "Event", "Presenter",
|
55 |
+
"Visited_Parent", "Visited_College_for_Inquiry", "Attended_Any_Event", "College_Fee", "GPA", "Year"
|
56 |
+
]
|
57 |
|
58 |
input_df = pd.DataFrame([input_data], columns=feature_names)
|
59 |
|
|
|
61 |
print("\nDataFrame before encoding:")
|
62 |
print(input_df)
|
63 |
|
64 |
+
# Encode categorical columns
|
65 |
df = encode_categorical_columns(input_df)
|
66 |
|
67 |
# Debug print 3: Show DataFrame after encoding
|
68 |
print("\nDataFrame after encoding:")
|
69 |
print(df)
|
70 |
|
71 |
+
# Scale input data using the loaded scaler
|
72 |
scaled_input = scaler.transform(df)
|
73 |
|
74 |
# Make the prediction
|