Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -6,9 +6,9 @@ from huggingface_hub import hf_hub_download
|
|
6 |
from sklearn.preprocessing import LabelEncoder
|
7 |
|
8 |
# Load the trained model and scaler objects from file
|
9 |
-
REPO_ID = "Hemg/modelxxx"
|
10 |
-
MODEL_FILENAME = "predjob.joblib"
|
11 |
-
SCALER_FILENAME = "scalejob.joblib"
|
12 |
|
13 |
model = joblib.load(hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILENAME))
|
14 |
scaler = joblib.load(hf_hub_download(repo_id=REPO_ID, filename=SCALER_FILENAME))
|
@@ -16,68 +16,72 @@ scaler = joblib.load(hf_hub_download(repo_id=REPO_ID, filename=SCALER_FILENAME))
|
|
16 |
def encode_categorical_columns(df):
|
17 |
label_encoder = LabelEncoder()
|
18 |
|
19 |
-
#
|
20 |
-
|
21 |
|
22 |
-
#
|
23 |
-
for
|
24 |
-
|
25 |
-
|
26 |
-
# Get nominal columns for one-hot encoding
|
27 |
-
nominal_columns = df.select_dtypes(include=['object']).columns.difference(ordinal_columns)
|
28 |
|
29 |
-
#
|
30 |
-
|
31 |
-
|
32 |
-
|
|
|
|
|
33 |
|
34 |
def predict_performance(Location, Course, College, Faculty, Source, Event, Presenter, Visited_Parent, Visited_College_for_Inquiry, Attended_Any_Event, College_Fee, GPA, Year):
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
|
43 |
-
input_df = pd.DataFrame(
|
44 |
-
|
45 |
-
# Debug print: Show DataFrame before encoding
|
46 |
-
print("\nDataFrame before encoding:")
|
47 |
-
print(input_df)
|
48 |
|
49 |
-
|
50 |
-
|
51 |
|
52 |
-
#
|
53 |
-
|
54 |
-
print(df)
|
55 |
|
56 |
-
|
57 |
-
|
58 |
|
59 |
-
#
|
60 |
-
|
|
|
61 |
|
62 |
-
#
|
63 |
-
|
|
|
64 |
|
65 |
-
#
|
66 |
-
|
67 |
-
|
68 |
|
69 |
-
|
70 |
-
|
71 |
|
72 |
-
# Make
|
73 |
-
prediction = model.predict(
|
74 |
|
75 |
-
# Clip
|
76 |
prediction = np.clip(prediction, 0, 1)
|
77 |
|
78 |
-
|
79 |
-
print("\nPrediction details:")
|
80 |
-
print(f"Raw prediction: {prediction}")
|
81 |
|
82 |
return f"Chance of Admission: {prediction:.1f}"
|
83 |
|
|
|
6 |
from sklearn.preprocessing import LabelEncoder
|
7 |
|
8 |
# Hugging Face Hub repository and artifact names for the trained model and scaler.
REPO_ID = "Hemg/modelxxx"
MODEL_FILENAME = "predjob.joblib"
SCALER_FILENAME = "scalejob.joblib"

# Fetch each artifact from the Hub, then deserialize it with joblib.
# NOTE(review): joblib.load unpickles the file, so REPO_ID must be a trusted repo.
_model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILENAME)
model = joblib.load(_model_path)
_scaler_path = hf_hub_download(repo_id=REPO_ID, filename=SCALER_FILENAME)
scaler = joblib.load(_scaler_path)
|
|
|
16 |
def encode_categorical_columns(df):
    """Return a copy of ``df`` with yes/no and categorical columns made numeric.

    The three yes/no columns are converted to 1/0 and the six categorical
    columns are label-encoded. Every other column is returned unchanged, and
    the caller's DataFrame is never modified.

    Args:
        df: DataFrame containing at least the columns listed in
            ``binary_columns`` and ``categorical_columns`` below.

    Returns:
        A new DataFrame in which the listed columns hold integer codes.

    Raises:
        KeyError: If one of the expected columns is missing from ``df``.
        ValueError: If a yes/no column holds anything other than "yes"/"no"
            (compared case-insensitively, ignoring surrounding whitespace).
    """
    # Work on a copy so the original input is left untouched.
    df_encoded = df.copy()

    # Convert binary yes/no to 1/0.
    binary_columns = ['Visited Parent', 'Visited College for Inquiry', 'Attended Any Event']
    yes_no_codes = {'yes': 1, 'no': 0}
    for col in binary_columns:
        # Normalise spelling first: a plain .map() would silently turn 'yes',
        # 'No ' or a missing value into NaN and pass it on to the model.
        normalized = df_encoded[col].astype(str).str.strip().str.lower()
        codes = normalized.map(yes_no_codes)
        unmapped = codes.isna()
        if unmapped.any():
            bad_values = df_encoded.loc[unmapped, col].unique().tolist()
            raise ValueError(
                f"Column {col!r} must contain 'Yes' or 'No'; got {bad_values!r}"
            )
        df_encoded[col] = codes.astype('int64')

    # Encode other categorical columns.
    # NOTE(review): the encoder is re-fitted on whatever frame is passed in, so
    # the codes depend only on the values present here; for a one-row frame
    # every value encodes to 0. Confirm this matches how the model was trained.
    # NOTE(review): 'College' is not in this list and is returned unencoded;
    # confirm that is intended.
    categorical_columns = ['Location', 'Course', 'Faculty', 'Source', 'Event', 'Presenter']
    label_encoder = LabelEncoder()
    for col in categorical_columns:
        df_encoded[col] = label_encoder.fit_transform(df_encoded[col])

    return df_encoded
|
33 |
|
34 |
def predict_performance(Location, Course, College, Faculty, Source, Event, Presenter, Visited_Parent, Visited_College_for_Inquiry, Attended_Any_Event, College_Fee, GPA, Year):
    """Predict the chance of admission for a single applicant.

    Builds a one-row DataFrame from the arguments, encodes it with
    ``encode_categorical_columns``, min-max scales 'College Fee' and 'Year'
    with fixed bounds, runs the module-level ``model`` and clips the result
    to the range [0, 1]. Intermediate frames are printed to stdout.

    Returns:
        The string ``"Chance of Admission: <p>"`` with ``p`` formatted to
        one decimal place.

    Raises:
        ValueError: If ``College_Fee``, ``GPA`` or ``Year`` cannot be
            converted with ``float()``.
    """
    # One record keyed by the column names the encoder expects; the numeric
    # inputs are converted to float up front.
    record = {
        'Location': Location,
        'Course': Course,
        'College': College,
        'Faculty': Faculty,
        'Source': Source,
        'Event': Event,
        'Presenter': Presenter,
        'Visited Parent': Visited_Parent,
        'Visited College for Inquiry': Visited_College_for_Inquiry,
        'Attended Any Event': Attended_Any_Event,
        'College Fee': float(College_Fee),
        'GPA': float(GPA),
        'Year': float(Year),
    }
    raw_frame = pd.DataFrame({column: [value] for column, value in record.items()})

    print("\nInput DataFrame:")
    print(raw_frame)

    features = encode_categorical_columns(raw_frame)

    print("\nEncoded DataFrame:")
    print(features)

    # Fixed (min, max) bounds for manual min-max scaling; GPA is left as-is.
    scaling_bounds = {
        'College Fee': (1000000, 1700000),
        'Year': (2019, 2025),
    }
    for column, (lower, upper) in scaling_bounds.items():
        as_float = features[column].astype(float)
        features[column] = (as_float - lower) / (upper - lower)

    print("\nScaled DataFrame:")
    print(features)

    # The frame has a single row, so take the first (only) prediction and
    # keep it inside [0, 1].
    raw_prediction = model.predict(features)[0]
    prediction = np.clip(raw_prediction, 0, 1)

    print("\nPrediction:", prediction)

    return f"Chance of Admission: {prediction:.1f}"
|
87 |
|