Spaces:

Hemg
/

chances_of_student_admission

Sleeping

App Files Files Community

Hemg committed on Nov 13, 2024

Commit

815574c

verified ·

1 Parent(s): 6ed3494

Update app.py

Browse files

Files changed (1) hide show

app.py +53 -49

app.py CHANGED Viewed

@@ -6,9 +6,9 @@ from huggingface_hub import hf_hub_download
 from sklearn.preprocessing import LabelEncoder
 # Load the trained model and scaler objects from file
-REPO_ID = "Hemg/modelxxx"  # hugging face repo ID
-MODEL_FILENAME = "predjob.joblib"  # model file name
-SCALER_FILENAME = "scalejob.joblib"  # scaler file name
 model = joblib.load(hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILENAME))
 scaler = joblib.load(hf_hub_download(repo_id=REPO_ID, filename=SCALER_FILENAME))
@@ -16,68 +16,72 @@ scaler = joblib.load(hf_hub_download(repo_id=REPO_ID, filename=SCALER_FILENAME))
 def encode_categorical_columns(df):
     label_encoder = LabelEncoder()
-    # Identify categorical columns
-    ordinal_columns = df.select_dtypes(include=['object']).columns
-    # Encode ordinal columns using LabelEncoder
-    for col in ordinal_columns:
-        df[col] = label_encoder.fit_transform(df[col])
-    # Get nominal columns for one-hot encoding
-    nominal_columns = df.select_dtypes(include=['object']).columns.difference(ordinal_columns)
-    # Apply one-hot encoding to nominal columns (drop the first column to avoid multicollinearity)
-    df = pd.get_dummies(df, columns=nominal_columns, drop_first=True)
-    return df
 def predict_performance(Location, Course, College, Faculty, Source, Event, Presenter, Visited_Parent, Visited_College_for_Inquiry, Attended_Any_Event, College_Fee, GPA, Year):
-    input_data = [Location, Course, College, Faculty, Source, Event, Presenter, Visited_Parent, Visited_College_for_Inquiry, Attended_Any_Event, College_Fee, GPA, Year]
-    # Updated feature names to use spaces instead of underscores to match training data
-    feature_names = [
-        "Location", "Course", "College", "Faculty", "Source", "Event", "Presenter",
-        "Visited Parent", "Visited College for Inquiry", "Attended Any Event", "College Fee", "GPA", "Year"
-    ]
-    input_df = pd.DataFrame([input_data], columns=feature_names)
-    # Debug print: Show DataFrame before encoding
-    print("\nDataFrame before encoding:")
-    print(input_df)
-    # Encode categorical columns
-    df = encode_categorical_columns(input_df)
-    # Debug print: Show DataFrame after encoding
-    print("\nDataFrame after encoding:")
-    print(df)
-    # Extract features to scale
-    features_to_scale = df[["College Fee", "Year"]]
-    # Scale only the College Fee and Year
-    scaled_features = scaler.transform(features_to_scale)
-    # Replace the original features with the scaled features
-    df[["College Fee", "Year"]] = scaled_features
-    # Debug print: Show DataFrame after scaling
-    print("\nDataFrame after scaling:")
-    print(df)
-    # Prepare the input for the model
-    model_input = df.values  # Convert DataFrame to numpy array for model input
-    # Make the prediction
-    prediction = model.predict(model_input)[0]
-    # Clip the prediction to be between 0 and 1
     prediction = np.clip(prediction, 0, 1)
-    # Debug print: Show prediction details
-    print("\nPrediction details:")
-    print(f"Raw prediction: {prediction}")
     return f"Chance of Admission: {prediction:.1f}"

 from sklearn.preprocessing import LabelEncoder
 # Load the trained model and scaler objects from file
+REPO_ID = "Hemg/modelxxx"
+MODEL_FILENAME = "predjob.joblib"
+SCALER_FILENAME = "scalejob.joblib"
 model = joblib.load(hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILENAME))
 scaler = joblib.load(hf_hub_download(repo_id=REPO_ID, filename=SCALER_FILENAME))
 def encode_categorical_columns(df):
     label_encoder = LabelEncoder()
+    # Create a copy of the DataFrame to avoid modifying the original
+    df_encoded = df.copy()
+    # Convert binary yes/no to 1/0
+    binary_columns = ['Visited Parent', 'Visited College for Inquiry', 'Attended Any Event']
+    for col in binary_columns:
+        df_encoded[col] = df_encoded[col].map({'Yes': 1, 'No': 0})
+    # Encode other categorical columns
+    categorical_columns = ['Location', 'Course', 'Faculty', 'Source', 'Event', 'Presenter']
+    for col in categorical_columns:
+        df_encoded[col] = label_encoder.fit_transform(df_encoded[col])
+    return df_encoded
 def predict_performance(Location, Course, College, Faculty, Source, Event, Presenter, Visited_Parent, Visited_College_for_Inquiry, Attended_Any_Event, College_Fee, GPA, Year):
+    # Create initial DataFrame
+    input_data = {
+        'Location': [Location],
+        'Course': [Course],
+        'College': [College],
+        'Faculty': [Faculty],
+        'Source': [Source],
+        'Event': [Event],
+        'Presenter': [Presenter],
+        'Visited Parent': [Visited_Parent],
+        'Visited College for Inquiry': [Visited_College_for_Inquiry],
+        'Attended Any Event': [Attended_Any_Event],
+        'College Fee': [float(College_Fee)],  # Convert to float
+        'GPA': [float(GPA)],  # Convert to float
+        'Year': [float(Year)]  # Convert to float
+    }
+    input_df = pd.DataFrame(input_data)
+    print("\nInput DataFrame:")
+    print(input_df)
+    # Encode categorical variables
+    encoded_df = encode_categorical_columns(input_df)
+    print("\nEncoded DataFrame:")
+    print(encoded_df)
+    # Normalize numerical features (College Fee and Year)
+    numerical_features = ['College Fee', 'Year']
+    encoded_df[numerical_features] = encoded_df[numerical_features].astype(float)
+    # Create min-max scaling manually for College Fee
+    college_fee_min, college_fee_max = 1000000, 1700000
+    encoded_df['College Fee'] = (encoded_df['College Fee'] - college_fee_min) / (college_fee_max - college_fee_min)
+    # Create min-max scaling manually for Year
+    year_min, year_max = 2019, 2025
+    encoded_df['Year'] = (encoded_df['Year'] - year_min) / (year_max - year_min)
+    print("\nScaled DataFrame:")
+    print(encoded_df)
+    # Make prediction
+    prediction = model.predict(encoded_df)[0]
+    # Clip prediction between 0 and 1
     prediction = np.clip(prediction, 0, 1)
+    print("\nPrediction:", prediction)
     return f"Chance of Admission: {prediction:.1f}"