Hemg committed on
Commit
815574c
·
verified ·
1 Parent(s): 6ed3494

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -49
app.py CHANGED
@@ -6,9 +6,9 @@ from huggingface_hub import hf_hub_download
6
  from sklearn.preprocessing import LabelEncoder
7
 
# Hugging Face Hub coordinates of the serialized model and scaler objects.
REPO_ID = "Hemg/modelxxx"  # hugging face repo ID
MODEL_FILENAME = "predjob.joblib"  # model file name
SCALER_FILENAME = "scalejob.joblib"  # scaler file name

# Fetch each artifact from the Hub, then deserialize it with joblib
# (model first, scaler second).
_model_file = hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILENAME)
model = joblib.load(_model_file)
_scaler_file = hf_hub_download(repo_id=REPO_ID, filename=SCALER_FILENAME)
scaler = joblib.load(_scaler_file)
@@ -16,68 +16,72 @@ scaler = joblib.load(hf_hub_download(repo_id=REPO_ID, filename=SCALER_FILENAME))
def encode_categorical_columns(df):
    """Label-encode every object-dtype column of ``df``.

    Each object column is replaced by the integer codes that a
    ``LabelEncoder`` produces when fitted on that same column.

    Args:
        df: pandas DataFrame holding the raw feature values.

    Returns:
        A new DataFrame in which every object-dtype column has been replaced
        by integer codes. ``df`` itself is not modified.
    """
    # Work on a copy so the caller's frame is never mutated in place.
    encoded = df.copy()
    label_encoder = LabelEncoder()

    # NOTE(review): the encoder is re-fitted on the frame it transforms, so
    # the codes depend only on the values present in ``df`` -- confirm this
    # agrees with the encoding used when the model was trained.
    for col in encoded.select_dtypes(include=['object']).columns:
        encoded[col] = label_encoder.fit_transform(encoded[col])

    # The former one-hot step is gone: it looked for object columns *after*
    # all of them had been label-encoded (and subtracted those same columns),
    # so its column list was always empty and get_dummies did nothing.
    return encoded
 
 
33
 
def predict_performance(Location, Course, College, Faculty, Source, Event, Presenter, Visited_Parent, Visited_College_for_Inquiry, Attended_Any_Event, College_Fee, GPA, Year):
    """Run the loaded model on one set of form values and format the result.

    The thirteen arguments are placed, in order, into a single-row DataFrame,
    passed through ``encode_categorical_columns``, the "College Fee" and
    "Year" columns are transformed by the module-level ``scaler``, and the
    resulting array is given to ``model.predict``.

    Returns:
        The string ``"Chance of Admission: <p>"`` where ``<p>`` is the first
        predicted value clipped to [0, 1] and shown with one decimal place.
    """
    # Feature names use spaces instead of underscores to match training data.
    feature_names = [
        "Location", "Course", "College", "Faculty", "Source", "Event", "Presenter",
        "Visited Parent", "Visited College for Inquiry", "Attended Any Event", "College Fee", "GPA", "Year"
    ]
    input_data = [Location, Course, College, Faculty, Source, Event, Presenter, Visited_Parent, Visited_College_for_Inquiry, Attended_Any_Event, College_Fee, GPA, Year]
    input_df = pd.DataFrame([input_data], columns=feature_names)

    # Debug print: show DataFrame before encoding
    print("\nDataFrame before encoding:")
    print(input_df)

    df = encode_categorical_columns(input_df)

    # Debug print: show DataFrame after encoding
    print("\nDataFrame after encoding:")
    print(df)

    # Only College Fee and Year go through the scaler; the scaled values
    # replace the originals in place.
    scaled_columns = ["College Fee", "Year"]
    df[scaled_columns] = scaler.transform(df[scaled_columns])

    # Debug print: show DataFrame after scaling
    print("\nDataFrame after scaling:")
    print(df)

    # The model is fed the bare values, not the DataFrame.
    raw_prediction = model.predict(df.values)[0]

    # Clip the prediction to be between 0 and 1
    prediction = np.clip(raw_prediction, 0, 1)

    # Debug print: the "raw" line now shows the value *before* clipping
    # (it used to print the clipped value under the "Raw" label).
    print("\nPrediction details:")
    print(f"Raw prediction: {raw_prediction}")
    print(f"Clipped prediction: {prediction}")

    return f"Chance of Admission: {prediction:.1f}"
83
 
 
6
  from sklearn.preprocessing import LabelEncoder
7
 
# Load the trained model and scaler objects from file
REPO_ID = "Hemg/modelxxx"
MODEL_FILENAME = "predjob.joblib"
SCALER_FILENAME = "scalejob.joblib"

# Download and deserialize both artifacts in order: model first, then scaler.
model, scaler = (
    joblib.load(hf_hub_download(repo_id=REPO_ID, filename=artifact_name))
    for artifact_name in (MODEL_FILENAME, SCALER_FILENAME)
)
 
def encode_categorical_columns(df):
    """Return a numerically encoded copy of the raw feature frame.

    The three yes/no columns are mapped to 1/0 and the remaining text
    columns are replaced by ``LabelEncoder`` codes fitted on ``df`` itself.

    Args:
        df: pandas DataFrame containing the columns 'Visited Parent',
            'Visited College for Inquiry', 'Attended Any Event', 'Location',
            'Course', 'College', 'Faculty', 'Source', 'Event' and 'Presenter'.

    Returns:
        A new DataFrame with those columns encoded; ``df`` is not modified.

    Raises:
        KeyError: if one of the columns listed above is missing from ``df``.
    """
    # Create a copy of the DataFrame to avoid modifying the original
    df_encoded = df.copy()

    # Convert binary yes/no to 1/0. Matching ignores case and surrounding
    # whitespace so "yes" / " No " are accepted too; anything else still
    # becomes NaN, as before.
    yes_no_codes = {'yes': 1, 'no': 0}
    binary_columns = ['Visited Parent', 'Visited College for Inquiry', 'Attended Any Event']
    for col in binary_columns:
        answers = df_encoded[col].astype(str).str.strip().str.lower()
        df_encoded[col] = answers.map(yes_no_codes)

    # Encode other categorical columns. 'College' is included: it is a text
    # field like the others, and leaving it as a raw string hands a
    # non-numeric column to the model.
    # NOTE(review): the encoder is re-fitted on the frame it transforms, so
    # the codes depend only on the values present in ``df`` -- confirm this
    # agrees with the encoding used when the model was trained.
    label_encoder = LabelEncoder()
    categorical_columns = ['Location', 'Course', 'College', 'Faculty', 'Source', 'Event', 'Presenter']
    for col in categorical_columns:
        df_encoded[col] = label_encoder.fit_transform(df_encoded[col])

    return df_encoded
33
 
def predict_performance(Location, Course, College, Faculty, Source, Event, Presenter, Visited_Parent, Visited_College_for_Inquiry, Attended_Any_Event, College_Fee, GPA, Year):
    """Turn one set of form values into a formatted model prediction.

    The arguments become a single-row DataFrame, which is encoded with
    ``encode_categorical_columns``; "College Fee" and "Year" are min-max
    scaled with fixed bounds, and the frame is passed to ``model.predict``.
    Intermediate frames and the prediction are printed along the way.

    Returns:
        The string ``"Chance of Admission: <p>"`` where ``<p>`` is the first
        predicted value clipped to [0, 1] and shown with one decimal place.

    Raises:
        ValueError, TypeError: if College_Fee, GPA or Year cannot be
            converted with ``float()``.
    """
    # Raw values keyed by the column names used in the frame; the three
    # numeric fields are coerced to float up front.
    raw_values = {
        'Location': Location,
        'Course': Course,
        'College': College,
        'Faculty': Faculty,
        'Source': Source,
        'Event': Event,
        'Presenter': Presenter,
        'Visited Parent': Visited_Parent,
        'Visited College for Inquiry': Visited_College_for_Inquiry,
        'Attended Any Event': Attended_Any_Event,
        'College Fee': float(College_Fee),
        'GPA': float(GPA),
        'Year': float(Year),
    }
    # One-element list per column -> a single-row frame.
    input_df = pd.DataFrame({name: [value] for name, value in raw_values.items()})

    print("\nInput DataFrame:")
    print(input_df)

    encoded_df = encode_categorical_columns(input_df)

    print("\nEncoded DataFrame:")
    print(encoded_df)

    # Fixed (min, max) bounds for the manual min-max scaling of each
    # numerical feature.
    scaling_bounds = {
        'College Fee': (1000000, 1700000),
        'Year': (2019, 2025),
    }
    numerical_features = list(scaling_bounds)
    encoded_df[numerical_features] = encoded_df[numerical_features].astype(float)
    for feature, (lower, upper) in scaling_bounds.items():
        encoded_df[feature] = (encoded_df[feature] - lower) / (upper - lower)

    print("\nScaled DataFrame:")
    print(encoded_df)

    # Predict on the frame and keep the first result, clipped to [0, 1].
    prediction = np.clip(model.predict(encoded_df)[0], 0, 1)

    print("\nPrediction:", prediction)

    return f"Chance of Admission: {prediction:.1f}"
87