Vineedhar committed on
Commit
5f28baf
·
verified ·
1 Parent(s): 1a83022

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +85 -0
app.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ from sklearn.model_selection import train_test_split
4
+ from sklearn.naive_bayes import GaussianNB
5
+ from sklearn.metrics import accuracy_score
6
+ from sklearn.preprocessing import LabelEncoder
7
+
8
# Streamlit "Scoring Engine" app.
#
# Flow: upload a CSV -> derive 'Average_score' and 'Self_score' -> label-encode
# text columns -> train a Gaussian Naive Bayes classifier on 'Self_score' ->
# report hold-out accuracy -> attach a per-row 'Confidence_score' -> offer the
# processed table as a CSV download.

# Rater columns that are averaged row-wise (NaN ignored) into 'Average_score'.
RATER_SCORE_COLUMNS = [
    'Boss_score', 'Colleague_score', 'Colleague_other_score',
    'Report_score', 'Customer_score', 'All_raters_Score',
]

# Every column the pipeline reads from the uploaded CSV.
REQUIRED_COLUMNS = RATER_SCORE_COLUMNS + ['Benchmark_score']

# Smallest dataset for which the 85/15 split below leaves a non-empty
# training set and a non-empty test set.
MIN_ROWS = 2


def self_score(average, benchmark):
    """Classify an average score relative to its benchmark.

    Args:
        average: The row's average rater score.
        benchmark: The row's benchmark score.

    Returns:
        "High" if ``average > benchmark``, "Low" if ``average < benchmark``,
        otherwise "Equal".
    """
    # NOTE(review): NaN compares False in both directions, so a row whose
    # average or benchmark is missing falls through to "Equal". Behaviour is
    # kept as-is; confirm whether such rows should get their own label.
    if average > benchmark:
        return "High"
    if average < benchmark:
        return "Low"
    return "Equal"


def add_scores(df):
    """Add the 'Average_score' and 'Self_score' columns to *df* in place."""
    # Row-wise mean of the rater columns, ignoring NaN, rounded to 1 decimal place.
    df['Average_score'] = df[RATER_SCORE_COLUMNS].mean(axis=1, skipna=True).round(1)

    # Compare each row's average with its benchmark.
    df['Self_score'] = [
        self_score(average, benchmark)
        for average, benchmark in zip(df['Average_score'], df['Benchmark_score'])
    ]


def encode_features(df):
    """Return a copy of *df* with object columns label-encoded and NaN set to 0."""
    encoded = df.copy()
    for column in encoded.select_dtypes(include=['object']).columns:
        # A fresh encoder per column: the fitted mapping is not reused later.
        encoded[column] = LabelEncoder().fit_transform(encoded[column].astype(str))
    return encoded.fillna(0)


# Title of the app
st.title("Scoring Engine")

# File upload section
uploaded_file = st.file_uploader("Upload your dataset (CSV format)", type="csv")

if uploaded_file is not None:
    # Load the dataset; report unreadable files instead of showing a traceback.
    try:
        df = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
        st.error(f"Could not read the uploaded CSV: {exc}")
        st.stop()  # ends this script run

    st.write("### Uploaded Dataset")
    st.write(df)

    # Validate the input up front so the user sees what is wrong with the file.
    missing_columns = [name for name in REQUIRED_COLUMNS if name not in df.columns]
    if missing_columns:
        st.error("The dataset is missing required column(s): " + ", ".join(missing_columns))
        st.stop()
    if len(df) < MIN_ROWS:
        st.error(f"The dataset needs at least {MIN_ROWS} rows to train and evaluate the model.")
        st.stop()

    # Derive 'Average_score' and 'Self_score'.
    add_scores(df)

    # Encode object-type columns and fill missing values with 0.
    encoded_df = encode_features(df)

    # Prepare features (X) and labels (y).
    # NOTE(review): X still contains 'Average_score' and 'Benchmark_score',
    # the two values the label is derived from; confirm this is intended.
    X = encoded_df.drop(columns=['Self_score'])
    y = encoded_df['Self_score']

    # Split data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)

    # Train the Gaussian Naive Bayes model
    gnb = GaussianNB()
    gnb.fit(X_train, y_train)

    # Evaluate the model on the held-out rows
    y_pred = gnb.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    st.write(f"### Model Accuracy: {accuracy:.2f}")

    # Confidence score = highest predicted class probability, for every row
    # of the full dataset (training rows included).
    y_prob = gnb.predict_proba(X)
    df['Confidence_score'] = y_prob.max(axis=1)

    st.write("### Processed Dataset")
    st.write(df)

    # Download button for the processed dataset
    csv = df.to_csv(index=False).encode('utf-8')
    st.download_button(
        label="Download Processed Dataset",
        data=csv,
        file_name="processed_dataset.csv",
        mime="text/csv",
    )
else:
    st.write("Please upload a dataset to begin.")