Spaces:

orYx-models
/

scoring-engine

Sleeping

App Files Files Community

scoring-engine / app.py

Vineedhar

Update app.py

96e20c2 verified 5 months ago

raw

history blame contribute delete

3.61 kB

	import streamlit as st
	import pandas as pd
	from sklearn.model_selection import train_test_split
	from sklearn.naive_bayes import GaussianNB
	from sklearn.metrics import accuracy_score
	from sklearn.preprocessing import LabelEncoder


	# LICENSE.streamlit.Apachev2 - Copyright (c) Streamlit Inc. (2018-2022) Snowflake Inc. (2022-2024) (https://github.com/streamlit/streamlit/blob/develop/LICENSE)

	# LICENSE.pandas.BSD-3 - Copyright (c) 2008-2011, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team (https://github.com/pandas-dev/pandas/blob/main/LICENSE)

	# LICENSE.sklearn.BSD-3 - Copyright (c) 2007-2024 The scikit-learn developers (https://github.com/scikit-learn/scikit-learn/blob/main/COPYING)


	# Rater columns that are averaged (ignoring NaN) into 'Average_score'.
	RATER_COLUMNS = [
	    'Boss_score', 'Colleague_score', 'Colleague_other_score',
	    'Report_score', 'Customer_score',
	]

	# Columns the uploaded CSV must provide (after its first column is consumed
	# as the index by `pd.read_csv(..., index_col=0)`).
	INPUT_COLUMNS = ['Title', 'Code', 'Dimensions', *RATER_COLUMNS, 'Benchmark_score']

	# Columns of the processed dataset, in display/download order.
	OUTPUT_COLUMNS = [*INPUT_COLUMNS, 'Average_score', 'Self_score', 'Confidence_score (%)']


	def self_score(average, benchmark):
	    """Classify an average score relative to its benchmark.

	    Args:
	        average: The averaged rater score.
	        benchmark: The benchmark score to compare against.

	    Returns:
	        "High" if ``average > benchmark``, "Low" if ``average < benchmark``,
	        otherwise "Equal".
	    """
	    # NOTE(review): comparisons with NaN are always False, so a row whose
	    # average or benchmark is NaN is labelled "Equal". Kept as-is; confirm
	    # that this is the intended label for rows with missing scores.
	    if average > benchmark:
	        return "High"
	    if average < benchmark:
	        return "Low"
	    return "Equal"


	def add_scores(df):
	    """Return a copy of ``df`` with 'Average_score' and 'Self_score' added.

	    Args:
	        df: DataFrame containing the RATER_COLUMNS and 'Benchmark_score'.

	    Returns:
	        A new DataFrame; the input is not modified.

	    Raises:
	        KeyError: If a rater column or 'Benchmark_score' is missing.
	    """
	    scored = df.copy()

	    # Row-wise mean of the rater scores, skipping NaN, rounded to 1 decimal.
	    scored['Average_score'] = scored[RATER_COLUMNS].mean(axis=1, skipna=True).round(1)

	    # Pairing the two columns directly avoids a row-wise DataFrame.apply and
	    # also works when the frame has no rows.
	    scored['Self_score'] = [
	        self_score(average, benchmark)
	        for average, benchmark in zip(scored['Average_score'], scored['Benchmark_score'])
	    ]
	    return scored


	def _fit_confidence_model(df):
	    """Fit a Gaussian Naive Bayes model that predicts 'Self_score'.

	    Every column of ``df`` other than 'Self_score' is used as a feature.

	    Returns:
	        A ``(confidence, accuracy)`` tuple: the highest predicted class
	        probability for every row of ``df`` as a percentage, and the accuracy
	        on the held-out 15% test split.
	    """
	    # Encode object-type columns (including the 'Self_score' label) as integers.
	    encoded_df = df.copy()
	    le = LabelEncoder()
	    for column in encoded_df.select_dtypes(include=['object']).columns:
	        encoded_df[column] = le.fit_transform(encoded_df[column].astype(str))

	    # Fill missing values with 0 so the model receives no NaN.
	    encoded_df = encoded_df.fillna(0)

	    # NOTE(review): the features include 'Average_score' and
	    # 'Benchmark_score', from which the label is derived.
	    X = encoded_df.drop(columns=['Self_score'])
	    y = encoded_df['Self_score']

	    X_train, X_test, y_train, y_test = train_test_split(
	        X, y, test_size=0.15, random_state=42
	    )

	    gnb = GaussianNB()
	    gnb.fit(X_train, y_train)

	    accuracy = accuracy_score(y_test, gnb.predict(X_test))

	    # Confidence is reported for the entire dataset, not only the test split.
	    confidence = gnb.predict_proba(X).max(axis=1) * 100
	    return confidence, accuracy


	def main():
	    """Render the Scoring Engine app: upload, score, display and download."""
	    # Title of the app
	    st.title("Scoring Engine")

	    # File upload section
	    uploaded_file = st.file_uploader("Upload your dataset (CSV format)", type="csv")

	    if uploaded_file is None:
	        st.write("Please upload a dataset to begin.")
	        return

	    # Load the dataset; the first CSV column becomes the index.
	    df = pd.read_csv(uploaded_file, index_col=0)

	    # Report schema problems up front instead of failing with a KeyError
	    # traceback part-way through processing.
	    missing = [column for column in INPUT_COLUMNS if column not in df.columns]
	    if missing:
	        st.error(f"The uploaded dataset is missing required columns: {', '.join(missing)}")
	        return

	    # The train/test split needs at least one row on each side.
	    if len(df) < 2:
	        st.error("The dataset needs at least two rows to train and evaluate the model.")
	        return

	    df = add_scores(df)
	    confidence, accuracy = _fit_confidence_model(df)
	    df['Confidence_score (%)'] = confidence

	    # Selecting the output columns also discards any extra input column
	    # (such as 'All_raters_Score'), so no separate drop is required.
	    df = df[OUTPUT_COLUMNS]

	    st.write("### Processed Dataset")
	    st.write(df)
	    st.write(f"### Model Accuracy: {accuracy:.2f}")

	    # Download button for the processed dataset
	    csv = df.to_csv(index=False).encode('utf-8')
	    st.download_button(
	        label="Download Processed Dataset",
	        data=csv,
	        file_name="processed_dataset.csv",
	        mime="text/csv",
	    )


	# Streamlit executes the script as the `__main__` module, so the app still
	# renders under `streamlit run`, while importing this file has no UI effects.
	if __name__ == "__main__":
	    main()