Spaces:

montebello-642
/

Logistic-Resression

Sleeping

App Files Files Community

Logistic-Resression / app.py

montebello-642

Initial commit

6b561c6 verified over 1 year ago

raw

history blame contribute delete

5.02 kB

	import pandas as pd
	from sklearn.model_selection import train_test_split, cross_val_score
	from sklearn.preprocessing import StandardScaler
	from sklearn.linear_model import LogisticRegression
	from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
	import seaborn as sns
	import matplotlib.pyplot as plt
	import gradio as gr

	# Load the dataset, keeping only the columns the model needs.
	# The last entry is the target; the others are the features.
	selected_columns = [
	    'duration_mo',
	    'mos_ethnicity',
	    'complainant_ethnicity',
	    'is_force',
	    'is_abuse_of_authority',
	    'is_discourtesy',
	    'is_offensive_language',
	    'outcome_description',
	]
	# read_csv ignores the order of `usecols` and returns the columns in file
	# order. Re-index with the same list so the column (and therefore feature)
	# order is always the one written above, whatever the CSV layout is.
	df = pd.read_csv('my_dataset_logistic.csv', usecols=selected_columns)[selected_columns]

	# Quick look at the data: columns, first rows, summary stats, missing values.
	print(df.columns)
	print(df.head())
	print(df.describe())
	print(df.isnull().sum())

	# Separate the target column from the features.
	y = df['outcome_description']
	X = df.drop(columns=['outcome_description'])
	# Missing feature values are replaced with 0.
	X = X.fillna(0)

	# Hold out 20% of the rows as a test set (fixed seed for reproducibility).
	X_train, X_test, y_train, y_test = train_test_split(
	    X,
	    y,
	    test_size=0.2,
	    random_state=42,
	)

	# Standardize the features; the scaler is fit on the training split only
	# and then applied to both splits.
	scaler = StandardScaler().fit(X_train)
	X_train_scaled = scaler.transform(X_train)
	X_test_scaled = scaler.transform(X_test)

	# Train the logistic regression model on the scaled training data.
	model = LogisticRegression(random_state=42).fit(X_train_scaled, y_train)

	# Predict on the held-out split and report the accuracy.
	y_pred = model.predict(X_test_scaled)
	accuracy = accuracy_score(y_test, y_pred)
	print(f'Accuracy: {accuracy:.2f}')

	# Classification report for the held-out split.
	print(classification_report(y_test, y_pred))

	# Confusion Matrix
	# Compute the label order once and use it for BOTH the matrix and the tick
	# labels. confusion_matrix orders its rows/columns by the sorted labels found
	# in y_test / y_pred, whereas Series.unique() returns values in order of
	# appearance over the whole dataset, so the previous tick labels could be
	# attached to the wrong rows/columns or differ in number from the matrix.
	class_labels = sorted(set(y_test) | set(y_pred))
	conf_matrix = confusion_matrix(y_test, y_pred, labels=class_labels)
	plt.figure(figsize=(8, 6))
	sns.heatmap(
	    conf_matrix,
	    annot=True,
	    fmt="d",
	    cmap="Blues",
	    cbar=False,
	    xticklabels=class_labels,
	    yticklabels=class_labels,
	)
	plt.title("Confusion Matrix")
	plt.xlabel("Predicted")
	plt.ylabel("Actual")
	plt.show()

	# Correlation matrix over every loaded column (target included), as a heatmap.
	correlation_matrix = df.corr()
	plt.figure(figsize=(10, 8))
	sns.heatmap(
	    correlation_matrix,
	    annot=True,
	    cmap='coolwarm',
	    fmt=".2f",
	    linewidths=.5,
	)
	plt.title('Correlation Matrix')
	plt.show()

	# Bar chart of each feature's correlation with the target, highest first.
	# The target's own entry is dropped before plotting.
	target_correlations = correlation_matrix['outcome_description'].sort_values(ascending=False)
	plt.figure(figsize=(10, 6))
	target_correlations.drop('outcome_description').plot.bar(color='blue')
	plt.title('Correlations with Target Variable')
	plt.xlabel('Features')
	plt.ylabel('Correlation')
	plt.show()

	# Standard deviation of every column.
	std_dev = df.std()
	print('\nStandard deviation')
	print(std_dev)

	# Gradio implementation.
	# Choices offered by the two ethnicity dropdowns. The prediction function
	# encodes a selection as its index in the list, so the order matters.
	mos_ethnicity_options = [
	    "Hispanic",
	    "White",
	    "Black",
	    "Asian",
	    "American Indian",
	    "Other Race",
	    "Refused",
	    "Unknown",
	]
	# Same choices for the complainant, kept as a separate list object.
	complainant_ethnicity_options = list(mos_ethnicity_options)

	# Prediction callback used by the Gradio interface.
	def predict_outcome_duration(mos_ethnicity, complainant_ethnicity, is_force, is_abuse_of_authority, is_discourtesy, is_offensive_language, duration_mo):
	    """Predict the complaint outcome for one set of form inputs.

	    Relies on the module-level ``X``, ``scaler`` and ``model`` produced by
	    the training code above, and on the two ethnicity option lists.

	    Args:
	        mos_ethnicity: Entry of ``mos_ethnicity_options``.
	        complainant_ethnicity: Entry of ``complainant_ethnicity_options``.
	        is_force: Checkbox value, converted with ``int()``.
	        is_abuse_of_authority: Checkbox value, converted with ``int()``.
	        is_discourtesy: Checkbox value, converted with ``int()``.
	        is_offensive_language: Checkbox value, converted with ``int()``.
	        duration_mo: Duration in months.

	    Returns:
	        ``"Arrest"`` when the model predicts class 1, ``"No Arrest"`` for any
	        other class, or an ``"Error: ..."`` string if the inputs cannot be
	        encoded or the model call fails.
	    """
	    # NOTE(review): a dropdown with nothing selected presumably arrives as
	    # None; report that explicitly instead of "None is not in list".
	    if mos_ethnicity is None or complainant_ethnicity is None:
	        return "Error: please select both ethnicities"

	    try:
	        # Ethnicities are encoded as their position in the option lists;
	        # checkbox values become 0/1.
	        # NOTE(review): assumes the dataset uses the same integer encoding
	        # for the ethnicity columns -- confirm.
	        features = {
	            'duration_mo': duration_mo,
	            'mos_ethnicity': mos_ethnicity_options.index(mos_ethnicity),
	            'complainant_ethnicity': complainant_ethnicity_options.index(complainant_ethnicity),
	            'is_force': int(is_force),
	            'is_abuse_of_authority': int(is_abuse_of_authority),
	            'is_discourtesy': int(is_discourtesy),
	            'is_offensive_language': int(is_offensive_language),
	        }
	        # Align the row to the training columns by NAME rather than by
	        # position, so it cannot be fed to the scaler in the wrong order.
	        input_data = pd.DataFrame([features])[list(X.columns)]
	        input_scaled = scaler.transform(input_data)
	        prediction = model.predict(input_scaled)[0]
	    except Exception as e:
	        # UI boundary: show the failure in the output box instead of raising.
	        return f"Error: {str(e)}"

	    # Output the result.
	    return "Arrest" if prediction == 1 else "No Arrest"

	# Build the Gradio interface: dropdowns for the two ethnicities, one checkbox
	# per allegation type, and a slider for the duration in months.
	mos_ethnicity_dropdown = gr.Dropdown(choices=mos_ethnicity_options, label="Defendant Ethnicity")
	complainant_ethnicity_dropdown = gr.Dropdown(choices=complainant_ethnicity_options, label="Complainant Ethnicity")
	is_force_checkbox = gr.Checkbox()
	is_abuse_of_authority_checkbox = gr.Checkbox()
	is_discourtesy_checkbox = gr.Checkbox()
	is_offensive_language_checkbox = gr.Checkbox()
	duration_mo_slider = gr.Slider(minimum=0, maximum=20, label="Duration in months")

	# Gradio passes the input values to `fn` positionally, so this list must
	# follow the parameter order of predict_outcome_duration (mos_ethnicity
	# first, then complainant_ethnicity). The two dropdowns used to be listed
	# the other way round, which fed each ethnicity into the other's feature.
	iface = gr.Interface(
	    fn=predict_outcome_duration,
	    inputs=[
	        mos_ethnicity_dropdown,
	        complainant_ethnicity_dropdown,
	        is_force_checkbox,
	        is_abuse_of_authority_checkbox,
	        is_discourtesy_checkbox,
	        is_offensive_language_checkbox,
	        duration_mo_slider,
	    ],
	    outputs="text",
	    live=True,
	    title="Complaint Outcome Prediction",
	)

	# Launch the Gradio Interface
	iface.launch(share=True)