Spaces:

FerdiErs
/

DataScienceSalary

Runtime error

App Files Files Community

DataScienceSalary / eda.py

FerdiErs

Update eda.py

ae0c8dc almost 2 years ago

raw

history blame contribute delete

3.86 kB

	import streamlit as st
	import pandas as pd
	import seaborn as sns
	import matplotlib.pyplot as plt
	import plotly.express as px
	from PIL import Image
	from wordcloud import WordCloud
	from wordcloud import ImageColorGenerator
	from wordcloud import STOPWORDS

	st.set_page_config(
	page_title = 'Data Science Salary Estimator'
	)

	def run():

	# Membuat Title
	st.title('Data Science Salary Estimator')

	#Sub header
	st.subheader('Description for Data Science Salary Estimator')

	# Insert Gambar
	image = Image.open('gaji.jpg')
	st.image(image, caption ='SALARY')

	#description
	st.write('The goals of this salary estimator')
	st.write('as a data scientist i want to know if im getting the decent salary from the company, so i created this machine learning model to predict salary for jobs in data world.')
	st.write('I hope with this salary estimator can help fellas data to see if they also get a decent salary or not')
	st.markdown('---')

	# Menambahkan Deskripsi
	st.write('This page is created to show the visualization of the dataset')

	st.markdown('---')

	st.write('Description')
	st.write('Experience Level')
	st.write('EN, which is Entry-level. MI, which is Mid-level. SE, which is Senior-level. EX, which is Executive-level.')

	st.write('Employment Type')
	st.write('FT, which is Full Time. PT, which is Part Time. CT, which is Contract. FL, which is Freelance.')

	st.write('Remote Ratio')
	st.write('100, which is Full remote. 50, which is hybrid. 0, which is on site.')

	st.markdown('---')



	#show dataframe
	data = pd.read_csv('https://raw.githubusercontent.com/FerdiErs/SQL/main/DataScienceSalaries.csv')
	st.dataframe(data)


	#membuat histogram salary
	st.write('### Histogram Salary')
	fig = plt.figure(figsize=(10,5))
	sns.histplot(data['salary_in_usd'], kde=True, bins=40)
	plt.title('Histogram of salary in usd')
	st.pyplot(fig)

	#membuat pie chart experience
	st.write('### Experince Distribution')
	exp = data.experience_level.value_counts()
	def make_autopct(values):
	def my_autopct(pct):
	total = sum(values)
	val = int(round(pct*total/100.0))
	return '{p:.2f}% ({v:d})'.format(p=pct,v=val)
	return my_autopct
	fig = plt.figure(figsize=(5,5))
	exp.plot.pie(autopct=make_autopct(exp))
	plt.title('Experince Level Distribution')
	st.pyplot(fig)


	#barchart posisi dengan gaji terbesar
	st.write('### 5 Role with highest paycheck')
	work_rate = data.groupby(['job_title'])['salary_in_usd'].mean()
	work = work_rate.nlargest(5)
	fig = plt.figure(figsize=(15,5))
	work.plot(kind = "bar")
	plt.title('5 Role with Highest Paycheck')
	st.pyplot(fig)


	# negara dengan gaji tertinggi
	st.write('### Country with highest paycheck')
	location_payrate = data.groupby(['company_location'])['salary_in_usd'].sum()
	lar = location_payrate.nlargest(5)
	fig = plt.figure(figsize=(15,5))
	lar.plot(kind = "bar")
	plt.title('5 Countries Highest Paycheck')
	st.pyplot(fig)


	# popular job
	st.write('### TOP 10 JOBS')
	job = data.groupby(['job_title'])['job_title'].count()
	top_job = job.nlargest(10)
	fig = plt.figure(figsize=(12,6))
	plt.xticks(rotation=0)
	plt.title("Top 10 Jobs")
	plt.ylabel('Job Titles')
	plt.xlabel('Counts')
	sns.barplot(y=top_job.index, x= top_job.values)
	st.pyplot(fig)


	#wordcloud
	# see most job with word cloud
	text = " ".join(i for i in data.job_title)
	stopwords = set(STOPWORDS)
	wordcloud = WordCloud(width=1600, height=800).generate(text)
	fig = plt.figure( figsize=(15,10), facecolor='k')
	plt.imshow(wordcloud, interpolation='bilinear')
	plt.axis("off")
	st.pyplot(fig)



	if __name__== '__main__':
	run()