FerdiErs's picture
Update eda.py
ae0c8dc
import streamlit as st
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
from PIL import Image
from wordcloud import WordCloud
from wordcloud import ImageColorGenerator
from wordcloud import STOPWORDS
st.set_page_config(
page_title = 'Data Science Salary Estimator'
)
def run():
# Membuat Title
st.title('Data Science Salary Estimator')
#Sub header
st.subheader('Description for Data Science Salary Estimator')
# Insert Gambar
image = Image.open('gaji.jpg')
st.image(image, caption ='SALARY')
#description
st.write('The goals of this salary estimator')
st.write('as a data scientist i want to know if im getting the decent salary from the company, so i created this machine learning model to predict salary for jobs in data world.')
st.write('I hope with this salary estimator can help fellas data to see if they also get a decent salary or not')
st.markdown('---')
# Menambahkan Deskripsi
st.write('This page is created to show the visualization of the dataset')
st.markdown('---')
st.write('Description')
st.write('Experience Level')
st.write('EN, which is Entry-level. MI, which is Mid-level. SE, which is Senior-level. EX, which is Executive-level.')
st.write('Employment Type')
st.write('FT, which is Full Time. PT, which is Part Time. CT, which is Contract. FL, which is Freelance.')
st.write('Remote Ratio')
st.write('100, which is Full remote. 50, which is hybrid. 0, which is on site.')
st.markdown('---')
#show dataframe
data = pd.read_csv('https://raw.githubusercontent.com/FerdiErs/SQL/main/DataScienceSalaries.csv')
st.dataframe(data)
#membuat histogram salary
st.write('### Histogram Salary')
fig = plt.figure(figsize=(10,5))
sns.histplot(data['salary_in_usd'], kde=True, bins=40)
plt.title('Histogram of salary in usd')
st.pyplot(fig)
#membuat pie chart experience
st.write('### Experince Distribution')
exp = data.experience_level.value_counts()
def make_autopct(values):
def my_autopct(pct):
total = sum(values)
val = int(round(pct*total/100.0))
return '{p:.2f}% ({v:d})'.format(p=pct,v=val)
return my_autopct
fig = plt.figure(figsize=(5,5))
exp.plot.pie(autopct=make_autopct(exp))
plt.title('Experince Level Distribution')
st.pyplot(fig)
#barchart posisi dengan gaji terbesar
st.write('### 5 Role with highest paycheck')
work_rate = data.groupby(['job_title'])['salary_in_usd'].mean()
work = work_rate.nlargest(5)
fig = plt.figure(figsize=(15,5))
work.plot(kind = "bar")
plt.title('5 Role with Highest Paycheck')
st.pyplot(fig)
# negara dengan gaji tertinggi
st.write('### Country with highest paycheck')
location_payrate = data.groupby(['company_location'])['salary_in_usd'].sum()
lar = location_payrate.nlargest(5)
fig = plt.figure(figsize=(15,5))
lar.plot(kind = "bar")
plt.title('5 Countries Highest Paycheck')
st.pyplot(fig)
# popular job
st.write('### TOP 10 JOBS')
job = data.groupby(['job_title'])['job_title'].count()
top_job = job.nlargest(10)
fig = plt.figure(figsize=(12,6))
plt.xticks(rotation=0)
plt.title("Top 10 Jobs")
plt.ylabel('Job Titles')
plt.xlabel('Counts')
sns.barplot(y=top_job.index, x= top_job.values)
st.pyplot(fig)
#wordcloud
# see most job with word cloud
text = " ".join(i for i in data.job_title)
stopwords = set(STOPWORDS)
wordcloud = WordCloud(width=1600, height=800).generate(text)
fig = plt.figure( figsize=(15,10), facecolor='k')
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
st.pyplot(fig)
if __name__== '__main__':
run()