File size: 3,863 Bytes
e107c83
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ae0c8dc
 
 
 
 
 
e107c83
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import streamlit as st 
import pandas as pd 
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px 
from PIL import Image
from wordcloud import WordCloud
from wordcloud import ImageColorGenerator
from wordcloud import STOPWORDS

# Configure the Streamlit page title for this app.
st.set_page_config(page_title='Data Science Salary Estimator')

def _make_autopct(values):
    """Build a matplotlib ``autopct`` callback showing percentage and count.

    Args:
        values: Iterable of the wedge sizes given to the pie chart.

    Returns:
        A function that maps a wedge percentage to a label of the form
        ``'<pct>%  (<count>)'``, where the count is reconstructed from the
        percentage and the total of ``values``.
    """
    # The total does not change between wedges, so compute it only once.
    total = sum(values)

    def my_autopct(pct):
        val = int(round(pct * total / 100.0))
        return '{p:.2f}%  ({v:d})'.format(p=pct, v=val)

    return my_autopct


def _show_figure(fig):
    """Render ``fig`` in the Streamlit page, then release it.

    pyplot keeps every figure created through ``plt.figure`` registered until
    it is closed; without the explicit close, figures would accumulate each
    time the script is executed again.
    """
    try:
        st.pyplot(fig)
    finally:
        plt.close(fig)


def run() -> None:
    """Render the data-exploration page of the salary estimator app.

    The page shows, in order: a title and introduction with an image, a
    glossary of the coded columns, the raw dataset, a salary histogram, an
    experience-level pie chart, bar charts of the top roles and company
    locations by salary, the ten most frequent job titles, and a word cloud
    built from the job titles.

    The dataset is downloaded with ``pd.read_csv`` on every call, and the
    image is read from ``gaji.jpg`` relative to the working directory.
    """
    # Page title and sub-header.
    st.title('Data Science Salary Estimator')
    st.subheader('Description for Data Science Salary Estimator')

    # Banner image. The context manager closes the underlying file once the
    # image has been handed to Streamlit.
    with Image.open('gaji.jpg') as image:
        st.image(image, caption='SALARY')

    # Goals of the app.
    st.write('The goals of this salary estimator')
    st.write('as a data scientist i want to know if im getting the decent salary from the company, so i created this machine learning model to predict salary for jobs in data world.')
    st.write('I hope with this salary estimator can help fellas data to see if they also get a decent salary or not')
    st.markdown('---')

    # Purpose of this page.
    st.write('This page is created to show the visualization of the dataset')

    st.markdown('---')

    # Glossary for the coded categorical columns.
    st.write('Description')
    st.write('Experience Level')
    st.write('EN, which is Entry-level. MI, which is Mid-level. SE, which is Senior-level. EX, which is Executive-level.')

    st.write('Employment Type')
    st.write('FT, which is Full Time. PT, which is Part Time. CT, which is Contract. FL, which is Freelance.')

    st.write('Remote Ratio')
    st.write('100, which is Full remote. 50, which is hybrid. 0, which is on site.')

    st.markdown('---')

    # Load and display the raw dataset.
    data = pd.read_csv('https://raw.githubusercontent.com/FerdiErs/SQL/main/DataScienceSalaries.csv')
    st.dataframe(data)

    # Histogram of salaries in USD.
    st.write('### Histogram Salary')
    fig = plt.figure(figsize=(10, 5))
    sns.histplot(data['salary_in_usd'], kde=True, bins=40)
    plt.title('Histogram of salary in usd')
    _show_figure(fig)

    # Pie chart of the experience-level distribution.
    st.write('### Experince Distribution')
    exp = data['experience_level'].value_counts()
    fig = plt.figure(figsize=(5, 5))
    exp.plot.pie(autopct=_make_autopct(exp))
    plt.title('Experince Level Distribution')
    _show_figure(fig)

    # Bar chart of the five job titles with the highest mean salary.
    st.write('### 5 Role with highest paycheck')
    work_rate = data.groupby(['job_title'])['salary_in_usd'].mean()
    work = work_rate.nlargest(5)
    fig = plt.figure(figsize=(15, 5))
    work.plot(kind="bar")
    plt.title('5 Role with Highest Paycheck')
    _show_figure(fig)

    # Bar chart of the five company locations with the highest salary total.
    # NOTE(review): this ranks by the SUM of salaries, whereas the role chart
    # above ranks by the MEAN; a sum favours locations with more rows.
    # Confirm which aggregation is intended before changing it.
    st.write('### Country with highest paycheck')
    location_payrate = data.groupby(['company_location'])['salary_in_usd'].sum()
    lar = location_payrate.nlargest(5)
    fig = plt.figure(figsize=(15, 5))
    lar.plot(kind="bar")
    plt.title('5 Countries Highest Paycheck')
    _show_figure(fig)

    # Horizontal bar chart of the ten most frequent job titles.
    st.write('### TOP 10 JOBS')
    job = data.groupby(['job_title'])['job_title'].count()
    top_job = job.nlargest(10)
    fig = plt.figure(figsize=(12, 6))
    sns.barplot(y=top_job.index, x=top_job.values)
    # Title and labels are applied after plotting so that any axis labels
    # seaborn derives from the data cannot replace them.
    plt.xticks(rotation=0)
    plt.title("Top 10 Jobs")
    plt.ylabel('Job Titles')
    plt.xlabel('Counts')
    _show_figure(fig)

    # Word cloud of the job titles. Missing values are dropped and the rest
    # coerced to str so that the join cannot fail on a non-string entry.
    text = " ".join(data['job_title'].dropna().astype(str))
    stopwords = set(STOPWORDS)
    wordcloud = WordCloud(width=1600, height=800, stopwords=stopwords).generate(text)
    fig = plt.figure(figsize=(15, 10), facecolor='k')
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis("off")
    _show_figure(fig)



# Entry point: render the page only when this file is run as the main script,
# not when it is imported as a module.
if __name__ == "__main__":
    run()