Spaces:

DevUP-VietDevelopers
/

salary-estimator

Runtime error

App Files Files Community

vumichien committed on Dec 5, 2023

Commit

def27a0

1 Parent(s): 89f4618

up

Browse files

Files changed (17) hide show

.idea/.gitignore +8 -0
.idea/DataScienceSalary.iml +8 -0
.idea/inspectionProfiles/Project_Default.xml +95 -0
.idea/inspectionProfiles/profiles_settings.xml +6 -0
.idea/misc.xml +4 -0
.idea/modules.xml +8 -0
.idea/vcs.xml +6 -0
DataScienceSalaries.csv +0 -0
README.md +5 -6
__pycache__/eda.cpython-39.pyc +0 -0
__pycache__/prediction.cpython-39.pyc +0 -0
app.py +10 -0
eda.py +130 -0
gaji.jpg +0 -0
model.pkl +3 -0
prediction.py +58 -0
requirements.txt +9 -0

.idea/.gitignore ADDED Viewed

	@@ -0,0 +1,8 @@

+# Default ignored files
+/shelf/
+/workspace.xml
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
+# Editor-based HTTP Client requests
+/httpRequests/

.idea/DataScienceSalary.iml ADDED Viewed

	@@ -0,0 +1,8 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="jdk" jdkName="Python 3.9 (DataScienceSalary)" jdkType="Python SDK" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>

.idea/inspectionProfiles/Project_Default.xml ADDED Viewed

	@@ -0,0 +1,95 @@

+<component name="InspectionProjectProfileManager">
+  <profile version="1.0">
+    <option name="myName" value="Project Default" />
+    <inspection_tool class="DuplicatedCode" enabled="true" level="WEAK WARNING" enabled_by_default="true">
+      <Languages>
+        <language minSize="79" name="Python" />
+      </Languages>
+    </inspection_tool>
+    <inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
+      <option name="ignoredPackages">
+        <value>
+          <list size="62">
+            <item index="0" class="java.lang.String" itemvalue="beautifulsoup4" />
+            <item index="1" class="java.lang.String" itemvalue="bs4" />
+            <item index="2" class="java.lang.String" itemvalue="mecab-python3" />
+            <item index="3" class="java.lang.String" itemvalue="pydantic" />
+            <item index="4" class="java.lang.String" itemvalue="joblib" />
+            <item index="5" class="java.lang.String" itemvalue="scikit-learn" />
+            <item index="6" class="java.lang.String" itemvalue="torch" />
+            <item index="7" class="java.lang.String" itemvalue="numpy" />
+            <item index="8" class="java.lang.String" itemvalue="requests" />
+            <item index="9" class="java.lang.String" itemvalue="unidic-lite" />
+            <item index="10" class="java.lang.String" itemvalue="mysql-connector-python" />
+            <item index="11" class="java.lang.String" itemvalue="sqlalchemy" />
+            <item index="12" class="java.lang.String" itemvalue="pycaret" />
+            <item index="13" class="java.lang.String" itemvalue="tensorflow-text" />
+            <item index="14" class="java.lang.String" itemvalue="keybert" />
+            <item index="15" class="java.lang.String" itemvalue="pandas" />
+            <item index="16" class="java.lang.String" itemvalue="tqdm" />
+            <item index="17" class="java.lang.String" itemvalue="fastapi" />
+            <item index="18" class="java.lang.String" itemvalue="spacy" />
+            <item index="19" class="java.lang.String" itemvalue="regex" />
+            <item index="20" class="java.lang.String" itemvalue="tensorflow-hub" />
+            <item index="21" class="java.lang.String" itemvalue="uvicorn" />
+            <item index="22" class="java.lang.String" itemvalue="xgboost" />
+            <item index="23" class="java.lang.String" itemvalue="onnxruntime" />
+            <item index="24" class="java.lang.String" itemvalue="av" />
+            <item index="25" class="java.lang.String" itemvalue="onnx" />
+            <item index="26" class="java.lang.String" itemvalue="opencv-python-headless" />
+            <item index="27" class="java.lang.String" itemvalue="streamlit_webrtc" />
+            <item index="28" class="java.lang.String" itemvalue="torchvision" />
+            <item index="29" class="java.lang.String" itemvalue="cdifflib" />
+            <item index="30" class="java.lang.String" itemvalue="protobuf" />
+            <item index="31" class="java.lang.String" itemvalue="python-stdnum" />
+            <item index="32" class="java.lang.String" itemvalue="transformers" />
+            <item index="33" class="java.lang.String" itemvalue="nltk" />
+            <item index="34" class="java.lang.String" itemvalue="sentence-transformers" />
+            <item index="35" class="java.lang.String" itemvalue="datasets" />
+            <item index="36" class="java.lang.String" itemvalue="fsspec" />
+            <item index="37" class="java.lang.String" itemvalue="neuralcoref" />
+            <item index="38" class="java.lang.String" itemvalue="dateparse" />
+            <item index="39" class="java.lang.String" itemvalue="faker" />
+            <item index="40" class="java.lang.String" itemvalue="sentencepiece" />
+            <item index="41" class="java.lang.String" itemvalue="langid" />
+            <item index="42" class="java.lang.String" itemvalue="boto3" />
+            <item index="43" class="java.lang.String" itemvalue="redis" />
+            <item index="44" class="java.lang.String" itemvalue="celery" />
+            <item index="45" class="java.lang.String" itemvalue="gensim" />
+            <item index="46" class="java.lang.String" itemvalue="PyMuPDF" />
+            <item index="47" class="java.lang.String" itemvalue="opencv-python" />
+            <item index="48" class="java.lang.String" itemvalue="h5py" />
+            <item index="49" class="java.lang.String" itemvalue="Shapely" />
+            <item index="50" class="java.lang.String" itemvalue="basemap" />
+            <item index="51" class="java.lang.String" itemvalue="reportlab" />
+            <item index="52" class="java.lang.String" itemvalue="snowflake-connector-python" />
+            <item index="53" class="java.lang.String" itemvalue="importlib-resources" />
+            <item index="54" class="java.lang.String" itemvalue="pdfplumber" />
+            <item index="55" class="java.lang.String" itemvalue="pymc3" />
+            <item index="56" class="java.lang.String" itemvalue="textract" />
+            <item index="57" class="java.lang.String" itemvalue="statsmodels" />
+            <item index="58" class="java.lang.String" itemvalue="aiohttp" />
+            <item index="59" class="java.lang.String" itemvalue="thinc" />
+            <item index="60" class="java.lang.String" itemvalue="torchaudio" />
+            <item index="61" class="java.lang.String" itemvalue="Pillow" />
+          </list>
+        </value>
+      </option>
+    </inspection_tool>
+    <inspection_tool class="PyPep8NamingInspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
+      <option name="ignoredErrors">
+        <list>
+          <option value="N801" />
+        </list>
+      </option>
+    </inspection_tool>
+    <inspection_tool class="PyTypedDictInspection" enabled="false" level="WARNING" enabled_by_default="false" />
+    <inspection_tool class="PyUnresolvedReferencesInspection" enabled="true" level="WARNING" enabled_by_default="true">
+      <option name="ignoredIdentifiers">
+        <list>
+          <option value="wbc.assets.templates.template_script.process.pattern_name" />
+        </list>
+      </option>
+    </inspection_tool>
+  </profile>
+</component>

.idea/inspectionProfiles/profiles_settings.xml ADDED Viewed

	@@ -0,0 +1,6 @@

+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>

.idea/misc.xml ADDED Viewed

	@@ -0,0 +1,4 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (DataScienceSalary)" project-jdk-type="Python SDK" />
+</project>

.idea/modules.xml ADDED Viewed

	@@ -0,0 +1,8 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/DataScienceSalary.iml" filepath="$PROJECT_DIR$/.idea/DataScienceSalary.iml" />
+    </modules>
+  </component>
+</project>

.idea/vcs.xml ADDED Viewed

	@@ -0,0 +1,6 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
+  </component>
+</project>

DataScienceSalaries.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

README.md CHANGED Viewed

@@ -1,13 +1,12 @@
 ---
-title: Salary Estimator
-emoji: 🚀
-colorFrom: indigo
-colorTo: gray
 sdk: streamlit
-sdk_version: 1.29.0
 app_file: app.py
 pinned: false
-license: apache-2.0
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: DataScienceSalary
+emoji: 🌍
+colorFrom: red
+colorTo: green
 sdk: streamlit
+sdk_version: 1.21.0
 app_file: app.py
 pinned: false
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

__pycache__/eda.cpython-39.pyc ADDED Viewed

Binary file (4.7 kB). View file

__pycache__/prediction.cpython-39.pyc ADDED Viewed

Binary file (1.96 kB). View file

app.py ADDED Viewed

	@@ -0,0 +1,10 @@

+import streamlit as st
+import eda
+import prediction
+navigation = st.sidebar.selectbox('Choose Page : ', ('Description','Salary Estimator'))
+if navigation == 'Description':
+    eda.run()
+else:
+    prediction.run()

eda.py ADDED Viewed

	@@ -0,0 +1,130 @@

+import streamlit as st
+import pandas as pd
+import seaborn as sns
+import matplotlib.pyplot as plt
+import plotly.express as px
+from PIL import Image
+from wordcloud import WordCloud
+from wordcloud import ImageColorGenerator
+from wordcloud import STOPWORDS
+# Page configuration: sets the page title. This call sits at module level, so it
+# executes when this module is imported rather than when run() is called.
+st.set_page_config(
+    page_title='Công cụ ước tính lương nghề khoa học dữ liệu'
+)
+def run():
+    # Membuat Title
+    st.title('Công cụ ước tính lương nghề khoa học dữ liệu')
+    # Sub header
+    st.subheader('Mô tả cho công cụ ước tính lương')
+    # Insert Gambar
+    image = Image.open('gaji.jpg')
+    st.image(image, caption='Lương', use_column_width=True)
+    # description
+    st.write('Mục tiêu của công cụ ước tính lương')
+    st.write(
+        'Với tư cách là một nhà khoa học dữ liệu, tôi muốn biết liệu tôi có nhận được mức lương xứng đáng từ công ty hay không, vì vậy tôi đã tạo ra mô hình học máy này để dự đoán mức lương cho các công việc trong thế giới dữ liệu.')
+    st.write('Tôi hy vọng với công cụ ước tính lương này có thể giúp các bạn biết liệu mình có nhận được mức lương xứng đáng hay không.')
+    st.markdown('---')
+    # Menambahkan Deskripsi
+    st.write('Trang này được tạo để hiển thị trực quan của tập dữ liệu')
+    st.markdown('---')
+    st.write('Mô tả')
+    st.write('Cấp độ kinh nghiệm')
+    st.write(
+        'EN, là cấp độ đầu vào. MI, là cấp độ trung bình. SE, cấp độ cao cấp. EX, cấp điều hành.')
+    st.write('Hình thức hợp đồng')
+    st.write('FT, tức là Toàn thời gian. PT, tức là Bán thời gian. CT, đó là Hợp đồng. FL, đó là nghề tự do.')
+    st.write('Tỷ lệ làm từ xa')
+    st.write('100, hoàn toàn từ xa. 50, là loại kết hợp. 0, là hoàn toàn làm trên công ty.')
+    st.markdown('---')
+    # show dataframe
+    data = pd.read_csv('DataScienceSalaries.csv')
+    st.dataframe(data)
+    # membuat histogram salary
+    st.write('### Biểu đồ lương')
+    fig = plt.figure(figsize=(10, 5), dpi=150)
+    sns.histplot(data['salary_in_usd'], kde=True, bins=40)
+    plt.xlabel('Lương bằng usd', fontsize=15)
+    plt.ylabel('Số lượng', fontsize=15)
+    plt.title('Biểu đồ lương bằng usd', fontsize=15)
+    st.pyplot(fig)
+    # membuat pie chart experience
+    st.write('### Biểu đồ phân phối cấp độ kinh nghiệm')
+    exp = data.experience_level.value_counts()
+    def make_autopct(values):
+        def my_autopct(pct):
+            total = sum(values)
+            val = int(round(pct * total / 100.0))
+            return '{p:.2f}%  ({v:d})'.format(p=pct, v=val)
+        return my_autopct
+    fig = plt.figure(figsize=(5, 5), dpi=150)
+    exp.plot.pie(autopct=make_autopct(exp))
+    plt.title('Biểu đồ phân phối cấp độ kinh nghiệm', fontsize=15)
+    st.pyplot(fig)
+    # barchart posisi dengan gaji terbesar
+    st.write('### 5 Vị trí có mức lương cao nhất')
+    work_rate = data.groupby(['job_title'])['salary_in_usd'].mean()
+    work = work_rate.nlargest(5)
+    fig = plt.figure(figsize=(15, 5), dpi=150)
+    work.plot(kind="bar")
+    plt.title('5 Vị trí có mức lương cao nhất', fontsize=15)
+    plt.xlabel('Tên vị trí', fontsize=15)
+    plt.xticks(rotation=45)
+    plt.ylabel('Lương bằng usd', fontsize=15)
+    st.pyplot(fig)
+    # negara dengan gaji tertinggi
+    st.write('### 5 Quốc gia có mức lương cao nhất')
+    location_payrate = data.groupby(['company_location'])['salary_in_usd'].sum()
+    lar = location_payrate.nlargest(5)
+    fig = plt.figure(figsize=(15, 8), dpi=150)
+    lar.plot(kind="bar")
+    plt.title('5 Quốc gia có mức lương cao nhất', fontsize=15)
+    plt.xlabel('Tên quốc gia', fontsize=15)
+    plt.xticks(rotation=0)
+    plt.ylabel('Lương bằng usd', fontsize=15)
+    st.pyplot(fig)
+    # popular job
+    st.write('### Top 10 công việc')
+    job = data.groupby(['job_title'])['job_title'].count()
+    top_job = job.nlargest(10)
+    fig = plt.figure(figsize=(12, 12), dpi=150)
+    plt.xticks(rotation=0)
+    plt.title("Top 10 công việc", fontsize=15)
+    plt.ylabel('Tên công việc', fontsize=15)
+    plt.xlabel('Số lượng', fontsize=15)
+    sns.barplot(y=top_job.index, x=top_job.values)
+    st.pyplot(fig)
+    # wordcloud
+    # see most job with word cloud
+    text = " ".join(i for i in data.job_title)
+    stopwords = set(STOPWORDS)
+    wordcloud = WordCloud(width=1600, height=800).generate(text)
+    fig = plt.figure(figsize=(15, 10), facecolor='k')
+    plt.imshow(wordcloud, interpolation='bilinear')
+    plt.axis("off")
+    st.pyplot(fig)
+if __name__ == '__main__':
+    run()

gaji.jpg ADDED Viewed

model.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5c2e808cb0369e96e7d65665dde67791a3e360b4c4b1af3187a4b6f8c9485e19
+size 166331

prediction.py ADDED Viewed

	@@ -0,0 +1,58 @@

+import streamlit as st
+import pandas as pd
+import numpy as np
+import pickle
+import json
+import joblib as jb
+# Load the trained model from 'model.pkl' with joblib. This runs at import time.
+# NOTE(review): unpickling generally needs library versions compatible with the
+# ones used when the model was saved — presumably why scikit-learn is pinned in
+# requirements.txt; confirm.
+model = jb.load('model.pkl')
+# Load the dataset; run() reads its columns to populate the form's select boxes.
+df = pd.read_csv('DataScienceSalaries.csv')
+def run():
+    st.markdown("<h1 style='text-align: center;'>Salary Estimator</h1>", unsafe_allow_html=True)
+    # description
+    st.subheader('Vui lòng kiểm tra mức lương của bạn tại đây.')
+    with st.form('key=form_prediction'):
+        year = st.selectbox('Năm làm việc', df['work_year'].unique())
+        experience = st.selectbox('Cấp độ kinh nghiệm', df['experience_level'].unique())
+        employment = st.selectbox('Hình thức hợp đồng', df['employment_type'].unique())
+        job = st.selectbox('Vị trí công việc', sorted(df['job_title'].unique()))
+        residence = st.selectbox('Quốc tịch', sorted(df['employee_residence'].unique()))
+        remote = st.selectbox('Làm việc từ xa', df['remote_ratio'].unique())
+        location = st.selectbox('Vị trí công ty', sorted(df['company_location'].unique()))
+        size = st.selectbox('Quy mô công ty', df['company_size'].unique())
+        submitted = st.form_submit_button('Dự đoán')
+    inf = {
+        'work_year': year,
+        'experience_level': experience,
+        'employment_type': employment,
+        'job_title': job,
+        'employee_residence': residence,
+        'remote_ratio': remote,
+        'company_location': location,
+        'company_size': size
+    }
+    data_inf = pd.DataFrame([inf])
+    st.dataframe(data_inf)
+    if submitted:
+        # Predict using bagging
+        y_pred_inf = model.predict(data_inf)
+        st.write('Với kinh nghiệm này bạn sẽ nhận được mức lương khoảng')
+        st.write('# $', str(int(y_pred_inf)))
+        st.write(
+            'LƯU Ý: Hãy nhớ rằng mô hình này không chính xác 100%, vui lòng kiểm tra lại với một trang web khác về tiền lương như Glassdoor')
+if __name__ == '__main__':
+    run()

requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+streamlit
+pandas
+seaborn
+matplotlib
+plotly
+Pillow
+numpy
+WordCloud
+scikit-learn==1.2.2