# Scraped page header, not part of the program: "Spaces: Sleeping"
import streamlit as st | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
from streamlit.components.v1 import html | |
import nbformat | |
from nbconvert import HTMLExporter | |
from wordcloud import WordCloud | |
# Column names the app looks up in the jobs CSV; edit these if the file's
# headers are spelled differently.
category_column = 'category'
job_title_column = 'title'
description_column = 'description'
key_column = 'key'
# NOTE(review): the Home page table selects a lowercase 'date' column, so
# confirm which spelling the CSV header actually uses.
date_column = 'Date'

# Load the jobs CSV into a DataFrame.
file_path = 'category upwork jobs.csv'
jobs_df = pd.read_csv(file_path)

# Sidebar navigation: the label picked here decides which page is rendered.
page_names = ["Home", "Plots", "Notebook", "Download Datasets"]
st.sidebar.title("Navigation")
option = st.sidebar.radio("Go to", page_names)
# ---------------------------------------------------------------------------
# Page renderers. Each helper draws one of the pages selectable in the
# sidebar and reads the module-level DataFrame and column-name settings.
# ---------------------------------------------------------------------------


def _show_figure(fig) -> None:
    """Render *fig* in the app, then close it.

    Streamlit re-executes the script on every interaction and pyplot keeps
    each figure registered until it is closed, so figures that are never
    closed accumulate for the lifetime of the server process.
    """
    st.pyplot(fig)
    plt.close(fig)


def _bar_chart(counts, xlabel: str, ylabel: str, title: str) -> None:
    """Draw a bar chart of a ``value_counts()`` result with slanted x labels.

    Args:
        counts: Series whose index holds the bar labels and whose values
            hold the bar heights.
        xlabel: Label for the x axis.
        ylabel: Label for the y axis.
        title: Title shown above the chart.
    """
    fig, ax = plt.subplots()
    ax.bar(counts.index, counts.values)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    # Rotate the labels on this Axes explicitly rather than through pyplot's
    # implicit "current axes" state.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
    _show_figure(fig)


def _render_home() -> None:
    """Home page: list the jobs of one category chosen in the sidebar."""
    st.title("Jobs Dashboard")

    st.sidebar.header("Filter Jobs by Category")
    # Drop missing categories: a NaN option can never match a row in the
    # equality filter below, so offering it would always show an empty table.
    categories = jobs_df[category_column].dropna().unique()
    selected_category = st.sidebar.selectbox("Choose a category:", categories)

    filtered_jobs = jobs_df[jobs_df[category_column] == selected_category]

    # The table used to hard-code a lowercase 'date' column while the settings
    # name it through date_column; keep 'date' when the CSV has it and fall
    # back to the configured name otherwise.
    date_field = 'date' if 'date' in jobs_df.columns else date_column
    shown_columns = [job_title_column, key_column, description_column, date_field]

    st.write(f"Showing jobs in category: **{selected_category}**")
    st.dataframe(filtered_jobs[shown_columns])
    st.write(f"Total jobs in this category: {len(filtered_jobs)}")


def _render_plots() -> None:
    """Plots page: category distribution, top job titles and a word cloud."""
    st.title("Job Visualization")
    category_counts = jobs_df[category_column].value_counts()

    # 1. Number of jobs per category as a bar chart.
    st.subheader("Job Category Distribution")
    _bar_chart(
        category_counts,
        "Job Category",
        "Number of Jobs",
        "Distribution of Jobs Across Categories",
    )

    # 2. Share of each category as a pie chart.
    st.subheader("Job Category Proportions")
    pie_fig, pie_ax = plt.subplots(figsize=(10, 25))
    pie_ax.pie(
        category_counts,
        labels=category_counts.index,
        autopct='%1.1f%%',
        startangle=140,
    )
    pie_ax.axis('equal')  # Equal aspect ratio keeps the pie circular.
    _show_figure(pie_fig)

    # 3. The ten most frequent job titles.
    st.subheader("Top Job Titles")
    _bar_chart(
        jobs_df[job_title_column].value_counts().head(10),
        "Job Title",
        "Count",
        "Top 10 Job Titles",
    )

    # 4. Word cloud built from every non-missing job description.
    st.subheader("Word Cloud for Job Descriptions")
    # astype(str) keeps str.join from failing on non-string cells.
    description_text = ' '.join(jobs_df[description_column].dropna().astype(str))
    try:
        cloud = WordCloud(
            width=800, height=400, background_color='white'
        ).generate(description_text)
    except ValueError:
        # WordCloud raises ValueError when the text yields no usable words.
        st.info("No description text available to build a word cloud.")
    else:
        cloud_fig, cloud_ax = plt.subplots(figsize=(10, 5))
        cloud_ax.imshow(cloud, interpolation='bilinear')
        cloud_ax.axis('off')  # Hide the axes around the image.
        _show_figure(cloud_fig)


def _render_notebook() -> None:
    """Notebook page: embed the project notebook as HTML with a Colab link."""
    st.title("Jupyter Notebook")

    notebook_path = 'upwork_dashboard.ipynb'  # Update with the actual path to your notebook
    # Notebook files are UTF-8 JSON; without an explicit encoding open() uses
    # the platform default, which can fail on non-ASCII cell content.
    with open(notebook_path, encoding="utf-8") as f:
        notebook_content = nbformat.read(f, as_version=4)

    # Put a markdown cell linking to the Google Colab copy at the very top.
    colab_link = "[Open in Google Colab](https://colab.research.google.com/drive/1qoTldQ-Kr6DgePRNYgdlQqqHq5JQax0h?usp=sharing)"
    notebook_content.cells.insert(0, nbformat.v4.new_markdown_cell(colab_link))

    html_exporter = HTMLExporter()
    html_exporter.exclude_input = False  # Keep code cells in the rendered output.
    notebook_html, _ = html_exporter.from_notebook_node(notebook_content)

    html(notebook_html, height=800, scrolling=True)


def _offer_dataset(path: str, label: str, download_name: str, frame) -> None:
    """Show a download button for the file at *path*, then preview *frame*.

    Args:
        path: CSV file on disk whose bytes are offered for download.
        label: Text shown on the download button.
        download_name: File name suggested to the browser.
        frame: DataFrame displayed underneath the button.
    """
    with open(path, 'rb') as f:
        st.download_button(
            label=label,
            data=f,
            file_name=download_name,
            mime='text/csv',
        )
    st.dataframe(frame)


def _render_downloads() -> None:
    """Download page: offer both datasets for download with a preview each."""
    st.title("Download Datasets")
    # The categorised dataset is already loaded as jobs_df, so only the
    # original dataset needs to be read here.
    original_df = pd.read_csv("jobs.csv")

    st.markdown("Click the links below to download the datasets:")
    _offer_dataset(
        file_path,
        "Download Category Upwork Jobs Dataset",
        'category_upwork_jobs.csv',
        jobs_df,
    )
    _offer_dataset(
        "jobs.csv",
        "Download Original Dataset",
        'original scraped data.csv',  # fixed typo: was 'origina scraped data.csv'
        original_df,
    )


# Sidebar label -> function that renders the matching page.
_PAGE_RENDERERS = {
    "Home": _render_home,
    "Plots": _render_plots,
    "Notebook": _render_notebook,
    "Download Datasets": _render_downloads,
}

# An unrecognised selection renders nothing, exactly like an if/elif chain
# without a final else.
_render_page = _PAGE_RENDERERS.get(option)
if _render_page is not None:
    _render_page()