import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
from streamlit.components.v1 import html
import nbformat
from nbconvert import HTMLExporter
from wordcloud import WordCloud
# Streamlit dashboard for the Upwork jobs dataset.
#
# The script loads the jobs CSV once, then renders one of four pages chosen
# from the sidebar: a category-filtered job table ("Home"), summary charts
# ("Plots"), a rendered Jupyter notebook ("Notebook"), and download buttons
# for the raw datasets ("Download Datasets").

# Load the CSV data
file_path = 'category upwork jobs (1).csv'
jobs_df = pd.read_csv(file_path)

# Column names as used in the CSV; adjust here if the schema differs.
category_column = 'category'
job_title_column = 'title'
description_column = 'description'
key_column = 'key'
date_column = 'Date'

# Sidebar menu
# NOTE(review): the widget call was missing from the original line; a sidebar
# radio is assumed -- swap for st.sidebar.selectbox if that was the intent.
option = st.sidebar.radio(
    "Go to", ["Home", "Plots", "Notebook", "Download Datasets"]
)

# Home page: display data with a category filter
if option == "Home":
    st.title("Jobs Dashboard")

    st.sidebar.header("Filter Jobs by Category")
    # Drop missing categories: a NaN option can never match the equality
    # filter below, so it would only ever show an empty table.
    categories = jobs_df[category_column].dropna().unique()
    selected_category = st.sidebar.selectbox("Choose a category:", categories)

    filtered_jobs = jobs_df[jobs_df[category_column] == selected_category]

    st.write(f"Showing jobs in category: **{selected_category}**")

    # Show the descriptive columns that are actually present in the CSV so a
    # renamed or missing column does not crash the page.
    wanted_columns = (
        job_title_column,
        description_column,
        key_column,
        date_column,
    )
    display_columns = [c for c in wanted_columns if c in filtered_jobs.columns]
    st.dataframe(
        filtered_jobs[display_columns] if display_columns else filtered_jobs
    )

    st.write(f"Total jobs in this category: {len(filtered_jobs)}")

# Plots page: category distribution, top titles and a word cloud
elif option == "Plots":
    st.title("Job Visualization")
    category_counts = jobs_df[category_column].value_counts()

    # 1. Job category distribution bar plot
    st.subheader("Job Category Distribution")
    # NOTE(review): the original palette expression was lost; tab20 is an
    # assumption -- any sequence of colors works here.
    colors = plt.get_cmap("tab20").colors
    fig1, ax1 = plt.subplots(figsize=(10, 6))
    bars = ax1.bar(
        category_counts.index,
        category_counts.values,
        color=colors[:len(category_counts)],
    )
    ax1.set_xlabel("Job Category")
    ax1.set_ylabel("Number of Jobs")
    ax1.set_title("Distribution of Jobs Across Categories")
    plt.xticks(rotation=45, ha="right")
    # The legend carries the per-category counts instead of labels on the bars.
    legend_labels = [
        f"{category}: {int(count)}"
        for category, count in zip(category_counts.index, category_counts.values)
    ]
    ax1.legend(
        bars,
        legend_labels,
        title="Job Categories",
        loc="upper left",
        bbox_to_anchor=(1, 1),
    )
    fig1.subplots_adjust(right=0.75)  # leave room for the outside legend
    st.pyplot(fig1)
    # Streamlit reruns the whole script on every interaction; close the
    # figure so pyplot does not accumulate open figures across reruns.
    plt.close(fig1)

    # 2. Pie chart for category proportions
    st.subheader("Job Category Proportions")
    fig2, ax2 = plt.subplots(figsize=(10, 10))
    # NOTE(review): only `startangle=140` survived in the original call; the
    # values/labels/autopct arguments are reconstructed. `autopct` is needed
    # for pie() to return the third (`autotexts`) element unpacked below.
    wedges, texts, autotexts = ax2.pie(
        category_counts.values,
        labels=category_counts.index,
        autopct='%1.1f%%',
        startangle=140,
    )
    ax2.axis('equal')  # equal aspect ratio keeps the pie circular
    for text in texts:
        text.set_fontsize(10)
    for autotext in autotexts:
        autotext.set_color('white')
        autotext.set_fontsize(10)
    ax2.legend(
        wedges,
        category_counts.index,
        title="Job Categories",
        loc="center left",
        bbox_to_anchor=(1, 0, 0.5, 1),
    )
    st.pyplot(fig2)
    plt.close(fig2)

    # 4. Top job titles bar plot
    st.subheader("Top Job Titles")
    top_job_titles = jobs_df[job_title_column].value_counts().head(10)
    fig4, ax4 = plt.subplots(figsize=(10, 6))
    bars = ax4.bar(
        top_job_titles.index, top_job_titles.values, color='lightcoral'
    )
    ax4.set_xlabel("Job Title")
    ax4.set_title("Top 10 Job Titles")
    plt.xticks(rotation=45, ha="right")
    # Print each bar's count centered just above the bar.
    for bar in bars:
        yval = bar.get_height()
        ax4.text(
            bar.get_x() + bar.get_width() / 2,
            yval,
            str(int(yval)),
            ha='center',
            va='bottom',
            color='black',
        )
    fig4.subplots_adjust(right=0.85)
    st.pyplot(fig4)
    plt.close(fig4)

    # 5. Word cloud for job descriptions
    st.subheader("Word Cloud for Job Descriptions")
    # Cast to str so a stray non-string cell does not break the join.
    description_text = ' '.join(
        jobs_df[description_column].dropna().astype(str)
    )
    if description_text.strip():
        wordcloud = WordCloud(
            width=800, height=400, background_color='white'
        ).generate(description_text)
        fig5, ax5 = plt.subplots(figsize=(10, 5))
        ax5.imshow(wordcloud, interpolation='bilinear')
        ax5.axis('off')
        st.pyplot(fig5)
        plt.close(fig5)
    else:
        # WordCloud.generate raises on text without any words.
        st.info("No job descriptions available to build a word cloud.")

# Notebook page: render the Jupyter notebook as HTML
elif option == "Notebook":
    st.title("Jupyter Notebook")

    notebook_path = 'upwork_dashboard.ipynb'  # update with the actual path
    with open(notebook_path, encoding="utf-8") as f:
        notebook_content = nbformat.read(f, as_version=4)

    # TODO: the Colab URL was truncated in the original source (the link text
    # ended at "("); set it here. The link cell is only inserted when a URL is
    # present so the page never shows broken markdown.
    colab_url = ""
    if colab_url:
        colab_link = f"[Open in Google Colab]({colab_url})"
        new_cell = nbformat.v4.new_markdown_cell(colab_link)
        # Put the link at the very top of the rendered notebook.
        notebook_content.cells.insert(0, new_cell)

    html_exporter = HTMLExporter()
    html_exporter.exclude_input = False  # keep code cells in the output
    notebook_html, _ = html_exporter.from_notebook_node(notebook_content)

    html(notebook_html, height=800, scrolling=True)

# Download page: offer the raw datasets as downloads
elif option == "Download Datasets":
    st.title("Download Datasets")
    st.markdown("Click the links below to download the datasets:")

    # (path on disk, button label, suggested download file name)
    # NOTE(review): the first entry's download file name was missing from the
    # original; the on-disk name is reused here.
    datasets = [
        (
            "category upwork jobs.csv",
            "Download Category Upwork Jobs Dataset",
            'category upwork jobs.csv',
        ),
        (
            "jobs.csv",
            "Download previous Dataset",
            'previous Dataset data.csv',
        ),
    ]
    for dataset_path, button_label, download_name in datasets:
        with open(dataset_path, 'rb') as f:
            st.download_button(
                label=button_label,
                data=f.read(),
                file_name=download_name,
                mime='text/csv',
            )