Spaces:

Yassmen
/

Job.web.scrapping

Running

App Files Files Community

Job.web.scrapping / app.py

Yassmen

Update app.py

59858af verified 9 months ago

raw

history blame

4.77 kB


	import streamlit as st
	import requests
	import numpy as np
	from PIL import Image
	import warnings
	warnings.filterwarnings("ignore")
	import requests
	import pandas as pd
	import numpy as np
	from bs4 import BeautifulSoup
	import bs4
	from urllib.request import urlopen
	import time
	import re
	import time
	import matplotlib.pyplot as plt
	import seaborn as sns
	import matplotlib as mpl
	import plotly
	import plotly.express as px
	import plotly.graph_objs as go
	import plotly.offline as py
	from plotly.offline import iplot
	from plotly.subplots import make_subplots
	import plotly.figure_factory as ff
	from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
	from selenium import webdriver
	from selenium.webdriver.common.by import By
	from selenium.webdriver.common.keys import Keys
	from selenium.webdriver.support.ui import WebDriverWait
	from selenium.webdriver.support import expected_conditions as EC
	from selenium.webdriver.chrome.service import Service

	import requests
	import platform
	import zipfile
	import os
	import subprocess

	import os
	import subprocess


	# Make sure the chromedriver is executable
	#os.chmod('chromedriver', 0o755)
	# Set up Chrome options
	#options = webdriver.ChromeOptions()
	#options.add_argument('--headless')
	#options.add_argument('--no-sandbox')
	#options.add_argument('--disable-dev-shm-usage')
	#from selenium import webdriver

	#options = webdriver.ChromeOptions()
	#options.add_argument('--headless')
	#driver = webdriver.Chrome(options=options)
	#print("'''''",driver.capabilities['browserVersion'],"''''")
	#driver.quit()
	# Initialize the ChromeDriver
	#service = Service('chromedriver')
	#driver = webdriver.Chrome(service=service, options=options)

	from wuzzuf_scraper import Wuzzuf_scrapping
	from linkedin_scraper import LINKEDIN_Scrapping
	from data_analysis import map_bubble,linkedin_exp,wuzzuf_exp

	####################### Streamlit app ################################


	# Configure the browser tab (title and icon) and switch to the wide layout.
	st.set_page_config(
	    page_title="My Web_Scrap Page",
	    page_icon=":tada:",
	    layout="wide",
	)


	# ---- HEADER SECTION ----
	# Two equal-width columns: introduction text on the left, image on the right.
	with st.container():
	    left_column, right_column = st.columns(2)
	    with left_column:
	        st.subheader("Hi! I am Yassmen :wave:")
	        # Title spelling corrected ("Communcation" -> "Communication").
	        st.title("An Electronics and Communication Engineer")
	        st.write(
	            "In this app we will scrap jobs from LinkedIn and Wuzzuf websites, let's get it started :boom:"
	        )
	        st.write("[Reach me >](https://www.linkedin.com/in/yassmen-youssef-48439a166/)")
	    with right_column:
	        # NOTE(review): `use_column_width` is reported as deprecated in recent
	        # Streamlit releases -- confirm the installed version before replacing it.
	        st.image("im.gif", use_column_width=True)



	import streamlit as st
	from streamlit_option_menu import option_menu

	#with st.sidebar:
	# selected = option_menu("Main Menu", ["select website", 'search job','numbers of jobs'], icons=['linkedin', 'search','123'], menu_icon="cast", default_index=1)

	# Choices offered by the sidebar drop-downs.
	webs = ["Wuzzuf", "Linkedin"]
	jobs = [
	    "Machine Learning",
	    "AI Engineer",
	    "Data Analysis",
	    "Software Testing",
	]
	nums = np.arange(1, 1000)  # selectable job counts: 1 through 999

	# The three selectors are always rendered in the sidebar; their current
	# values are read again on every run of the script.
	sidebar = st.sidebar
	site = sidebar.selectbox("select one website", webs)
	job = sidebar.selectbox("select one job", jobs)
	num_jobs = sidebar.selectbox("select num of jobs you want to scrap", nums)



	import streamlit.components.v1 as components

	# Empty frame kept from the original script; nothing visible in this file
	# reads it afterwards.
	n2 = pd.DataFrame()


	def _show_scrape_results(scrape, explore):
	    """Run one scraper for the sidebar selection and render the result tabs.

	    Reads the module-level ``job`` and ``num_jobs`` sidebar values.

	    Args:
	        scrape: Callable invoked as ``scrape(job, num_jobs)``. Its result is
	            presumably a pandas DataFrame (the fallback renderer calls
	            ``.astype``, ``.set_index`` and ``.index`` on it).
	        explore: Callable invoked with the scraped result inside the
	            "Data Exploration" tab.
	    """
	    with st.container():
	        st.write("---")
	        tab1, tab2, tab3 = st.tabs([" Data", " Bubble Map", "Data Exploration"])
	        with tab1:
	            with st.spinner('✨Now loading...'):
	                time.sleep(5)
	                n1 = scrape(job, num_jobs)
	                # Try progressively simpler renderers. `except Exception`
	                # (rather than a bare `except`) lets BaseException-derived
	                # signals such as KeyboardInterrupt propagate instead of
	                # being mistaken for a rendering failure.
	                try:
	                    tab1.dataframe(n1)
	                except Exception:
	                    try:
	                        # Stringify values and index as a second attempt.
	                        tab1.write(n1.astype(str).set_index(n1.index.astype(str)))
	                    except Exception:
	                        tab1.table(n1)
	        with tab2:
	            map_bubble(n1)
	        with tab3:
	            explore(n1)


	if st.sidebar.button('Start Scrapping'):
	    if site == "Wuzzuf":
	        _show_scrape_results(Wuzzuf_scrapping, wuzzuf_exp)
	    elif site == "Linkedin":
	        # The LinkedIn pipeline was disabled in the original script (it was
	        # wrapped in a triple-quoted string and marked "WILL CHANGE"), so only
	        # the divider is drawn. A bare string literal is still an expression
	        # statement that Streamlit "magic" may print on the page, so it is
	        # kept here as a real comment instead. The disabled code mirrored the
	        # Wuzzuf flow and can be restored with:
	        #     _show_scrape_results(LINKEDIN_Scrapping, linkedin_exp)
	        with st.container():
	            st.write("---")