Spaces:

charulp2499
/

JobScrapper

Running

App Files Files

charulp2499 committed on Jan 20

Commit

52c07e0

verified ·

1 Parent(s): 8ea77e6

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -76

app.py CHANGED Viewed

@@ -1,9 +1,8 @@
-import gradio as gr
 import logging
 from linkedin_jobs_scraper import LinkedinScraper
-from linkedin_jobs_scraper.events import Events, EventData, EventMetrics
 from linkedin_jobs_scraper.query import Query, QueryOptions, QueryFilters
-from linkedin_jobs_scraper.filters import RelevanceFilters, TimeFilters, OnSiteOrRemoteFilters
 import pandas as pd
 # Configure logging
@@ -19,38 +18,30 @@ def on_data(data: EventData):
         'Title': data.title,
         'Company': data.company,
         'Location': data.location,
-        # 'Company Link': data.company_link,
         'Job Link': data.link,
-        # 'Insights': data.insights,
         'Description Length': len(data.description),
     })
 def on_end():
     print("[ON_END] Scraping completed.")
-# LinkedIn Scraper function
-def scrape_jobs(query, locations, time_filter):
     global job_data
     try:
         job_data = []
-        if time_filter == "From Past Month":
-            time_filter = TimeFilters.MONTH
-        elif time_filter == "From Last 24 Hours":
-            time_filter = TimeFilters.DAY
-        else:
-            time_filter = TimeFilters.MONTH
         scraper = LinkedinScraper(
             chrome_executable_path=None,
             chrome_binary_location=None,
             chrome_options=None,
             headless=True,
-            max_workers=10,
             slow_mo=0.8,
             page_load_timeout=60,
         )
         scraper.on(Events.DATA, on_data)
         scraper.on(Events.END, on_end)
@@ -60,13 +51,11 @@ def scrape_jobs(query, locations, time_filter):
                 options=QueryOptions(
                     locations=locations.split(','),
                     apply_link=True,
-                    skip_promoted_jobs=False,
                     page_offset=0,
                     limit=100,
                     filters=QueryFilters(
-                        # relevance=RelevanceFilters.RECENT,
-                        time=time_filter,
-                        # on_site_or_remote=OnSiteOrRemoteFilters.REMOTE,
                     ),
                 ),
             ),
@@ -75,77 +64,36 @@ def scrape_jobs(query, locations, time_filter):
         scraper.run(queries)
         # Convert to DataFrame and return
-        # Save the job data to a CSV file after scraping ends
-        # if job_data:
-        #     # Save the job data to a CSV file
-        #     file_name = "jobs_data.csv"
-        #     df = pd.DataFrame(job_data)
-        #     df.to_csv(file_name, index=False)
-        #     message = f"Jobs data saved to {file_name}"
-        #     return file_name, message  # Return the CSV file path and success message
-        # else:
-        #     message = "No job data found for the given query and locations."
-        #     return None, message
         if job_data:
-            df = pd.DataFrame(job_data)
-            message = f"Jobs ({len(job_data)}) data successfully scraped."
-            return df, message  # Return DataFrame and message
         else:
-            return pd.DataFrame(), '-'
     except Exception as e:
         # Handle errors gracefully
-        message = f"An error occurred during scraping: {e}"
         return None, message
-# Define Gradio interface
-# def gradio_interface(query, locations):
-#     csv_data, message = scrape_jobs(query, locations)
-#     if csv_data:
-#         return csv_data, message
-#     else:
-#         return None, "No results to display."
-def gradio_interface(query, locations, time_filter):
-    df, message = scrape_jobs(query, locations, time_filter)
-    return df, message
-# # Gradio app layout
-# iface = gr.Interface(
-#     fn=gradio_interface,
-#     inputs=[
-#         gr.Textbox(label="Job Query", placeholder="e.g., Data Scientist", value="Unity developers"),
-#         gr.Textbox(label="Locations (comma-separated)", placeholder="e.g., United States, India", value="United States, India"),
-#     ],
-#     outputs=[
-#         gr.File(label="Download CSV"),
-#         gr.Textbox(label="Message"),
-#     ],
-#     title="LinkedIn Job Scraper",
-#     description="Enter the job query and locations to scrape LinkedIn job postings. Outputs a downloadable CSV file.",
-# )
 iface = gr.Interface(
     fn=gradio_interface,
     inputs=[
         gr.Textbox(label="Job Query", placeholder="e.g., Data Scientist", value="Unity developers"),
-        gr.Textbox(label="Locations (comma-separated)", placeholder="e.g., United States, India", value="United States, United Kingdom, Canada, Germany, India"),
-        gr.Dropdown(
-            label="Time Filter",
-            choices=["From Past Month", "From Last 24 Hours"],  # The options the user can select
-            value="From Past Month",  # Default option
-            type="value",
-            ),
     ],
     outputs=[
-        gr.Dataframe(label="Job Results", headers=['Date','Company', 'ApplyLink'], interactive=True),
         gr.Textbox(label="Message"),
     ],
-    title="Job Scraper",
-    description="Enter a job query and locations to scrape job postings and display the results in a table.",
 )
 # Launch app
 if __name__ == "__main__":
-    iface.launch()

 import logging
 from linkedin_jobs_scraper import LinkedinScraper
+from linkedin_jobs_scraper.events import Events, EventData
 from linkedin_jobs_scraper.query import Query, QueryOptions, QueryFilters
+from linkedin_jobs_scraper.filters import TimeFilters
 import pandas as pd
 # Configure logging
         'Title': data.title,
         'Company': data.company,
         'Location': data.location,
         'Job Link': data.link,
         'Description Length': len(data.description),
     })
 def on_end():
     print("[ON_END] Scraping completed.")
+# LinkedIn Scraper function with error handling
+def scrape_jobs(query, locations):
     global job_data
     try:
         job_data = []
         scraper = LinkedinScraper(
             chrome_executable_path=None,
             chrome_binary_location=None,
             chrome_options=None,
             headless=True,
+            max_workers=5,
             slow_mo=0.8,
             page_load_timeout=60,
         )
+        # Catching the exception for missing chrome and notify the user
         scraper.on(Events.DATA, on_data)
         scraper.on(Events.END, on_end)
                 options=QueryOptions(
                     locations=locations.split(','),
                     apply_link=True,
+                    skip_promoted_jobs=True,
                     page_offset=0,
                     limit=100,
                     filters=QueryFilters(
+                        time=TimeFilters.DAY,  # Specify desired time filter
                     ),
                 ),
             ),
         scraper.run(queries)
         # Convert to DataFrame and return
         if job_data:
+            df = pd.DataFrame(job_data)
+            return df, "Scraping successful"
         else:
+            return pd.DataFrame(), "No jobs found"
     except Exception as e:
         # Handle errors gracefully
+        message = f"Error occurred: {str(e)}"
         return None, message
+# Gradio interface
+def gradio_interface(query, locations):
+    df, message = scrape_jobs(query, locations)
+    return df, message
+# Gradio app layout
 iface = gr.Interface(
     fn=gradio_interface,
     inputs=[
         gr.Textbox(label="Job Query", placeholder="e.g., Data Scientist", value="Unity developers"),
+        gr.Textbox(label="Locations (comma-separated)", placeholder="e.g., United States", value="United States"),
     ],
     outputs=[
+        gr.Dataframe(label="Job Results", headers=["Date", "Title", "Company", "Location", "Job Link"], interactive=True),
         gr.Textbox(label="Message"),
     ],
+    title="LinkedIn Job Scraper",
+    description="Scrape LinkedIn for jobs based on query and locations.",
 )
 # Launch app
 if __name__ == "__main__":
+    iface.launch()