Spaces:

charulp2499
/

JobScrapper

Running

App Files

charulp2499 committed on Jan 20

Commit

8308e31

verified ·

1 Parent(s): 69c661e

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -27

app.py CHANGED Viewed

@@ -1,9 +1,9 @@
 import gradio as gr
 import logging
 from linkedin_jobs_scraper import LinkedinScraper
-from linkedin_jobs_scraper.events import Events, EventData
 from linkedin_jobs_scraper.query import Query, QueryOptions, QueryFilters
-from linkedin_jobs_scraper.filters import TimeFilters
 import pandas as pd
 # Configure logging
@@ -21,80 +21,93 @@ def on_data(data: EventData):
         'Location': data.location,
         'Job Link': data.link,
         'Description Length': len(data.description),
     })
 def on_end():
     print("[ON_END] Scraping completed.")
-# LinkedIn Scraper function with error handling
-def scrape_jobs(query, locations):
     global job_data
     try:
         job_data = []
         scraper = LinkedinScraper(
             chrome_executable_path=None,
             chrome_binary_location=None,
             chrome_options=None,
             headless=True,
-            max_workers=5,
             slow_mo=0.8,
             page_load_timeout=60,
         )
-        # Catching the exception for missing chrome and notify the user
         scraper.on(Events.DATA, on_data)
         scraper.on(Events.END, on_end)
         queries = [
             Query(
                 query=query,
                 options=QueryOptions(
                     locations=locations.split(','),
                     apply_link=True,
-                    skip_promoted_jobs=True,
                     page_offset=0,
                     limit=100,
                     filters=QueryFilters(
-                        time=TimeFilters.DAY,  # Specify desired time filter
                     ),
                 ),
             ),
         ]
         scraper.run(queries)
-        # Convert to DataFrame and return
         if job_data:
-            df = pd.DataFrame(job_data)
-            return df, "Scraping successful"
         else:
-            return pd.DataFrame(), "No jobs found"
     except Exception as e:
         # Handle errors gracefully
-        message = f"Error occurred: {str(e)}"
         return None, message
-# Gradio interface
-def gradio_interface(query, locations):
-    df, message = scrape_jobs(query, locations)
-    return df, message
-# Gradio app layout
 iface = gr.Interface(
     fn=gradio_interface,
     inputs=[
         gr.Textbox(label="Job Query", placeholder="e.g., Data Scientist", value="Unity developers"),
-        gr.Textbox(label="Locations (comma-separated)", placeholder="e.g., United States", value="United States"),
     ],
     outputs=[
-        gr.Dataframe(label="Job Results", headers=["Date", "Title", "Company", "Location", "Job Link"], interactive=True),
         gr.Textbox(label="Message"),
     ],
-    title="LinkedIn Job Scraper",
-    description="Scrape LinkedIn for jobs based on query and locations.",
 )
-# Launch app
 if __name__ == "__main__":
-    iface.launch()

 import gradio as gr
 import logging
 from linkedin_jobs_scraper import LinkedinScraper
+from linkedin_jobs_scraper.events import Events, EventData, EventMetrics
 from linkedin_jobs_scraper.query import Query, QueryOptions, QueryFilters
+from linkedin_jobs_scraper.filters import RelevanceFilters, TimeFilters, OnSiteOrRemoteFilters
 import pandas as pd
 # Configure logging
         'Location': data.location,
         'Job Link': data.link,
         'Description Length': len(data.description),
+        'Description': data.description,
     })
 def on_end():
     print("[ON_END] Scraping completed.")
+# Scraper function
+def scrape_jobs(query, locations, time_filter):
     global job_data
     try:
         job_data = []
         scraper = LinkedinScraper(
             chrome_executable_path=None,
             chrome_binary_location=None,
             chrome_options=None,
             headless=True,
+            max_workers=10,
             slow_mo=0.8,
             page_load_timeout=60,
         )
         scraper.on(Events.DATA, on_data)
         scraper.on(Events.END, on_end)
+        if time_filter == "From Past Month":
+            time_filter = TimeFilters.MONTH
+        elif time_filter == "From Last 24 Hours":
+            time_filter = TimeFilters.DAY
+        else:
+            time_filter = TimeFilters.MONTH
         queries = [
             Query(
                 query=query,
                 options=QueryOptions(
                     locations=locations.split(','),
                     apply_link=True,
+                    skip_promoted_jobs=False,
                     page_offset=0,
                     limit=100,
                     filters=QueryFilters(
+                        # relevance=RelevanceFilters.RECENT,
+                        time=time_filter,
                     ),
                 ),
             ),
         ]
         scraper.run(queries)
         if job_data:
+            df = pd.DataFrame(job_data)
+            message = f"Jobs ({len(job_data)}) data successfully scraped."
+            return df, message
         else:
+            return pd.DataFrame(), '-'
     except Exception as e:
         # Handle errors gracefully
+        message = f"An error occurred during scraping: {e}"
         return None, message
+def gradio_interface(query, locations, time_filter):
+    df, message = scrape_jobs(query, locations, time_filter)
+    return df, message
+#App Layout
 iface = gr.Interface(
     fn=gradio_interface,
     inputs=[
         gr.Textbox(label="Job Query", placeholder="e.g., Data Scientist", value="Unity developers"),
+        gr.Textbox(label="Locations (comma-separated)", placeholder="e.g., United States, India", value="United States, United Kingdom, Canada, Germany, India"),
+        gr.Dropdown(
+            label="Time Filter",
+            choices=["From Past Month", "From Last 24 Hours"],
+            value="From Past Month",  # Default option
+            type="value",
+            ),
     ],
     outputs=[
+        gr.Dataframe(label="Job Results", headers=['Date','Company', 'ApplyLink'], interactive=True),
         gr.Textbox(label="Message"),
     ],
+    title="Job Scraper",
+    description="Enter a job query and locations to scrape job postings and display the results in a table.",
 )
 if __name__ == "__main__":
+    iface.launch()