lelafav502 committed on
Commit
77569f3
·
verified ·
1 Parent(s): acbbe65

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -57
app.py CHANGED
@@ -1,57 +1,35 @@
1
- const puppeteer = require('puppeteer');
2
-
3
- const scrapeLinkedIn = async (cookieString) => {
4
- const browser = await puppeteer.launch({
5
- headless: false, // Set to true to run in headless mode
6
- args: ['--no-sandbox', '--disable-setuid-sandbox']
7
- });
8
-
9
- const page = await browser.newPage();
10
-
11
- // Set cookies from the cookie string
12
- await page.setCookie(...parseCookies(cookieString));
13
-
14
- // Navigate to LinkedIn profile page
15
- await page.goto('https://www.linkedin.com/in/iamyashchouhan/');
16
-
17
- // Wait for necessary elements to load
18
- await page.waitForSelector('h1.text-heading-xlarge');
19
- await page.waitForSelector('div.text-body-medium.break-words');
20
- await page.waitForSelector('span.text-body-small.t-black--light.break-words');
21
- await page.waitForSelector('span.text-body-small.inline.t-black--light.break-words');
22
- await page.waitForSelector('img.pv-top-card-profile-picture__image--show');
23
-
24
- // Extract data
25
- const profileData = await page.evaluate(() => {
26
- const name = document.querySelector('h1.text-heading-xlarge')?.innerText.trim() || 'N/A';
27
- const jobTitle = document.querySelector('div.text-body-medium.break-words')?.innerText.trim() || 'N/A';
28
- const companyName = document.querySelector('span.text-body-small.t-black')?.innerText.trim() || 'N/A';
29
- const location = document.querySelector('span.text-body-small.inline.t-black--light.break-words')?.innerText.trim() || 'N/A';
30
- const profilePicture = document.querySelector('img.pv-top-card-profile-picture__image--show')?.src || 'N/A';
31
-
32
- return {
33
- name,
34
- jobTitle,
35
- companyName,
36
- location,
37
- profilePicture
38
- };
39
- });
40
-
41
- console.log(profileData);
42
-
43
- await browser.close();
44
- };
45
-
46
- // Utility function to parse cookies from string
47
- const parseCookies = (cookieString) => {
48
- return cookieString.split(';').map(cookie => {
49
- const [name, value] = cookie.split('=');
50
- return { name: name.trim(), value: value.trim() };
51
- });
52
- };
53
-
54
- // Example cookie string
55
- const cookieString = 'bcookie="v=2&af32c293-7891-49cf-8067-df4dce00e64d"; lang=v=2&lang=en-us; bscookie="v=1&20240817130125b1ec5422-50f2-4f01-805e-bad20d1a4b9aAQEL-598jVXxlXXMUvFcezhOOWnKoG0r"; AMCVS_14215E3D5995C57C0A495C55%40AdobeOrg=1; li_rm=AQEd1fGLin9USgAAAZFgbbvFLgEwl7oKHpDTDeWuDHLE2pZe0Qjg3zQJfHqwtfecSkXwkOSE9s-5JLVHj8BA0ajjre7a89HXIDUK-iuUzDXqClg4NYY_u-HM; aam_uuid=33504427776277414921619885016503064550; _gcl_au=1.1.914565872.1723899697; li_at=AQEDAVHaPIgAVnJJAAABkWBuMDAAAAGRhHq0ME0AvvlZkrxKpKJTCDH-1fbqrhz4cBySifG_VqBbR0KG-k9xi9t9F4JzqZquF6QzwbhCnLSCV_le4f3HN9_T28I6LEQlPpa6XfpnzVvl8wV1rHf1motu; liap=true; JSESSIONID="ajax:2411311991931161902"; timezone=Asia/Calcutta; li_theme=light; li_theme_set=app; li_sugr=7d505644-b4e5-4104-a80b-47707e18befe; _guid=b6bb7d34-5518-4c8a-a758-57e62b8e4be8; AMCV_14215E3D5995C57C0A495C55%40AdobeOrg=-637568504%7CMCIDTS%7C19953%7CMCMID%7C33641963811349951991636447048932156461%7CMCAAMLH-1724504525%7C12%7CMCAAMB-1724504525%7C6G1ynYcLPuiQxYZrsz_pkqfLG9yMXBpb2zX5dvJdYQJzPXImdj0y%7CMCOPTOUT-1723906925s%7CNONE%7CvVersion%7C5.1.1%7CMCCIDH%7C-938386711; AnalyticsSyncHistory=AQL0EnWu-K2sfQAAAZFgbkXlw_LRTTlwS1Cp6SyLnc41dYSBOKFwmcz5DUAfmDvxnGLrnomftGt2NbFOowi8Kw; lms_ads=AQHT4qUGrfZOIgAAAZFgbkdaM2xWt5R4poZlO-8N7IgSbiKi-CIvK7XZmEnUuwaXDhrFFGddwAVIWaGdMTyo2sdztohB4ggW; lms_analytics=AQHT4qUGrfZOIgAAAZFgbkdaM2xWt5R4poZlO-8N7IgSbiKi-CIvK7XZmEnUuwaXDhrFFGddwAVIWaGdMTyo2sdztohB4ggW; dfpfpt=d599bcf329464821a0b6a10636bc5775; fptctx2=taBcrIH61PuCVH7eNCyH0F58uBDuZFZOunQHZt3FugkcRoyY%252bUVHEFYEt8P2yLQtdNvBbcr3S%252f3vRu726mO994Cm7G2ktg4zrcz8Ay6h1Kfv2knasBRWMBK8y8R5U6BTm0UWC0xzNu0vpcHi51BBdJS8FjiTa2mv5qADTJU2iYbtMROf5O524KEwMG6OQvKc5n3JBww0UvS6xKy9tBleJ9ZCyXZi52GjCNH%252foQS5tROMMqPuMuUiRHZCNL8PHm0twS5PYixIVKIbqahbUmT6r10cJAZPz8mUq5ZM4yhmMLvo42XxcbxNumdPwpNuxXotxR5MDBdj6jdPl%252fFQBXRrByZCAMkv6n06IkIjn9qXRsY%253d; 
UserMatchHistory=AQKxNUNal2LwkQAAAZFgbpwnv_tgZQrD7O7THqLjnAaOZOpNsrV_WkTwLtl9GVUgf-ciRDfBf4uRsPpBAjF-hnYGb1zC7dImsk6UA9G0G-WTZ_t84INqYp5D6zvGNuxIaQVIfkV-5EwRhnX8kZJTQblfsDaU0izbhr_lFT8FvKSd_Ftjm-mqIvpautFCJ2LohN6GpexSjy0WYlWvrM0pgdn98uRT5TebLEg5A8qQ8eEGoeZAdMkmuzOwZLUUEQJg7_REwA4-RjRf8V-tSHnolKC7JmNswECy2Fdze69Nd9wbNGNStvCdfgBIcLx3URcCDqQqmAeGt_gdaGHiCKWyJ_1SalDF5Q0fv-V4D2jIEHSyaiWSWA; lidc="b=TB40:s=T:r=T:a=T:p=T:g=5264:u=1:x=1:i=1723899748:t=1723984887:v=2:sig=AQHup-aaYxgVEK8Szf3j2t9TpwOreZfc"';
56
-
57
- scrapeLinkedIn(cookieString);
 
import asyncio
import os
import sys

from playwright.async_api import TimeoutError as PlaywrightTimeoutError
from playwright.async_api import async_playwright
def parse_cookies(cookie_string, domain='.linkedin.com', path='/'):
    """Convert a ``Cookie``-header style string into Playwright cookie dicts.

    Args:
        cookie_string: ``name=value`` pairs separated by ``;``. Whitespace
            (including line breaks) around each pair is ignored.
        domain: Cookie domain attached to every entry.
        path: Cookie path attached to every entry.

    Returns:
        A list of dicts with ``name``, ``value``, ``domain`` and ``path`` keys,
        in the order the pairs appear. Fragments without ``=`` or without a
        name are skipped.
    """
    cookies = []
    for fragment in cookie_string.split(';'):
        # Split on the FIRST '=' only: values such as 'v=2&lang=en-us'
        # legitimately contain '=' and must be kept intact.
        name, separator, value = fragment.strip().partition('=')
        name = name.strip()
        if not separator or not name:
            continue
        cookies.append({
            'name': name,
            'value': value.strip(),
            # add_cookies() rejects cookies that carry neither a url nor a
            # domain/path pair, so both are always supplied.
            'domain': domain,
            'path': path,
        })
    return cookies


async def _text_or_default(page, selector, timeout=5000, default='N/A'):
    """Return the inner text of ``selector``, or ``default`` if it never appears."""
    try:
        text = await page.inner_text(selector, timeout=timeout)
    except PlaywrightTimeoutError:
        # A missing element raises instead of returning an empty value.
        return default
    return text.strip() or default


async def _attribute_or_default(page, selector, attribute, timeout=5000, default='N/A'):
    """Return ``attribute`` of ``selector``, or ``default`` if missing or absent."""
    try:
        value = await page.get_attribute(selector, attribute, timeout=timeout)
    except PlaywrightTimeoutError:
        return default
    return value or default


async def scrape_linkedin(cookie_string, profile_url='https://www.linkedin.com/in/iamyashchouhan/'):
    """Open a LinkedIn profile with the given cookies and print its top-card data.

    Args:
        cookie_string: ``name=value; name=value`` cookie string used to
            authenticate the browser context.
        profile_url: Profile page to load. Defaults to the profile the script
            was originally written for.

    Returns:
        The dict that is printed, with keys ``name``, ``job_title``,
        ``company_name``, ``location`` and ``profile_picture``. A field whose
        element is not found within the timeout is reported as ``'N/A'``.
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            context = await browser.new_context()
            await context.add_cookies(parse_cookies(cookie_string))

            page = await context.new_page()
            await page.goto(profile_url, wait_until='networkidle')

            profile = {
                "name": await _text_or_default(page, 'h1.text-heading-xlarge'),
                "job_title": await _text_or_default(page, 'div.text-body-medium.break-words'),
                "company_name": await _text_or_default(page, 'span.text-body-small.t-black'),
                "location": await _text_or_default(
                    page, 'span.text-body-small.inline.t-black--light.break-words'
                ),
                "profile_picture": await _attribute_or_default(
                    page, 'img.pv-top-card-profile-picture__image--show', 'src'
                ),
            }
        finally:
            # Close the browser even when navigation or extraction fails.
            await browser.close()

    print(profile)
    return profile
# Session cookies are credentials: keep them out of source control and supply
# them through the LINKEDIN_COOKIE environment variable instead, in the usual
# "name=value; name=value" form.
cookie_string = os.environ.get("LINKEDIN_COOKIE", "")

if __name__ == "__main__":
    if not cookie_string:
        sys.exit("Set the LINKEDIN_COOKIE environment variable to your LinkedIn cookie string.")
    asyncio.run(scrape_linkedin(cookie_string))