lelafav502 committed on
Commit
acbbe65
·
verified ·
1 Parent(s): 41e2385

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -27
app.py CHANGED
@@ -1,29 +1,57 @@
1
  const puppeteer = require('puppeteer');
2
 
3
// Scrapes the top-card fields of a single LinkedIn profile page and logs them.
(async () => {
  const browser = await puppeteer.launch({ headless: true });

  // Everything after launch runs inside try/finally so the browser process is
  // always shut down, even when navigation or a selector lookup rejects.
  try {
    const page = await browser.newPage();

    // Navigate to a URL
    await page.goto('https://www.linkedin.com/in/iamyashchouhan/');

    // Wait for the content to load
    await page.waitForSelector('h1.text-heading-xlarge');

    // Extract data; page.$eval rejects if a selector matches nothing.
    const name = await page.$eval('h1.text-heading-xlarge', (el) => el.innerText);
    const jobTitle = await page.$eval('div.text-body-medium.break-words', (el) => el.innerText);
    const companyName = await page.$eval('span.text-body-small.t-black', (el) => el.innerText);
    const location = await page.$eval(
      'span.text-body-small.inline.t-black--light.break-words',
      (el) => el.innerText,
    );
    const profilePicture = await page.$eval('img.pv-top-card-profile-picture__image', (el) => el.src);

    console.log({
      name,
      jobTitle,
      companyName,
      location,
      profilePicture,
    });
  } finally {
    await browser.close();
  }
})().catch((err) => {
  // Report the failure explicitly instead of leaving the promise unhandled.
  console.error('Failed to scrape profile:', err);
  process.exitCode = 1;
});
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  const puppeteer = require('puppeteer');
2
 
3
/**
 * Opens a LinkedIn profile in a Puppeteer-controlled browser using the given
 * session cookies, extracts the top-card fields, logs them and returns them.
 *
 * @param {string} cookieString - Cookies in "name=value; name=value" form;
 *   parsed with parseCookies.
 * @param {string} [profileUrl] - Profile page to open. Defaults to the URL
 *   this script was originally written against.
 * @returns {Promise<{name: string, jobTitle: string, companyName: string,
 *   location: string, profilePicture: string}>} Extracted fields; a field is
 *   'N/A' when its element is absent or its text is empty.
 * @throws Rejects if launch, navigation or any waitForSelector call fails.
 */
const scrapeLinkedIn = async (
  cookieString,
  profileUrl = 'https://www.linkedin.com/in/iamyashchouhan/',
) => {
  const browser = await puppeteer.launch({
    headless: false, // Set to true to run in headless mode
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  // try/finally guarantees the browser process is closed even when a
  // navigation or selector wait rejects part-way through.
  try {
    const page = await browser.newPage();

    // Cookies are installed before any navigation, so the page has no URL from
    // which a cookie domain could be inferred; scope them explicitly. A domain
    // already present on a parsed cookie takes precedence.
    const cookies = parseCookies(cookieString).map((cookie) => ({
      domain: '.linkedin.com',
      ...cookie,
    }));
    await page.setCookie(...cookies);

    // Navigate to LinkedIn profile page
    await page.goto(profileUrl);

    // Wait for necessary elements to load
    await page.waitForSelector('h1.text-heading-xlarge');
    await page.waitForSelector('div.text-body-medium.break-words');
    await page.waitForSelector('span.text-body-small.t-black--light.break-words');
    await page.waitForSelector('span.text-body-small.inline.t-black--light.break-words');
    await page.waitForSelector('img.pv-top-card-profile-picture__image--show');

    // Extract data inside the page context. Optional chaining plus the 'N/A'
    // fallback keeps a missing element from throwing.
    const profileData = await page.evaluate(() => {
      const name = document.querySelector('h1.text-heading-xlarge')?.innerText.trim() || 'N/A';
      const jobTitle = document.querySelector('div.text-body-medium.break-words')?.innerText.trim() || 'N/A';
      const companyName = document.querySelector('span.text-body-small.t-black')?.innerText.trim() || 'N/A';
      const location = document.querySelector('span.text-body-small.inline.t-black--light.break-words')?.innerText.trim() || 'N/A';
      const profilePicture = document.querySelector('img.pv-top-card-profile-picture__image--show')?.src || 'N/A';

      return {
        name,
        jobTitle,
        companyName,
        location,
        profilePicture,
      };
    });

    console.log(profileData);
    return profileData;
  } finally {
    await browser.close();
  }
};
45
+
46
/**
 * Parses a "name=value; name=value" cookie string into cookie objects.
 *
 * Each segment is split at its FIRST "=" only, so values that themselves
 * contain "=" (for example `lang=v=2&lang=en-us`) are kept intact. Segments
 * that are empty, have no "=", or have an empty name are skipped, which makes
 * a trailing ";" or stray whitespace harmless.
 *
 * @param {string} cookieString - Semicolon-separated cookie pairs. null or
 *   undefined is treated as an empty string.
 * @returns {Array<{name: string, value: string}>} One entry per well-formed
 *   pair, in input order, with name and value trimmed of whitespace.
 */
const parseCookies = (cookieString) => {
  const cookies = [];

  for (const segment of String(cookieString ?? '').split(';')) {
    const separatorIndex = segment.indexOf('=');
    if (separatorIndex === -1) {
      continue;
    }

    const name = segment.slice(0, separatorIndex).trim();
    if (name === '') {
      continue;
    }

    cookies.push({ name, value: segment.slice(separatorIndex + 1).trim() });
  }

  return cookies;
};
53
+
54
// Session cookies are credentials, so they are read from the environment
// instead of being committed to source control. Expected format is the same
// "name=value; name=value" string that scrapeLinkedIn accepts.
// NOTE(review): the cookie values that used to be hard-coded here were
// published with the source and should be treated as compromised.
const cookieString = process.env.LINKEDIN_COOKIES;

if (!cookieString) {
  console.error('Set LINKEDIN_COOKIES to a "name=value; name=value" cookie string.');
  process.exitCode = 1;
} else {
  // Attach a rejection handler so a scraping failure is reported explicitly
  // rather than surfacing as an unhandled promise rejection.
  scrapeLinkedIn(cookieString).catch((err) => {
    console.error('Failed to scrape LinkedIn profile:', err);
    process.exitCode = 1;
  });
}