Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,29 +1,57 @@
|
|
1 |
const puppeteer = require('puppeteer');
|
2 |
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
const puppeteer = require('puppeteer');
|
2 |
|
3 |
+
/**
 * Open a LinkedIn profile in Puppeteer using an existing session cookie and
 * read the top-card fields from the page.
 *
 * @param {string} cookieString - Cookie-header style string
 *   (`name=value; name2=value2`) handed to `parseCookies`.
 * @param {object} [options]
 * @param {string} [options.profileUrl] - Profile page to open. Defaults to the
 *   profile that was previously hard-coded.
 * @param {boolean} [options.headless=false] - Passed to `puppeteer.launch`.
 *   Set to true to run in headless mode.
 * @returns {Promise<{name: string, jobTitle: string, companyName: string,
 *   location: string, profilePicture: string}>} The scraped fields; each one
 *   falls back to 'N/A' when its element is missing or empty.
 * @throws Rejects with whatever Puppeteer throws (launch failure, navigation
 *   error, selector timeout). The browser is closed before the rejection
 *   propagates.
 */
const scrapeLinkedIn = async (cookieString, options = {}) => {
  const {
    profileUrl = 'https://www.linkedin.com/in/iamyashchouhan/',
    headless = false,
  } = options;

  const browser = await puppeteer.launch({
    headless,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  // Everything after launch runs inside try/finally so that a timeout or
  // navigation failure cannot leave a Chromium process behind.
  try {
    const page = await browser.newPage();

    // A new tab starts on about:blank, so there is no http(s) page URL for the
    // cookies to inherit. Give each cookie an explicit domain unless the
    // parser already supplied one.
    const cookies = parseCookies(cookieString).map((cookie) => ({
      domain: '.linkedin.com',
      ...cookie,
    }));
    await page.setCookie(...cookies);

    // Navigate to LinkedIn profile page
    await page.goto(profileUrl);

    // Wait for necessary elements to load
    await page.waitForSelector('h1.text-heading-xlarge');
    await page.waitForSelector('div.text-body-medium.break-words');
    await page.waitForSelector('span.text-body-small.t-black--light.break-words');
    await page.waitForSelector('span.text-body-small.inline.t-black--light.break-words');
    await page.waitForSelector('img.pv-top-card-profile-picture__image--show');

    // Extract data (this callback runs inside the page, not in Node)
    const profileData = await page.evaluate(() => {
      const name = document.querySelector('h1.text-heading-xlarge')?.innerText.trim() || 'N/A';
      const jobTitle = document.querySelector('div.text-body-medium.break-words')?.innerText.trim() || 'N/A';
      // NOTE(review): this selector differs from the one awaited above
      // ('...t-black--light.break-words') — confirm which element holds the company.
      const companyName = document.querySelector('span.text-body-small.t-black')?.innerText.trim() || 'N/A';
      const location = document.querySelector('span.text-body-small.inline.t-black--light.break-words')?.innerText.trim() || 'N/A';
      const profilePicture = document.querySelector('img.pv-top-card-profile-picture__image--show')?.src || 'N/A';

      return {
        name,
        jobTitle,
        companyName,
        location,
        profilePicture,
      };
    });

    console.log(profileData);

    return profileData;
  } finally {
    await browser.close();
  }
};
|
45 |
+
|
46 |
+
/**
 * Parse a Cookie-header style string (`name=value; name2=value2`) into an
 * array of `{ name, value }` records.
 *
 * Only the FIRST `=` in each segment separates name from value, so values
 * that themselves contain `=` (e.g. `lang=v=2&lang=en-us`) are kept intact.
 * Surrounding whitespace is trimmed; double quotes around a value are kept
 * as part of the value. Blank segments (such as the one produced by a
 * trailing `;`) and segments with no `=` or an empty name are skipped.
 *
 * @param {string} cookieString - The raw cookie string.
 * @returns {{name: string, value: string}[]} Parsed cookies in input order.
 * @throws {TypeError} If `cookieString` is not a string.
 */
const parseCookies = (cookieString) => {
  if (typeof cookieString !== 'string') {
    throw new TypeError('parseCookies expects a cookie string');
  }

  const cookies = [];
  for (const segment of cookieString.split(';')) {
    const separatorIndex = segment.indexOf('=');
    if (separatorIndex === -1) {
      continue;
    }

    const name = segment.slice(0, separatorIndex).trim();
    if (name === '') {
      continue;
    }

    cookies.push({ name, value: segment.slice(separatorIndex + 1).trim() });
  }
  return cookies;
};
|
53 |
+
|
54 |
+
// The LinkedIn session cookie is a live credential, so it is read from the
// LINKEDIN_COOKIE environment variable instead of being committed to source.
// Expected format: a Cookie-header style string, e.g.
//   li_at=<token>; JSESSIONID="ajax:<id>"
// Any cookie value that was previously committed in this file should be
// treated as compromised and invalidated by signing out of that session.
const cookieString = process.env.LINKEDIN_COOKIE ?? '';

if (cookieString.trim() === '') {
  console.error('LINKEDIN_COOKIE is not set; export your LinkedIn cookie string before running.');
  process.exitCode = 1;
} else {
  // Attach a rejection handler so a failed scrape is reported and reflected in
  // the exit code rather than surfacing as an unhandled promise rejection.
  scrapeLinkedIn(cookieString).catch((error) => {
    console.error('LinkedIn scrape failed:', error);
    process.exitCode = 1;
  });
}
|