Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,57 +1,35 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
location,
|
37 |
-
profilePicture
|
38 |
-
};
|
39 |
-
});
|
40 |
-
|
41 |
-
console.log(profileData);
|
42 |
-
|
43 |
-
await browser.close();
|
44 |
-
};
|
45 |
-
|
46 |
-
// Utility function to parse cookies from string
|
47 |
-
const parseCookies = (cookieString) => {
|
48 |
-
return cookieString.split(';').map(cookie => {
|
49 |
-
const [name, value] = cookie.split('=');
|
50 |
-
return { name: name.trim(), value: value.trim() };
|
51 |
-
});
|
52 |
-
};
|
53 |
-
|
54 |
-
// Example cookie string
|
55 |
-
const cookieString = 'bcookie="v=2&af32c293-7891-49cf-8067-df4dce00e64d"; lang=v=2&lang=en-us; bscookie="v=1&20240817130125b1ec5422-50f2-4f01-805e-bad20d1a4b9aAQEL-598jVXxlXXMUvFcezhOOWnKoG0r"; AMCVS_14215E3D5995C57C0A495C55%40AdobeOrg=1; li_rm=AQEd1fGLin9USgAAAZFgbbvFLgEwl7oKHpDTDeWuDHLE2pZe0Qjg3zQJfHqwtfecSkXwkOSE9s-5JLVHj8BA0ajjre7a89HXIDUK-iuUzDXqClg4NYY_u-HM; aam_uuid=33504427776277414921619885016503064550; _gcl_au=1.1.914565872.1723899697; li_at=AQEDAVHaPIgAVnJJAAABkWBuMDAAAAGRhHq0ME0AvvlZkrxKpKJTCDH-1fbqrhz4cBySifG_VqBbR0KG-k9xi9t9F4JzqZquF6QzwbhCnLSCV_le4f3HN9_T28I6LEQlPpa6XfpnzVvl8wV1rHf1motu; liap=true; JSESSIONID="ajax:2411311991931161902"; timezone=Asia/Calcutta; li_theme=light; li_theme_set=app; li_sugr=7d505644-b4e5-4104-a80b-47707e18befe; _guid=b6bb7d34-5518-4c8a-a758-57e62b8e4be8; AMCV_14215E3D5995C57C0A495C55%40AdobeOrg=-637568504%7CMCIDTS%7C19953%7CMCMID%7C33641963811349951991636447048932156461%7CMCAAMLH-1724504525%7C12%7CMCAAMB-1724504525%7C6G1ynYcLPuiQxYZrsz_pkqfLG9yMXBpb2zX5dvJdYQJzPXImdj0y%7CMCOPTOUT-1723906925s%7CNONE%7CvVersion%7C5.1.1%7CMCCIDH%7C-938386711; AnalyticsSyncHistory=AQL0EnWu-K2sfQAAAZFgbkXlw_LRTTlwS1Cp6SyLnc41dYSBOKFwmcz5DUAfmDvxnGLrnomftGt2NbFOowi8Kw; lms_ads=AQHT4qUGrfZOIgAAAZFgbkdaM2xWt5R4poZlO-8N7IgSbiKi-CIvK7XZmEnUuwaXDhrFFGddwAVIWaGdMTyo2sdztohB4ggW; lms_analytics=AQHT4qUGrfZOIgAAAZFgbkdaM2xWt5R4poZlO-8N7IgSbiKi-CIvK7XZmEnUuwaXDhrFFGddwAVIWaGdMTyo2sdztohB4ggW; dfpfpt=d599bcf329464821a0b6a10636bc5775; fptctx2=taBcrIH61PuCVH7eNCyH0F58uBDuZFZOunQHZt3FugkcRoyY%252bUVHEFYEt8P2yLQtdNvBbcr3S%252f3vRu726mO994Cm7G2ktg4zrcz8Ay6h1Kfv2knasBRWMBK8y8R5U6BTm0UWC0xzNu0vpcHi51BBdJS8FjiTa2mv5qADTJU2iYbtMROf5O524KEwMG6OQvKc5n3JBww0UvS6xKy9tBleJ9ZCyXZi52GjCNH%252foQS5tROMMqPuMuUiRHZCNL8PHm0twS5PYixIVKIbqahbUmT6r10cJAZPz8mUq5ZM4yhmMLvo42XxcbxNumdPwpNuxXotxR5MDBdj6jdPl%252fFQBXRrByZCAMkv6n06IkIjn9qXRsY%253d; 
UserMatchHistory=AQKxNUNal2LwkQAAAZFgbpwnv_tgZQrD7O7THqLjnAaOZOpNsrV_WkTwLtl9GVUgf-ciRDfBf4uRsPpBAjF-hnYGb1zC7dImsk6UA9G0G-WTZ_t84INqYp5D6zvGNuxIaQVIfkV-5EwRhnX8kZJTQblfsDaU0izbhr_lFT8FvKSd_Ftjm-mqIvpautFCJ2LohN6GpexSjy0WYlWvrM0pgdn98uRT5TebLEg5A8qQ8eEGoeZAdMkmuzOwZLUUEQJg7_REwA4-RjRf8V-tSHnolKC7JmNswECy2Fdze69Nd9wbNGNStvCdfgBIcLx3URcCDqQqmAeGt_gdaGHiCKWyJ_1SalDF5Q0fv-V4D2jIEHSyaiWSWA; lidc="b=TB40:s=T:r=T:a=T:p=T:g=5264:u=1:x=1:i=1723899748:t=1723984887:v=2:sig=AQHup-aaYxgVEK8Szf3j2t9TpwOreZfc"';
|
56 |
-
|
57 |
-
scrapeLinkedIn(cookieString);
|
|
|
1 |
+
import asyncio
|
2 |
+
from playwright.async_api import async_playwright
|
3 |
+
|
4 |
+
def parse_cookie_header(cookie_string, domain=".linkedin.com", path="/"):
    """Convert a ``Cookie:`` header string into Playwright cookie dicts.

    Args:
        cookie_string: Semicolon-separated ``name=value`` pairs, as copied
            from a browser request header. Whitespace and line breaks around
            each pair are ignored.
        domain: Domain attached to every cookie.
        path: Path attached to every cookie.

    Returns:
        A list of ``{"name", "value", "domain", "path"}`` dicts. Pairs with
        no ``=`` or with an empty name are skipped.
    """
    cookies = []
    for pair in cookie_string.split(";"):
        # Split on the FIRST '=' only: values such as "v=2&lang=en-us"
        # legitimately contain '=' and must not be truncated.
        name, sep, value = pair.strip().partition("=")
        if not sep or not name:
            continue
        cookies.append(
            {"name": name, "value": value, "domain": domain, "path": path}
        )
    return cookies


async def _value_or_default(pending, default="N/A"):
    """Await *pending* and fall back to *default* on timeout or empty result.

    Playwright lookups raise on timeout instead of returning a falsy value,
    so a plain ``await ... or default`` never reaches the fallback.
    """
    from playwright.async_api import TimeoutError as PlaywrightTimeoutError

    try:
        value = await pending
    except PlaywrightTimeoutError:
        return default
    return value or default


async def scrape_linkedin(
    cookie_string,
    *,
    profile_url="https://www.linkedin.com/in/iamyashchouhan/",
    timeout=5000,
):
    """Open a LinkedIn profile with the given session cookies and print it.

    Args:
        cookie_string: ``Cookie:`` header string used to authenticate.
        profile_url: Profile page to load.
        timeout: Per-field lookup timeout in milliseconds.

    Returns:
        The dict that is printed, with keys ``name``, ``job_title``,
        ``company_name``, ``location`` and ``profile_picture``. Any field
        that cannot be found within *timeout* is ``'N/A'``.
    """
    # add_cookies needs a url or a domain/path pair on every cookie;
    # parse_cookie_header supplies domain and path.
    cookies = parse_cookie_header(cookie_string)

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            context = await browser.new_context()
            await context.add_cookies(cookies)

            page = await context.new_page()
            await page.goto(profile_url, wait_until='networkidle')

            text_selectors = {
                "name": 'h1.text-heading-xlarge',
                "job_title": 'div.text-body-medium.break-words',
                "company_name": 'span.text-body-small.t-black',
                "location": 'span.text-body-small.inline.t-black--light.break-words',
            }
            profile = {}
            for field, selector in text_selectors.items():
                profile[field] = await _value_or_default(
                    page.inner_text(selector, timeout=timeout)
                )
            profile["profile_picture"] = await _value_or_default(
                page.get_attribute(
                    'img.pv-top-card-profile-picture__image--show',
                    'src',
                    timeout=timeout,
                )
            )

            print(profile)
            return profile
        finally:
            # Close the browser even when navigation or a lookup raises.
            await browser.close()
|
32 |
+
|
33 |
+
def load_cookie_string(env=None):
    """Return the LinkedIn cookie header from the environment.

    Session cookies are credentials and must not be committed to source, so
    the value is read from the ``LINKEDIN_COOKIE`` variable instead.

    Args:
        env: Mapping to read from; defaults to ``os.environ``.

    Returns:
        The cookie header string with surrounding whitespace removed.

    Raises:
        RuntimeError: If ``LINKEDIN_COOKIE`` is unset or blank.
    """
    import os

    source = os.environ if env is None else env
    cookie = source.get("LINKEDIN_COOKIE", "").strip()
    if not cookie:
        raise RuntimeError(
            "Set the LINKEDIN_COOKIE environment variable to your LinkedIn "
            "Cookie header value"
        )
    return cookie


# Run the scraper only when executed as a script, so importing this module
# does not launch a browser.
if __name__ == "__main__":
    cookie_string = load_cookie_string()
    asyncio.run(scrape_linkedin(cookie_string))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|