Spaces:

Atif20024
/

LinkedIn-Profile-QA

Sleeping

LinkedIn-Profile-QA / doc_loading.py

Main. Uploaded all the files required to run this app

d064c90 verified about 1 year ago

723 Bytes

	import requests
	from bs4 import BeautifulSoup
	import re
	import os
	from PyPDF2 import PdfReader

	def read_pdf_text(pdf_file, max_pages=None):
	    """Extract and concatenate the text of a PDF's pages.

	    Args:
	        pdf_file: The PDF source, passed unchanged to ``PdfReader``.
	        max_pages: Optional cap on how many leading pages are read.
	            ``None`` (the default) reads every page.

	    Returns:
	        The extracted text of the selected pages, joined in page order
	        with no separator inserted between pages.

	    Raises:
	        ValueError: If ``max_pages`` is negative (raised by ``islice``).
	    """
	    # Function-scope import so this block does not depend on the
	    # module's import section being changed.
	    from itertools import islice

	    pdf_reader = PdfReader(pdf_file)
	    # islice(..., None) yields every page, so the default behaviour is
	    # the same as iterating pdf_reader.pages directly.
	    selected_pages = islice(pdf_reader.pages, max_pages)
	    # Join once instead of growing a string with += per page; `or ""`
	    # keeps the join safe should a page produce no text value.
	    return "".join(page.extract_text() or "" for page in selected_pages)

	def parse_linkedin_pdf(pdf_text):
	    """Split LinkedIn profile text into a ``{heading: body}`` mapping.

	    The text is cut at every newline that is immediately followed by one
	    of the recognised headings (Experience, Contact, Education,
	    Top Skills, Languages, Honors-Awards) appearing as a whole word.
	    For each resulting piece, the first line becomes the key and the
	    remaining lines (newline-joined) become the value. Any text before
	    the first heading is keyed by its own first line.

	    Args:
	        pdf_text: The profile text to split, as a single string.

	    Returns:
	        A dict mapping each piece's first line to the rest of that
	        piece. If two pieces share the same first line, the later one
	        overwrites the earlier one.
	    """
	    # The alternatives must be separated by a bare `|`. An escaped `\|`
	    # matches a literal pipe character, so the pattern would never match
	    # a heading and the text would never be split.
	    section_break = r'\n(?=\b(?:Experience|Contact|Education|Top Skills|Languages|Honors-Awards)\b)'
	    parsed_data = {}
	    for section in re.split(section_break, pdf_text):
	        # partition at the first newline: head is the section's first
	        # line, tail is everything after it ('' when there is no newline).
	        section_name, _, section_text = section.partition('\n')
	        parsed_data[section_name] = section_text
	    return parsed_data