Spaces:

KrishGoyani
/

GLiNER_Resume_Parser

Runtime error

App Files Files Community

GLiNER_Resume_Parser / app.py

KrishGoyani

Update app.py

3be6673 verified 11 months ago

raw

history blame

3.3 kB

	from gliner import GLiNER
	import re
	import fitz
	import gradio as gr

	# Load the pretrained GLiNER checkpoint once at import time; ner() below
	# reuses this single module-level instance for every request.
	model = GLiNER.from_pretrained("gliner-community/gliner_large-v2.5", load_tokenizer=True)




	def clean_text(text):
	    """Normalize raw extracted text into one clean, single-line string.

	    Every run of whitespace (newlines, tabs, and Unicode spaces such as the
	    non-breaking space) becomes a single ASCII space, non-printable
	    characters are dropped, and leading/trailing whitespace is stripped.
	    Printable non-ASCII characters (accented letters, non-Latin scripts)
	    are preserved so names like "José" survive intact.

	    Args:
	        text: Raw text, e.g. as extracted from PDF pages.

	    Returns:
	        The cleaned string.
	    """
	    # Convert whitespace to spaces first, so separators such as NBSP split
	    # words instead of being deleted and gluing their neighbours together.
	    spaced = re.sub(r'\s+', ' ', text)

	    # Drop control/format characters while keeping printable Unicode.
	    printable = ''.join(ch for ch in spaced if ch.isprintable())

	    # Removing characters can leave adjacent spaces behind; collapse again.
	    return re.sub(r' {2,}', ' ', printable).strip()


	def pdf2text(file_path):
	    """Return the cleaned text of every page in the PDF at ``file_path``.

	    The document is opened with ``fitz.open`` and closed again when the
	    ``with`` block exits; page texts are concatenated in page order and
	    passed through ``clean_text``.
	    """
	    with fitz.open(file_path) as pdf:
	        raw = "".join(page.get_text() for page in pdf)
	    return clean_text(raw)


	def ner(text, labels, threshold):
	    """Extract entities from ``text`` and shape them for ``gr.HighlightedText``.

	    Args:
	        text: Text to search for entities.
	        labels: Comma-separated entity labels, e.g. ``"name, email"``.
	            Whitespace around each label is trimmed and empty entries
	            (from ``""`` or ``"a,,b"``) are ignored.
	        threshold: Threshold forwarded to ``model.predict_entities``.

	    Returns:
	        A dict with the input under ``"text"`` and, under ``"entities"``, a
	        list of dicts with the keys ``entity``, ``word``, ``start``,
	        ``end`` and ``score``. The list is empty when there is no text or
	        no usable label.
	    """
	    label_names = [name.strip() for name in (labels or "").split(",")]
	    label_names = [name for name in label_names if name]

	    # Nothing to search in, or nothing to search for: skip the model call
	    # instead of feeding it empty text or empty label strings.
	    if not text or not label_names:
	        return {"text": text, "entities": []}

	    predictions = model.predict_entities(
	        text, label_names, flat_ner=False, threshold=threshold
	    )
	    return {
	        "text": text,
	        "entities": [
	            {
	                "entity": found["label"],
	                "word": found["text"],
	                "start": found["start"],
	                "end": found["end"],
	                # Fixed at 0 rather than the model's own score; presumably
	                # so highlighting is driven by label only — TODO confirm.
	                "score": 0,
	            }
	            for found in predictions
	        ],
	    }

	def parser(file_path, labels, threshold):
	    """Parse an uploaded PDF resume and tag the requested entities in it.

	    Args:
	        file_path: The uploaded file as delivered by the ``gr.File`` input;
	            falsy (``None``) when nothing was uploaded.
	        labels: Comma-separated entity labels, forwarded to ``ner``.
	        threshold: Threshold, forwarded to ``ner``.

	    Returns:
	        The dict produced by ``ner`` for the text extracted by ``pdf2text``.

	    Raises:
	        gr.Error: If no file was uploaded, so the UI shows a clear message
	            instead of silently parsing nothing.
	    """
	    if not file_path:
	        raise gr.Error("Please upload a PDF resume before parsing.")
	    return ner(pdf2text(file_path), labels, threshold)


	# Custom CSS passed to gr.Blocks(css=...) below. It sets the page background
	# and font, styles a padded, rounded, shadowed .container, colours and
	# centres <h1>, and centres the element whose id is "file_upload" (the
	# elem_id given to the resume upload widget).
	custom_css = """
	body {
	background-color: #f0f8ff;
	font-family: 'Arial', sans-serif;
	}
	.container {
	margin: auto;
	padding: 20px;
	border-radius: 10px;
	box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
	}
	h1 {
	color: #3d1ad9;
	text-align: center;
	}
	#file_upload {
	display: flex;
	justify-content: center;
	margin-bottom: 20px;
	}
	"""


	with gr.Blocks(css=custom_css) as demo:
	    # Page heading and one-line description.
	    gr.HTML("<h1>AI-Powered Resume Parser</h1>")
	    gr.HTML("<p style='text-align: center;'>This tool uses advanced NLP techniques to extract key information from your resume.</p>")

	    # Row 1: which entity labels to look for, and the score threshold.
	    with gr.Row() as row:
	        labels = gr.Textbox(
	            label="Labels",
	            placeholder="Enter your labels here (comma separated)",
	            scale=2,
	        )
	        threshold = gr.Slider(
	            minimum=0,
	            maximum=1,
	            value=0.3,
	            step=0.01,
	            label="Threshold",
	            info="Lower the threshold to increase how many entities get predicted.",
	            scale=0,
	        )

	    # Row 2: PDF upload; elem_id ties it to the #file_upload CSS rule.
	    with gr.Row():
	        file_input = gr.File(
	            label="Upload Resume",
	            file_types=['.pdf'],
	            elem_id="file_upload",
	        )

	    # Row 3: trigger button.
	    with gr.Row():
	        parse_button = gr.Button("Parse Resume")

	    # Row 4: extracted text with the detected entities highlighted.
	    with gr.Row():
	        output = gr.HighlightedText(
	            label="Parsed Resume",
	            combine_adjacent=True,
	        )

	    # Clicking the button runs parser(file, labels, threshold) and renders
	    # its return value in the highlighted-text output.
	    parse_button.click(
	        fn=parser,
	        inputs=[file_input, labels, threshold],
	        outputs=output,
	    )

	    gr.HTML("<p style='text-align: center;'>Our resume parser can identify and extract important details such as personal information, education, work experience, skills, and more. Simply upload your resume and let our AI do the work!</p>")

	# Enable the request queue, then start the app with a share link requested
	# and debug mode on (queue() returns the Blocks instance, so calls chain).
	demo.queue().launch(share=True, debug=True)