KrishGoyani's picture
Update app.py
56622b3 verified
raw
history blame
3.28 kB
from gliner import GLiNER
import re
import fitz
import gradio as gr
# Load the pretrained GLiNER checkpoint once at import time; the single
# module-level instance is reused by ner() for every request.
model = GLiNER.from_pretrained("gliner-community/gliner_large-v2.5", load_tokenizer=True)
# Entity types passed to the model on every prediction, grouped by resume section.
labels = [
    # Personal information
    "person", "date_of_birth", "address",
    "mobile_phone_number", "email_address", "social_media_handle",
    # Education
    "education", "degree", "graduation_year", "grade",
    # Work experience
    "work_experience", "company", "job_title",
    # Skills and qualifications
    "skill", "certification", "language",
    # Achievements and projects
    "achievement", "award", "project", "publication",
    # Additional information
    "hobby", "link",
    # General
    "organization",
]
def clean_text(text):
    """Normalise extracted text into a single line of printable characters.

    Steps:
      1. Every run of whitespace (newlines, tabs, form feeds, non-breaking
         spaces, ...) becomes one ASCII space.
      2. Non-printable characters (control codes, zero-width/format
         characters) are removed.
      3. Spaces left adjacent by step 2 are collapsed and the result is
         stripped.

    Unlike an ASCII-only filter, printable non-ASCII characters such as
    accented letters are preserved, so names like "José" survive intact.

    Args:
        text: Raw text, e.g. as extracted from a PDF.

    Returns:
        The cleaned, single-line string.
    """
    # \s is Unicode-aware for str patterns, so exotic whitespace such as
    # U+00A0 turns into a word separator instead of gluing two words together.
    collapsed = re.sub(r'\s+', ' ', text)
    # Drop only what is genuinely non-printable; the ASCII space is printable.
    printable = ''.join(ch for ch in collapsed if ch.isprintable())
    # Removing a character that sat between two spaces leaves a double space.
    return re.sub(r' {2,}', ' ', printable).strip()
def pdf2text(file_path):
    """Return the cleaned text of the PDF located at *file_path*.

    The document is opened with ``fitz.open``, ``get_text()`` is called on
    each page in iteration order, the pieces are concatenated without any
    separator, and the combined string is passed through ``clean_text``.
    """
    with fitz.open(file_path) as pdf:
        page_texts = [page.get_text() for page in pdf]
    return clean_text("".join(page_texts))
def ner(text):
    """Run entity prediction on *text* and package the result as a dict.

    Calls ``model.predict_entities`` with the module-level ``labels``,
    ``flat_ner=False`` and a score threshold of 0.27.

    Returns:
        A dict with two keys:
          * ``"text"``: the input text, unchanged.
          * ``"entities"``: one dict per predicted entity with the keys
            ``entity`` (the predicted label), ``word`` (the matched text),
            ``start`` / ``end`` (taken from the prediction) and ``score``.
    """
    predictions = model.predict_entities(
        text, labels, flat_ner=False, threshold=0.27
    )

    spans = []
    for found in predictions:
        spans.append(
            {
                "entity": found["label"],
                "word": found["text"],
                "start": found["start"],
                "end": found["end"],
                # The score is always reported as 0; the model's own
                # confidence value is not forwarded.
                "score": 0,
            }
        )

    return {"text": text, "entities": spans}
def parser(file_path):
    """Click handler: extract text from the uploaded PDF and tag its entities.

    Args:
        file_path: Value of the file-upload component; falsy (``None``) when
            the button is clicked before anything has been uploaded.

    Returns:
        The dict produced by ``ner`` (``{"text": ..., "entities": [...]}``).

    Raises:
        gr.Error: If no file was provided, so the user gets a readable
            message instead of a failure deep inside PDF or model code.
    """
    # Guard first: without an upload there is nothing meaningful to open.
    if not file_path:
        raise gr.Error("Please upload a PDF resume before parsing.")
    return ner(pdf2text(file_path))
# Custom stylesheet handed to gr.Blocks(css=...) below: page background and
# font, a centred heading colour for <h1>, and flex-centring for the element
# with id "file_upload" (the elem_id given to the upload widget).
# NOTE(review): nothing in this file assigns the "container" class; it
# presumably targets markup generated by Gradio itself — confirm.
custom_css = """
body {
background-color: #f0f8ff;
font-family: 'Arial', sans-serif;
}
.container {
margin: auto;
padding: 20px;
border-radius: 10px;
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}
h1 {
color: #3d1ad9;
text-align: center;
}
#file_upload {
display: flex;
justify-content: center;
margin-bottom: 20px;
}
"""
# Build the page: heading and intro text, then three rows (upload widget,
# parse button, highlighted output), then a closing description.
with gr.Blocks(css=custom_css) as demo:
    gr.HTML("<h1>AI-Powered Resume Parser</h1>")
    gr.HTML("<p style='text-align: center;'>This tool uses advanced NLP techniques to extract key information from your resume.</p>")
    with gr.Row():
        # Upload restricted to PDFs; elem_id ties this widget to the
        # "#file_upload" rule in custom_css.
        file_input = gr.File(label="Upload Resume",
        file_types=['.pdf'],
        elem_id="file_upload"
        )
    with gr.Row():
        parse_button = gr.Button("Parse Resume")
    with gr.Row():
        # Receives the {"text", "entities"} dict returned by parser().
        output = gr.HighlightedText(label="Parsed Resume",
        combine_adjacent=True
        )
    # Clicking the button runs parser() on the uploaded file and shows the result.
    parse_button.click(fn=parser, inputs=file_input, outputs=output)
    gr.HTML("<p style='text-align: center;'>Our resume parser can identify and extract important details such as personal information, education, work experience, skills, and more. Simply upload your resume and let our AI do the work!</p>")
# Launch the interface: enable the request queue first, then start the server.
# NOTE(review): share=True presumably requests a public share link and
# debug=True presumably keeps the process attached with verbose errors —
# confirm against the Gradio version in use.
demo.queue()
demo.launch(share=True, debug=True)