# KrishGoyani's picture
# Update app.py
# 3be6673 verified
# raw
# history blame
# 3.3 kB
from gliner import GLiNER
import re
import fitz
import gradio as gr
# Load the pretrained GLiNER checkpoint once at import time; the module-level
# `model` is then shared by every call that runs entity prediction.
model = GLiNER.from_pretrained("gliner-community/gliner_large-v2.5", load_tokenizer=True)
def clean_text(text):
    """Normalize raw extracted text into a single clean line.

    Every whitespace character (newlines, tabs, non-breaking and other
    Unicode spaces) becomes a plain space, non-printable characters are
    dropped, runs of spaces are collapsed, and the result is stripped.

    Printable non-ASCII characters such as accented letters are kept, so
    text is not mangled just because it is not plain ASCII.

    Args:
        text: The raw string to clean.

    Returns:
        The cleaned string; empty if nothing printable remains.
    """
    # Map all whitespace to a plain space *before* filtering: whitespace
    # other than the ASCII space is not "printable" and would otherwise be
    # deleted outright, gluing neighbouring words together.
    spaced = re.sub(r'\s', ' ', text)
    # str.isprintable() is Unicode-aware: it rejects control, format and
    # unassigned code points but accepts letters with diacritics.
    printable = ''.join(ch for ch in spaced if ch.isprintable())
    # Only plain spaces remain as whitespace at this point.
    return re.sub(r' {2,}', ' ', printable).strip()
def pdf2text(file_path):
    """Extract the text of every page of a PDF and return it cleaned.

    Args:
        file_path: Location of the PDF; passed unchanged to ``fitz.open``.

    Returns:
        The text of all pages, concatenated in page order and then
        normalized by ``clean_text``.
    """
    with fitz.open(file_path) as doc:
        # Collect per-page text and join once, rather than growing a string
        # with += on every page.
        page_texts = [page.get_text() for page in doc]
    return clean_text("".join(page_texts))
def ner(text, labels, threshold):
    """Predict entities in *text* and shape them for a highlighted-text view.

    Args:
        text: The text to search for entities.
        labels: Comma-separated entity labels, e.g. "name, email". Blank
            entries (empty input, stray or trailing commas) are ignored.
        threshold: Threshold forwarded to ``model.predict_entities``.

    Returns:
        A dict with two keys: "text" (the input text, unchanged) and
        "entities" (a list of dicts with "entity", "word", "start", "end"
        and "score"). The list is empty when no usable label was given.
    """
    stripped = (label.strip() for label in labels.split(","))
    # "".split(",") yields [""], and "a,b," yields a trailing ""; never
    # hand such empty labels to the model.
    labels = [label for label in stripped if label]
    print(labels)

    if not labels:
        # Nothing to look for, so skip the model call entirely.
        return {"text": text, "entities": []}

    predictions = model.predict_entities(
        text, labels, flat_ner=False, threshold=threshold
    )
    return {
        "text": text,
        "entities": [
            {
                "entity": entity["label"],
                "word": entity["text"],
                "start": entity["start"],
                "end": entity["end"],
                # The score is deliberately a constant 0 here; the model's
                # own confidence value is not forwarded.
                "score": 0,
            }
            for entity in predictions
        ],
    }
def parser(file_path, labels, threshold):
    """Extract the text of the PDF at *file_path* and run entity prediction.

    Args:
        file_path: Location of the PDF, handed to ``pdf2text``.
        labels: Comma-separated entity labels, handed to ``ner``.
        threshold: Prediction threshold, handed to ``ner``.

    Returns:
        Whatever ``ner`` returns for the extracted text.
    """
    return ner(pdf2text(file_path), labels, threshold)
# Custom stylesheet for the page: sets the body background and font, styles a
# padded, rounded ".container" with a drop shadow, centers and colors <h1>,
# and horizontally centers the element whose id is "file_upload".
custom_css = """
body {
background-color: #f0f8ff;
font-family: 'Arial', sans-serif;
}
.container {
margin: auto;
padding: 20px;
border-radius: 10px;
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}
h1 {
color: #3d1ad9;
text-align: center;
}
#file_upload {
display: flex;
justify-content: center;
margin-bottom: 20px;
}
"""
# Build the page top to bottom: heading and intro, a row with the label box
# and threshold slider, the PDF upload, the parse button, the highlighted
# output, and a closing description.
with gr.Blocks(css=custom_css) as demo:
    gr.HTML("<h1>AI-Powered Resume Parser</h1>")
    gr.HTML(
        "<p style='text-align: center;'>This tool uses advanced NLP techniques to extract key information from your resume.</p>"
    )

    # Entity labels and the prediction threshold sit side by side.
    with gr.Row() as row:
        labels = gr.Textbox(
            scale=2,
            label="Labels",
            placeholder="Enter your labels here (comma separated)",
        )
        threshold = gr.Slider(
            minimum=0,
            maximum=1,
            step=0.01,
            value=0.3,
            scale=0,
            label="Threshold",
            info="Lower the threshold to increase how many entities get predicted.",
        )

    # Only .pdf files are offered by the upload widget.
    with gr.Row():
        file_input = gr.File(
            elem_id="file_upload",
            file_types=['.pdf'],
            label="Upload Resume",
        )

    with gr.Row():
        parse_button = gr.Button("Parse Resume")

    with gr.Row():
        output = gr.HighlightedText(
            combine_adjacent=True,
            label="Parsed Resume",
        )

    # Clicking the button feeds (file, labels, threshold) to `parser` and
    # renders its result in the highlighted-text component.
    parse_button.click(
        fn=parser,
        inputs=[file_input, labels, threshold],
        outputs=output,
    )

    gr.HTML(
        "<p style='text-align: center;'>Our resume parser can identify and extract important details such as personal information, education, work experience, skills, and more. Simply upload your resume and let our AI do the work!</p>"
    )
# Launch the interface: turn on Gradio's request queue first, then start the
# server. NOTE(review): share=True asks Gradio for a public share link and
# debug=True enables debug mode — confirm both are wanted for the deployed app.
demo.queue()
demo.launch(share=True, debug=True)