Spaces:

KrishGoyani
/

GLiNER_Resume_Parser

Runtime error

File size: 3,280 Bytes

from gliner import GLiNER
import re
import fitz 
import gradio as gr

# Load the pretrained GLiNER checkpoint once, at import time; ner() below
# reuses this module-level instance on every call.
model = GLiNER.from_pretrained("gliner-community/gliner_large-v2.5", load_tokenizer=True)

# Entity types handed to the model in ner(); one line per resume section.
labels = [
    # Personal information
    "person", "date_of_birth", "address", "mobile_phone_number",
    "email_address", "social_media_handle",
    # Education
    "education", "degree", "graduation_year", "grade",
    # Work experience
    "work_experience", "company", "job_title",
    # Skills and qualifications
    "skill", "certification", "language",
    # Achievements and projects
    "achievement", "award", "project", "publication",
    # Additional information
    "hobby", "link",
    # General
    "organization",
]



def clean_text(text):
    """Normalize raw extracted text into a single line of printable characters.

    Every run of whitespace (newlines, tabs, form feeds, and Unicode spaces
    such as the non-breaking space) becomes one ASCII space, characters that
    are not printable (control codes, zero-width marks, private-use glyphs)
    are dropped, and leading/trailing spaces are stripped.

    Printable non-ASCII characters are kept, so names such as "José" or
    "Müller" are not truncated the way an ASCII-only filter would.

    Args:
        text: Raw text, e.g. as extracted from the pages of a PDF.

    Returns:
        The cleaned text; an empty string if nothing printable remains.
    """
    # Convert whitespace first: str.isprintable() rejects Unicode separators,
    # and they must turn into word breaks rather than be deleted outright.
    spaced = re.sub(r'\s+', ' ', text)

    # Drop whatever is still non-printable.
    printable = ''.join(ch for ch in spaced if ch.isprintable())

    # Deleting a character that sat between two spaces leaves a double space,
    # so collapse once more before trimming the ends.
    return re.sub(r' {2,}', ' ', printable).strip()


def pdf2text(file_path):
    """Read every page of a PDF and return its text, cleaned.

    Args:
        file_path: Location of the PDF; passed unchanged to ``fitz.open``.

    Returns:
        The text of all pages concatenated in page order and then run
        through ``clean_text``.
    """
    # The context manager closes the document once all pages are read.
    with fitz.open(file_path) as pdf:
        raw_text = "".join(page.get_text() for page in pdf)

    return clean_text(raw_text)


def ner(text):
    """Run the module-level GLiNER model over ``text`` using ``labels``.

    Args:
        text: The text to search for entities.

    Returns:
        A dict with the input under ``"text"`` and, under ``"entities"``, one
        dict per prediction holding its label (``"entity"``), matched text
        (``"word"``), ``"start"`` and ``"end"`` offsets, and a ``"score"``
        that is always 0.
    """
    predictions = model.predict_entities(
        text, labels, flat_ner=False, threshold=0.27
    )

    entities = []
    for span in predictions:
        entities.append(
            {
                "entity": span["label"],
                "word": span["text"],
                "start": span["start"],
                "end": span["end"],
                # Fixed placeholder; the prediction's own score is not copied.
                "score": 0,
            }
        )

    return {"text": text, "entities": entities}

def parser(file_path):
    """Extract the text of an uploaded PDF resume and tag its entities.

    Args:
        file_path: Path of the PDF to parse. May be ``None`` (or empty) when
            the callback fires before any file has been uploaded.

    Returns:
        ``None`` when there is no file to parse; otherwise a dict of the form
        ``{"text": ..., "entities": [...]}``. When the PDF yields no text the
        entity list is empty and the model is not invoked.
    """
    # Nothing uploaded yet: return an empty result instead of handing a
    # missing path to the PDF reader.
    if not file_path:
        return None

    text = pdf2text(file_path)

    # A PDF with no extractable text (e.g. scanned images) has nothing to
    # tag, so skip model inference altogether.
    if not text:
        return {"text": text, "entities": []}

    return ner(text)


# Stylesheet passed to gr.Blocks(css=...) below: page background and font,
# a padded/shadowed .container, a centered heading, and centering for the
# upload widget (whose elem_id is "file_upload").
custom_css = """
body {
    background-color: #f0f8ff;
    font-family: 'Arial', sans-serif;
}
.container {
    margin: auto;
    padding: 20px;
    border-radius: 10px;
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}
h1 {
    color: #3d1ad9;
    text-align: center;
}
#file_upload {
    display: flex;
    justify-content: center;
    margin-bottom: 20px;
}
"""


# Build the page top to bottom: heading, intro, PDF upload, parse button,
# highlighted result, and a closing blurb.
with gr.Blocks(css=custom_css) as demo:
    gr.HTML("<h1>AI-Powered Resume Parser</h1>")
    gr.HTML(
        "<p style='text-align: center;'>This tool uses advanced NLP techniques to extract key information from your resume.</p>"
    )

    with gr.Row():
        file_input = gr.File(
            label="Upload Resume",
            file_types=[".pdf"],
            elem_id="file_upload",
        )

    with gr.Row():
        parse_button = gr.Button("Parse Resume")

    with gr.Row():
        output = gr.HighlightedText(
            label="Parsed Resume",
            combine_adjacent=True,
        )

    # Clicking the button sends the uploaded file to parser() and shows its
    # return value in the highlighted-text component.
    parse_button.click(fn=parser, inputs=file_input, outputs=output)

    gr.HTML(
        "<p style='text-align: center;'>Our resume parser can identify and extract important details such as personal information, education, work experience, skills, and more. Simply upload your resume and let our AI do the work!</p>"
    )

# Turn on the queue, then start the app with the share and debug flags set.
demo.queue()
demo.launch(share=True, debug=True)