File size: 3,280 Bytes
6beb322
 
 
 
 
 
 
eff3e02
6beb322
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
239f80d
6beb322
 
 
 
 
 
 
 
 
 
 
56622b3
6beb322
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
from gliner import GLiNER
import re
import fitz 
import gradio as gr

# Load the pretrained GLiNER checkpoint once, at import time; ner() reuses
# this single instance for every prediction instead of reloading it per call.
model = GLiNER.from_pretrained("gliner-community/gliner_large-v2.5", load_tokenizer=True)

# Entity types requested from the model, grouped by resume section.
# The groups are concatenated into one flat list; order is preserved.
labels = (
    # Personal information
    [
        'person',
        'date_of_birth',
        'address',
        'mobile_phone_number',
        'email_address',
        'social_media_handle',
    ]
    # Education
    + ['education', 'degree', 'graduation_year', 'grade']
    # Work experience
    + ['work_experience', 'company', 'job_title']
    # Skills and qualifications
    + ['skill', 'certification', 'language']
    # Achievements and projects
    + ['achievement', 'award', 'project', 'publication']
    # Additional information
    + ['hobby', 'link']
    # General
    + ['organization']
)



def clean_text(text):
    """Flatten *text* into a single line of printable ASCII.

    The substitutions run in a fixed order:

    1. newline, carriage return, tab, form feed and vertical tab each become
       a space, so words separated only by a line break stay separated;
    2. every character outside the printable ASCII range (0x20-0x7E) is
       deleted, which also drops accented and other non-ASCII letters;
    3. runs of whitespace collapse to one space.

    Leading and trailing whitespace is stripped from the result.

    Args:
        text: The raw string to normalise.

    Returns:
        The normalised string; empty if nothing printable remains.
    """
    substitutions = (
        (r'[\n\r\t\f\v]', ' '),
        (r'[^\x20-\x7E]', ''),
        (r'\s+', ' '),
    )

    result = text
    for pattern, replacement in substitutions:
        result = re.sub(pattern, replacement, result)

    return result.strip()


def pdf2text(file_path):
    """Extract the text of every page of a PDF and return it cleaned.

    The document is opened with ``fitz.open`` and closed again when the
    ``with`` block exits. Page texts are concatenated in page order with no
    separator and the result is passed through ``clean_text``.

    Args:
        file_path: Value handed to ``fitz.open``; the caller in this file
            passes the uploaded file from the Gradio file widget.

    Returns:
        Whatever ``clean_text`` returns for the concatenated page text.
    """
    with fitz.open(file_path) as doc:
        # Join once rather than growing a string with += on every page.
        # The generator must be consumed while the document is still open.
        raw_text = "".join(page.get_text() for page in doc)

    return clean_text(raw_text)


def ner(text):
    """Run the module-level model over *text* and package the predictions.

    Calls ``model.predict_entities`` with the module-level ``labels``,
    ``flat_ner=False`` and ``threshold=0.27``, then maps each prediction to a
    dict with the keys ``entity``, ``word``, ``start``, ``end`` and ``score``.

    Args:
        text: The string to analyse.

    Returns:
        A dict ``{"text": text, "entities": [...]}``. In this file the result
        is wired to the ``gr.HighlightedText`` output component.
    """
    predictions = model.predict_entities(
        text, labels, flat_ner=False, threshold=0.27
    )

    spans = []
    for hit in predictions:
        spans.append(
            {
                "entity": hit["label"],
                "word": hit["text"],
                "start": hit["start"],
                "end": hit["end"],
                # Always 0: the prediction's own score is not forwarded.
                "score": 0,
            }
        )

    return {"text": text, "entities": spans}

def parser(file_path):
    """Turn an uploaded PDF into entity-annotated text.

    This is the click handler of the "Parse Resume" button: the file is
    converted to cleaned text by ``pdf2text`` and that text is annotated by
    ``ner``.

    Args:
        file_path: The value supplied by the file-upload component.

    Returns:
        The dict produced by ``ner`` for the extracted text.
    """
    return ner(pdf2text(file_path))


# Stylesheet passed to gr.Blocks(css=...) when the interface is built.
# The #file_upload rule matches the elem_id given to the upload widget.
# NOTE(review): nothing in this file assigns the "container" class --
# presumably it targets markup generated by Gradio itself; confirm.
custom_css = """
body {
    background-color: #f0f8ff;
    font-family: 'Arial', sans-serif;
}
.container {
    margin: auto;
    padding: 20px;
    border-radius: 10px;
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}
h1 {
    color: #3d1ad9;
    text-align: center;
}
#file_upload {
    display: flex;
    justify-content: center;
    margin-bottom: 20px;
}
"""


# Static HTML snippets rendered above and below the interactive widgets.
_TITLE_HTML = "<h1>AI-Powered Resume Parser</h1>"
_INTRO_HTML = "<p style='text-align: center;'>This tool uses advanced NLP techniques to extract key information from your resume.</p>"
_FOOTER_HTML = "<p style='text-align: center;'>Our resume parser can identify and extract important details such as personal information, education, work experience, skills, and more. Simply upload your resume and let our AI do the work!</p>"

# Build the page top to bottom: heading, intro, upload row, button row,
# highlighted-output row, footer. Components appear in creation order.
with gr.Blocks(css=custom_css) as demo:
    gr.HTML(_TITLE_HTML)
    gr.HTML(_INTRO_HTML)

    # Upload widget; the file picker is limited to .pdf files.
    with gr.Row():
        file_input = gr.File(
            label="Upload Resume",
            file_types=['.pdf'],
            elem_id="file_upload",
        )

    with gr.Row():
        parse_button = gr.Button("Parse Resume")

    with gr.Row():
        output = gr.HighlightedText(
            label="Parsed Resume",
            combine_adjacent=True,
        )

    # A click sends the uploaded file through parser() and shows the
    # returned annotations in the highlighted-text component.
    parse_button.click(fn=parser, inputs=file_input, outputs=output)

    gr.HTML(_FOOTER_HTML)

# Enable queuing, then start the app with the share and debug options on.
demo.queue()
demo.launch(share=True, debug=True)