KrishGoyani committed on
Commit
6beb322
·
verified ·
1 Parent(s): df6c380

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +145 -0
app.py ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from gliner import GLiNER
2
+ import re
3
+ import fitz
4
+ import gradio as gr
5
+
6
# Load the pretrained GLiNER checkpoint once, at import time, so that every
# call to ner() reuses the same module-level model instance.
model = GLiNER.from_pretrained(
    "gliner-community/gliner_large-v2.5",
    load_tokenizer=True,
)
7
+
8
# Entity type names, grouped by the resume section they belong to.
label = [
    # --- personal information ---
    "person", "date_of_birth", "address",
    "mobile_phone_number", "email_address", "social_media_handle",

    # --- education ---
    "education", "degree", "graduation_year", "grade",

    # --- work experience ---
    "work_experience", "company", "job_title",

    # --- skills and qualifications ---
    "skill", "certification", "language",

    # --- achievements and projects ---
    "achievement", "award", "project", "publication",

    # --- additional information ---
    "hobby", "link",

    # --- general ---
    "organization",
]
46
+
47
+
48
+
49
def clean_text(text):
    """Normalize raw extracted text into a single clean line.

    Every run of whitespace (newlines, tabs, non-breaking spaces, ...) is
    collapsed to one ASCII space, characters that are not printable (control
    and format characters) are dropped, and the result is stripped.
    Printable non-ASCII characters such as accented letters are preserved,
    so a name like "José" is not truncated to "Jos".

    Args:
        text: The text to clean.

    Returns:
        The cleaned text; an empty string if nothing printable remains.
    """
    # Collapse whitespace FIRST: Unicode separators such as NBSP are not
    # "printable", and deleting them outright would glue neighbouring words
    # together ("John\xa0Doe" -> "JohnDoe").
    collapsed = re.sub(r'\s+', ' ', text)

    # Drop control/format characters but keep every printable code point
    # (the ASCII space counts as printable).
    printable = ''.join(ch for ch in collapsed if ch.isprintable())

    # Removing a character can leave two spaces side by side; merge them
    # before trimming the ends.
    return re.sub(r' {2,}', ' ', printable).strip()
63
+
64
+
65
def pdf2text(file_path):
    """Extract the text of every page of a PDF and return it cleaned.

    Args:
        file_path: Location of the PDF; handed directly to ``fitz.open``.

    Returns:
        The page texts concatenated in document order and then passed
        through ``clean_text``.
    """
    with fitz.open(file_path) as pdf:
        raw = "".join(page.get_text() for page in pdf)
    return clean_text(raw)
73
+
74
+
75
def ner(text, labels=None, *, threshold=0.27, nested_ner=True):
    """Run GLiNER entity extraction on *text*.

    Args:
        text: The text to analyse.
        labels: Entity type names to look for.  Defaults to the module-level
            ``label`` list when omitted.
        threshold: Minimum score passed to ``model.predict_entities``.
        nested_ner: When true, the model is called with ``flat_ner=False``;
            when false, with ``flat_ner=True``.

    Returns:
        A dict with the input under ``"text"`` and, under ``"entities"``, one
        dict per prediction carrying ``entity`` (the predicted label),
        ``word`` (the matched text), ``start``/``end`` (taken verbatim from
        the prediction; presumably character offsets into *text*) and a
        ``score`` that is always 0.
    """
    if labels is None:
        labels = label

    predictions = model.predict_entities(
        text, labels, flat_ner=not nested_ner, threshold=threshold
    )

    return {
        "text": text,
        "entities": [
            {
                "entity": entity["label"],
                "word": entity["text"],
                "start": entity["start"],
                "end": entity["end"],
                # The model's own confidence is intentionally not forwarded.
                "score": 0,
            }
            for entity in predictions
        ],
    }
91
+
92
def parser(file_path):
    """Turn an uploaded resume PDF into the entity dict produced by ``ner``.

    Args:
        file_path: Path of the uploaded PDF.  May be empty/``None`` when the
            handler runs without a file having been provided.

    Returns:
        The result of ``ner`` applied to the PDF's cleaned text.

    Raises:
        gr.Error: If no file was provided, so the UI can show a readable
            message instead of failing inside the PDF reader or the model.
    """
    if not file_path:
        raise gr.Error("Please upload a PDF resume before parsing.")
    return ner(pdf2text(file_path))
95
+
96
+
97
# Custom CSS passed to gr.Blocks(css=...). It sets the page background and
# font, styles a ".container" box (auto margin, padding, rounded corners,
# drop shadow), colors and centers <h1>, and centers the element whose id is
# "file_upload" (the elem_id given to the upload widget).
custom_css = """
body {
background-color: #f0f8ff;
font-family: 'Arial', sans-serif;
}
.container {
margin: auto;
padding: 20px;
border-radius: 10px;
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}
h1 {
color: #3d1ad9;
text-align: center;
}
#file_upload {
display: flex;
justify-content: center;
margin-bottom: 20px;
}
"""
119
+
120
+
121
# Assemble the page top to bottom: heading, intro text, upload widget,
# action button, highlighted result, and a closing blurb.
with gr.Blocks(css=custom_css) as demo:
    gr.HTML("<h1>AI-Powered Resume Parser</h1>")
    gr.HTML("<p style='text-align: center;'>This tool uses advanced NLP techniques to extract key information from your resume.</p>")

    with gr.Row():
        file_input = gr.File(
            label="Upload Resume",
            file_types=['.pdf'],
            elem_id="file_upload",
        )

    with gr.Row():
        parse_button = gr.Button("Parse Resume")

    with gr.Row():
        output = gr.HighlightedText(
            label="Parsed Resume",
            combine_adjacent=True,
        )

    # Clicking the button sends the uploaded file to parser() and renders
    # its return value in the highlighted-text component.
    parse_button.click(fn=parser, inputs=file_input, outputs=output)

    gr.HTML("<p style='text-align: center;'>Our resume parser can identify and extract important details such as personal information, education, work experience, skills, and more. Simply upload your resume and let our AI do the work!</p>")

# Enable the request queue, then start the server.
demo.queue().launch(share=True, debug=True)