Spaces:

KrishGoyani
/

GLiNER_Resume_Parser

Runtime error

App Files Files Community

KrishGoyani committed on Jun 21, 2024

Commit

6beb322

verified ·

1 Parent(s): df6c380

Create app.py

Browse files

Files changed (1) hide show

app.py +145 -0

app.py ADDED Viewed

	@@ -0,0 +1,145 @@

+from gliner import GLiNER
+import re
+import fitz
+import gradio as gr
+model = GLiNER.from_pretrained("gliner-community/gliner_large-v2.5", load_tokenizer=True)
+label = [
+    # Personal Information
+    'person',
+    'date_of_birth',
+    'address',
+    'mobile_phone_number',
+    'email_address',
+    'social_media_handle',
+    # Education
+    'education',
+    'degree',
+    'graduation_year',
+    'grade',
+    # Work Experience
+    'work_experience',
+    'company',
+    'job_title',
+    # Skills and Qualifications
+    'skill',
+    'certification',
+    'language',
+    # Achievements and Projects
+    'achievement',
+    'award',
+    'project',
+    'publication',
+    # Additional Information
+    'hobby',
+    'link',
+    # General
+    'organization'
+]
+def clean_text(text):
+    # Remove all escape characters
+    cleaned_text = re.sub(r'[\n\r\t\f\v]', ' ', text)
+    # Remove any other non-printable characters
+    cleaned_text = re.sub(r'[^\x20-\x7E]', '', cleaned_text)
+    # Replace multiple spaces with a single space
+    cleaned_text = re.sub(r'\s+', ' ', cleaned_text)
+    # Strip leading and trailing whitespace
+    cleaned_text = cleaned_text.strip()
+    return cleaned_text
+def pdf2text(file_path):
+  with fitz.open(file_path) as doc:
+      text = ""
+      for page in doc:
+          text += page.get_text()
+  return clean_text(text)
+def ner(tex) :
+    return {
+        "text": text,
+        "entities": [
+            {
+                "entity": entity["label"],
+                "word": entity["text"],
+                "start": entity["start"],
+                "end": entity["end"],
+                "score": 0,
+            }
+            for entity in model.predict_entities(
+                text, labels, flat_ner=not nested_ner=True, threshold=0.27
+            )
+        ],
+    }
+def parser(file_path):
+  text = pdf2text(file_path)
+  return ner(text)
+# Define a custom CSS style
+custom_css = """
+body {
+    background-color: #f0f8ff;
+    font-family: 'Arial', sans-serif;
+}
+.container {
+    margin: auto;
+    padding: 20px;
+    border-radius: 10px;
+    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
+}
+h1 {
+    color: #3d1ad9;
+    text-align: center;
+}
+#file_upload {
+    display: flex;
+    justify-content: center;
+    margin-bottom: 20px;
+}
+"""
+with gr.Blocks(css=custom_css) as demo:
+    gr.HTML("<h1>AI-Powered Resume Parser</h1>")
+    gr.HTML("<p style='text-align: center;'>This tool uses advanced NLP techniques to extract key information from your resume.</p>")
+    with gr.Row():
+        file_input = gr.File(label="Upload Resume",
+                file_types=['.pdf'],
+                 elem_id="file_upload"
+                )
+    with gr.Row():
+        parse_button = gr.Button("Parse Resume")
+    with gr.Row():
+        output = gr.HighlightedText(label="Parsed Resume",
+                           combine_adjacent=True
+                           )
+    parse_button.click(fn=parser, inputs=file_input, outputs=output)
+    gr.HTML("<p style='text-align: center;'>Our resume parser can identify and extract important details such as personal information, education, work experience, skills, and more. Simply upload your resume and let our AI do the work!</p>")
+# Launch the interface
+demo.queue()
+demo.launch(share=True, debug=True)