Spaces:

Manojajj
/

resume_parser_llama

Sleeping

App Files Community

Manojajj committed on Dec 2, 2024

Commit

d59b28c

verified ·

1 Parent(s): fb92761

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -16

app.py CHANGED Viewed

@@ -1,24 +1,22 @@
 import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 import pdfplumber
 import re
 import openpyxl
-import os
 from huggingface_hub import login
 # Function to authenticate Hugging Face using token
 def authenticate_hf(token):
     try:
-        login(token)
         return "Authentication Successful"
     except Exception as e:
         return f"Error: {e}"
-# Initialize the model and tokenizer
-model_name = "meta-llama/Llama-3.1-70B-Instruct"  # Replace with the actual model name
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(model_name)
 # Function to extract text from PDF
 def extract_text_from_pdf(pdf_path):
     with pdfplumber.open(pdf_path) as pdf:
@@ -28,7 +26,7 @@ def extract_text_from_pdf(pdf_path):
     return text
 # Function to parse the resume text for name, email, phone, and skills
-def parse_resume(text):
     # Define the prompts for each type of information
     prompts = {
         "name": "Extract the name from this resume:\n",
@@ -51,7 +49,7 @@ def parse_resume(text):
             results[key] = email[0] if email else None
         elif key == 'phone':
             # Use regex to validate phone number format
-            phone = re.findall(r'\b\d{10,15}\b', response)
             results[key] = phone[0] if phone else None
         elif key == 'skills':
             # Extract technical skills
@@ -73,7 +71,7 @@ def save_to_excel(parsed_data, output_file):
     wb.save(output_file)
 # Function to process PDF files and output an Excel file
-def process_pdfs(pdfs):
     parsed_data = []
     for pdf in pdfs:
@@ -81,7 +79,7 @@ def process_pdfs(pdfs):
         text = extract_text_from_pdf(pdf.name)
         # Parse the text for relevant details
-        parsed_info = parse_resume(text)
         # Add parsed information to the list
         parsed_data.append(parsed_info)
@@ -106,13 +104,13 @@ with gr.Blocks() as app:
     gr.Markdown("### Upload PDF Resumes")
-    # File input to upload resumes (use "filepath" or "binary" for type)
-    pdfs_input = gr.File(file_count="multiple", type="filepath")
     output_file = gr.File()
-    # Process the PDFs and parse them
     process_button = gr.Button("Process Resumes")
-    process_button.click(process_pdfs, inputs=pdfs_input, outputs=output_file)
 # Launch the app
 app.launch()

 import gradio as gr
+from transformers import AutoModelForCausalLM, AutoTokenizer
 import pdfplumber
 import re
 import openpyxl
 from huggingface_hub import login
 # Function to authenticate Hugging Face using token
 def authenticate_hf(token):
     try:
+        login(token)  # Log in to Hugging Face
+        # Once logged in, initialize the model and tokenizer
+        model_name = "meta-llama/Llama-3.1-70B-Instruct"  # Replace with your model name
+        tokenizer = AutoTokenizer.from_pretrained(model_name)
+        model = AutoModelForCausalLM.from_pretrained(model_name)
         return "Authentication Successful"
     except Exception as e:
         return f"Error: {e}"
 # Function to extract text from PDF
 def extract_text_from_pdf(pdf_path):
     with pdfplumber.open(pdf_path) as pdf:
     return text
 # Function to parse the resume text for name, email, phone, and skills
+def parse_resume(text, tokenizer, model):
     # Define the prompts for each type of information
     prompts = {
         "name": "Extract the name from this resume:\n",
             results[key] = email[0] if email else None
         elif key == 'phone':
             # Use regex to validate phone number format
+            phone = re.findall(r'\b\d{8,12}\b', response)
             results[key] = phone[0] if phone else None
         elif key == 'skills':
             # Extract technical skills
     wb.save(output_file)
 # Function to process PDF files and output an Excel file
+def process_pdfs(pdfs, tokenizer, model):
     parsed_data = []
     for pdf in pdfs:
         text = extract_text_from_pdf(pdf.name)
         # Parse the text for relevant details
+        parsed_info = parse_resume(text, tokenizer, model)
         # Add parsed information to the list
         parsed_data.append(parsed_info)
     gr.Markdown("### Upload PDF Resumes")
+    # File input to upload resumes (use "file" for multiple file uploads)
+    pdfs_input = gr.File(file_count="multiple", type="file")
     output_file = gr.File()
+    # Process the PDFs and parse them once authenticated
     process_button = gr.Button("Process Resumes")
+    process_button.click(process_pdfs, inputs=[pdfs_input], outputs=[output_file])
 # Launch the app
 app.launch()