Manojajj committed on
Commit
71bc45c
·
verified ·
1 Parent(s): d59b28c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -14
app.py CHANGED
@@ -1,22 +1,24 @@
1
  import gradio as gr
2
- from transformers import AutoModelForCausalLM, AutoTokenizer
3
  import pdfplumber
4
  import re
5
  import openpyxl
 
6
  from huggingface_hub import login
7
 
8
  # Function to authenticate Hugging Face using token
9
  def authenticate_hf(token):
10
  try:
11
- login(token) # Log in to Hugging Face
12
- # Once logged in, initialize the model and tokenizer
13
- model_name = "meta-llama/Llama-3.1-70B-Instruct" # Replace with your model name
14
- tokenizer = AutoTokenizer.from_pretrained(model_name)
15
- model = AutoModelForCausalLM.from_pretrained(model_name)
16
  return "Authentication Successful"
17
  except Exception as e:
18
  return f"Error: {e}"
19
 
 
 
 
 
 
20
  # Function to extract text from PDF
21
  def extract_text_from_pdf(pdf_path):
22
  with pdfplumber.open(pdf_path) as pdf:
@@ -26,7 +28,7 @@ def extract_text_from_pdf(pdf_path):
26
  return text
27
 
28
  # Function to parse the resume text for name, email, phone, and skills
29
- def parse_resume(text, tokenizer, model):
30
  # Define the prompts for each type of information
31
  prompts = {
32
  "name": "Extract the name from this resume:\n",
@@ -49,7 +51,7 @@ def parse_resume(text, tokenizer, model):
49
  results[key] = email[0] if email else None
50
  elif key == 'phone':
51
  # Use regex to validate phone number format
52
- phone = re.findall(r'\b\d{8,12}\b', response)
53
  results[key] = phone[0] if phone else None
54
  elif key == 'skills':
55
  # Extract technical skills
@@ -71,7 +73,7 @@ def save_to_excel(parsed_data, output_file):
71
  wb.save(output_file)
72
 
73
  # Function to process PDF files and output an Excel file
74
- def process_pdfs(pdfs, tokenizer, model):
75
  parsed_data = []
76
 
77
  for pdf in pdfs:
@@ -79,7 +81,7 @@ def process_pdfs(pdfs, tokenizer, model):
79
  text = extract_text_from_pdf(pdf.name)
80
 
81
  # Parse the text for relevant details
82
- parsed_info = parse_resume(text, tokenizer, model)
83
 
84
  # Add parsed information to the list
85
  parsed_data.append(parsed_info)
@@ -104,13 +106,13 @@ with gr.Blocks() as app:
104
 
105
  gr.Markdown("### Upload PDF Resumes")
106
 
107
- # File input to upload resumes (use "file" for multiple file uploads)
108
- pdfs_input = gr.File(file_count="multiple", type="file")
109
  output_file = gr.File()
110
 
111
- # Process the PDFs and parse them once authenticated
112
  process_button = gr.Button("Process Resumes")
113
- process_button.click(process_pdfs, inputs=[pdfs_input], outputs=[output_file])
114
 
115
  # Launch the app
116
  app.launch()
 
1
  import gradio as gr
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
3
  import pdfplumber
4
  import re
5
  import openpyxl
6
+ import os
7
  from huggingface_hub import login
8
 
9
  # Function to authenticate Hugging Face using token
10
  def authenticate_hf(token):
11
  try:
12
+ login(token)
 
 
 
 
13
  return "Authentication Successful"
14
  except Exception as e:
15
  return f"Error: {e}"
16
 
17
+ # Initialize the model and tokenizer
18
+ model_name = "meta-llama/Llama-3.1-70B-Instruct" # Replace with the actual model name
19
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
20
+ model = AutoModelForCausalLM.from_pretrained(model_name)
21
+
22
  # Function to extract text from PDF
23
  def extract_text_from_pdf(pdf_path):
24
  with pdfplumber.open(pdf_path) as pdf:
 
28
  return text
29
 
30
  # Function to parse the resume text for name, email, phone, and skills
31
+ def parse_resume(text):
32
  # Define the prompts for each type of information
33
  prompts = {
34
  "name": "Extract the name from this resume:\n",
 
51
  results[key] = email[0] if email else None
52
  elif key == 'phone':
53
  # Use regex to validate phone number format
54
+ phone = re.findall(r'\b\d{10,15}\b', response)
55
  results[key] = phone[0] if phone else None
56
  elif key == 'skills':
57
  # Extract technical skills
 
73
  wb.save(output_file)
74
 
75
  # Function to process PDF files and output an Excel file
76
+ def process_pdfs(pdfs):
77
  parsed_data = []
78
 
79
  for pdf in pdfs:
 
81
  text = extract_text_from_pdf(pdf.name)
82
 
83
  # Parse the text for relevant details
84
+ parsed_info = parse_resume(text)
85
 
86
  # Add parsed information to the list
87
  parsed_data.append(parsed_info)
 
106
 
107
  gr.Markdown("### Upload PDF Resumes")
108
 
109
+ # File input to upload resumes (use "filepath" for type)
110
+ pdfs_input = gr.File(file_count="multiple", type="filepath")
111
  output_file = gr.File()
112
 
113
+ # Process the PDFs and parse them
114
  process_button = gr.Button("Process Resumes")
115
+ process_button.click(process_pdfs, inputs=pdfs_input, outputs=output_file)
116
 
117
  # Launch the app
118
  app.launch()