Manojajj committed on
Commit
d59b28c
·
verified ·
1 Parent(s): fb92761

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -16
app.py CHANGED
@@ -1,24 +1,22 @@
1
  import gradio as gr
2
- from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
3
  import pdfplumber
4
  import re
5
  import openpyxl
6
- import os
7
  from huggingface_hub import login
8
 
9
  # Function to authenticate Hugging Face using token
10
  def authenticate_hf(token):
11
  try:
12
- login(token)
 
 
 
 
13
  return "Authentication Successful"
14
  except Exception as e:
15
  return f"Error: {e}"
16
 
17
- # Initialize the model and tokenizer
18
- model_name = "meta-llama/Llama-3.1-70B-Instruct" # Replace with the actual model name
19
- tokenizer = AutoTokenizer.from_pretrained(model_name)
20
- model = AutoModelForCausalLM.from_pretrained(model_name)
21
-
22
  # Function to extract text from PDF
23
  def extract_text_from_pdf(pdf_path):
24
  with pdfplumber.open(pdf_path) as pdf:
@@ -28,7 +26,7 @@ def extract_text_from_pdf(pdf_path):
28
  return text
29
 
30
  # Function to parse the resume text for name, email, phone, and skills
31
- def parse_resume(text):
32
  # Define the prompts for each type of information
33
  prompts = {
34
  "name": "Extract the name from this resume:\n",
@@ -51,7 +49,7 @@ def parse_resume(text):
51
  results[key] = email[0] if email else None
52
  elif key == 'phone':
53
  # Use regex to validate phone number format
54
- phone = re.findall(r'\b\d{10,15}\b', response)
55
  results[key] = phone[0] if phone else None
56
  elif key == 'skills':
57
  # Extract technical skills
@@ -73,7 +71,7 @@ def save_to_excel(parsed_data, output_file):
73
  wb.save(output_file)
74
 
75
  # Function to process PDF files and output an Excel file
76
- def process_pdfs(pdfs):
77
  parsed_data = []
78
 
79
  for pdf in pdfs:
@@ -81,7 +79,7 @@ def process_pdfs(pdfs):
81
  text = extract_text_from_pdf(pdf.name)
82
 
83
  # Parse the text for relevant details
84
- parsed_info = parse_resume(text)
85
 
86
  # Add parsed information to the list
87
  parsed_data.append(parsed_info)
@@ -106,13 +104,13 @@ with gr.Blocks() as app:
106
 
107
  gr.Markdown("### Upload PDF Resumes")
108
 
109
- # File input to upload resumes (use "filepath" or "binary" for type)
110
- pdfs_input = gr.File(file_count="multiple", type="filepath")
111
  output_file = gr.File()
112
 
113
- # Process the PDFs and parse them
114
  process_button = gr.Button("Process Resumes")
115
- process_button.click(process_pdfs, inputs=pdfs_input, outputs=output_file)
116
 
117
  # Launch the app
118
  app.launch()
 
1
  import gradio as gr
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer
3
  import pdfplumber
4
  import re
5
  import openpyxl
 
6
  from huggingface_hub import login
7
 
8
  # Function to authenticate Hugging Face using token
9
  def authenticate_hf(token):
10
  try:
11
+ login(token) # Log in to Hugging Face
12
+ # Once logged in, initialize the model and tokenizer
13
+ model_name = "meta-llama/Llama-3.1-70B-Instruct" # Replace with your model name
14
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
15
+ model = AutoModelForCausalLM.from_pretrained(model_name)
16
  return "Authentication Successful"
17
  except Exception as e:
18
  return f"Error: {e}"
19
 
 
 
 
 
 
20
  # Function to extract text from PDF
21
  def extract_text_from_pdf(pdf_path):
22
  with pdfplumber.open(pdf_path) as pdf:
 
26
  return text
27
 
28
  # Function to parse the resume text for name, email, phone, and skills
29
+ def parse_resume(text, tokenizer, model):
30
  # Define the prompts for each type of information
31
  prompts = {
32
  "name": "Extract the name from this resume:\n",
 
49
  results[key] = email[0] if email else None
50
  elif key == 'phone':
51
  # Use regex to validate phone number format
52
+ phone = re.findall(r'\b\d{8,12}\b', response)
53
  results[key] = phone[0] if phone else None
54
  elif key == 'skills':
55
  # Extract technical skills
 
71
  wb.save(output_file)
72
 
73
  # Function to process PDF files and output an Excel file
74
+ def process_pdfs(pdfs, tokenizer, model):
75
  parsed_data = []
76
 
77
  for pdf in pdfs:
 
79
  text = extract_text_from_pdf(pdf.name)
80
 
81
  # Parse the text for relevant details
82
+ parsed_info = parse_resume(text, tokenizer, model)
83
 
84
  # Add parsed information to the list
85
  parsed_data.append(parsed_info)
 
104
 
105
  gr.Markdown("### Upload PDF Resumes")
106
 
107
+ # File input to upload resumes (use "file" for multiple file uploads)
108
+ pdfs_input = gr.File(file_count="multiple", type="file")
109
  output_file = gr.File()
110
 
111
+ # Process the PDFs and parse them once authenticated
112
  process_button = gr.Button("Process Resumes")
113
+ process_button.click(process_pdfs, inputs=[pdfs_input], outputs=[output_file])
114
 
115
  # Launch the app
116
  app.launch()