Manojajj committed
Commit c0605d9 · verified · 1 Parent(s): 67ba08f

Update app.py

Files changed (1)
  1. app.py +20 -14
app.py CHANGED
@@ -2,7 +2,12 @@ import gradio as gr
  import pdfplumber
  import re
  import openpyxl
- from transformers import pipeline
+ from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
+
+ # Authenticate Hugging Face API (ensure you're logged in already)
+ model_name = "meta-llama/Llama-3.1-70B-Instruct" # Replace with your actual model name
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+ model = AutoModelForCausalLM.from_pretrained(model_name)

  # Function to extract text from PDF
  def extract_text_from_pdf(pdf_path):
@@ -12,10 +17,6 @@ def extract_text_from_pdf(pdf_path):
              text += page.extract_text()
      return text

- # Load Llama model for parsing (replace with actual model path if available)
- model_name = "meta-llama/Llama-3.1-70B-Instruct" # Replace with the model path or identifier
- nlp = pipeline("text2text-generation", model=model_name)
-
  # Function to parse the resume text for name, email, phone, and skills
  def parse_resume(text):
      # Define the prompts for each type of information
@@ -29,22 +30,24 @@ def parse_resume(text):
      results = {}

      for key, prompt in prompts.items():
-         # Get the model response for each field
-         response = nlp(prompt + text)
-
+         # Generate model response for each field
+         inputs = tokenizer(prompt + text, return_tensors="pt")
+         outputs = model.generate(**inputs, max_length=500)
+         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
          if key == 'email':
              # Use regex to validate email format
-             email = re.findall(r'\S+@\S+', response[0]['generated_text'])
+             email = re.findall(r'\S+@\S+', response)
              results[key] = email[0] if email else None
          elif key == 'phone':
              # Use regex to validate phone number format
-             phone = re.findall(r'\b\d{10,15}\b', response[0]['generated_text'])
+             phone = re.findall(r'\b\d{10,15}\b', response)
              results[key] = phone[0] if phone else None
          elif key == 'skills':
              # Extract technical skills
-             results[key] = response[0]['generated_text']
+             results[key] = response
          else:
-             results[key] = response[0]['generated_text']
+             results[key] = response

      return results

@@ -79,12 +82,15 @@ def process_pdfs(pdfs):

      return output_file

- # Gradio interface setup
+ # Gradio interface setup with blank API space (Hugging Face integration)
  iface = gr.Interface(
      fn=process_pdfs,
      inputs=gr.File(file_count="multiple", type="file"),
      outputs=gr.File(),
-     live=True
+     live=True,
+     title="AI Resume Parser",
+     description="Upload PDF resumes, and the app will parse and extract Name, Email, Phone, and Skills from them.",
+     examples=[["path_to_sample_resume.pdf"]] # Provide sample files if necessary
  )

  # Launch the Gradio app
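Note on the parsing hunk: the prompts dict that drives this loop sits outside the hunk, so the exact prompts are not visible here, but the post-processing is plain regex and can be exercised on its own. The sketch below is illustrative only; extract_fields and the sample strings are hypothetical, while the two patterns are exactly the ones the commit applies to the decoded model output.

import re

def extract_fields(generated):
    # 'generated' maps field name -> raw model output text; the keys mirror
    # the prompts dict assumed by parse_resume (not shown in this diff).
    results = {}
    for key, text in generated.items():
        if key == 'email':
            # The commit's pattern: any whitespace-delimited token containing '@'
            matches = re.findall(r'\S+@\S+', text)
            results[key] = matches[0] if matches else None
        elif key == 'phone':
            # The commit's pattern: an unbroken run of 10 to 15 digits
            matches = re.findall(r'\b\d{10,15}\b', text)
            results[key] = matches[0] if matches else None
        else:
            # 'name' and 'skills' are kept as free text, as in the commit
            results[key] = text
    return results

print(extract_fields({
    'email': 'Reach her at jane.doe@example.com today',
    'phone': 'Phone: 9876543210',
    'skills': 'Python, SQL, Docker',
}))
# -> {'email': 'jane.doe@example.com', 'phone': '9876543210', 'skills': 'Python, SQL, Docker'}

One caveat carried over from the diff: \S+@\S+ is permissive and will also capture punctuation stuck to the address (for example a sentence-ending period), so stripping punctuation from the match, or tightening the pattern, is a cheap follow-up.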
 
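On the model side, the commit replaces the removed pipeline("text2text-generation", ...) call with an explicit AutoTokenizer / AutoModelForCausalLM pair, which matches the model family: Llama 3.1 is a decoder-only, causal model, not a text2text (encoder-decoder) one. Two practical caveats remain in the new code: a plain from_pretrained of the 70B checkpoint pulls roughly 140 GB of weights at 16-bit precision, far more than a typical Space provides, and max_length=500 caps prompt and completion together, so a long resume can leave no room for the answer. The sketch below keeps the commit's tokenize/generate/decode flow but addresses both points; the smaller checkpoint name, the dtype, and device_map="auto" (which needs the accelerate package) are assumptions for illustration, not part of the commit.

# Minimal sketch of the tokenize -> generate -> decode flow adopted in app.py.
# The checkpoint below is a stand-in for the gated 70B model; use any causal
# instruct model you have access to.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "meta-llama/Llama-3.2-1B-Instruct"  # assumed stand-in, not the model in the commit
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,  # half-precision weights to roughly halve memory use
    device_map="auto",          # let accelerate place layers on available devices
)

resume_text = "..."  # placeholder for the output of extract_text_from_pdf
prompt = "Extract the candidate's email address from the following resume:\n" + resume_text
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=64)  # bounds the completion only, not prompt + completion
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(response)

A small helper wrapping the last four lines and taking (prompt, resume_text) would slot into parse_resume in place of the tokenizer/generate/decode lines added in the hunk above.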