Update app.py
app.py CHANGED
@@ -1,112 +1,117 @@
Before:

-import gradio as gr
-import io
 import os
 import PyPDF2
-from gradio.components import File, Textbox
 from langchain_openai import ChatOpenAI
 from langchain.chains import LLMChain
 from langchain.memory import ConversationBufferMemory
 from langchain import PromptTemplate
-
-from
 
 def extract_text_from_pdf_binary(pdf_binary):
-    """Extracts text from a PDF file binary."""
     text = ""
     pdf_data = io.BytesIO(pdf_binary)
     reader = PyPDF2.PdfReader(pdf_data)
-
-
-
             text += page_text
     return text
 
-def
-
-
-
-    Parameters:
-    - resume_text (str): Text of the resume.
-    - job_description (str): Text of the job description.
-
-    Returns:
-    - score (float): Similarity score between the resume and job description.
-    """
-    vectorizer = TfidfVectorizer()
-    tfidf_matrix = vectorizer.fit_transform([resume_text, job_description])
-    score = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]
-    return score
-
-def format_resume_and_score(api_key, file_content, job_description):
-    """Formats the content of a resume PDF file to YAML and calculates its relevance to a job description."""
     if not file_content:
         raise ValueError("The uploaded file is empty.")
 
-    # Set the OpenAI API key
-    os.environ['OPENAI_API_KEY'] = api_key
     resume_text = extract_text_from_pdf_binary(file_content)
 
-    # Formatting the resume to YAML
     template = """Format the provided resume to this YAML template:
 ---
 name: ''
 phoneNumbers:
-
 websites:
-
 emails:
-
 dateOfBirth: ''
 addresses:
-
-
-
-
-
 summary: ''
 education:
-
-
-
-
-
 workExperience:
-
-
-
-
 skills:
-
 certifications:
-
-
-
-    memory = ConversationBufferMemory(memory_key="resume_text")
 
-
-
 
-
-
 
-
 
 def main():
-
     iface = gr.Interface(
-        fn=
-        inputs=[
-
-
-
-        ],
-        outputs=[
-            Textbox(label="Formatted Resume in YAML"),
-            Textbox(label="Resume Score")
-        ],
-        title="Resume Formatter and Relevance Scorer",
-        description="Upload a PDF resume, paste the job description, and enter your OpenAI API key to format your resume to a YAML template and score its relevance to the job."
     )
     iface.launch(debug=True, share=True)
 
 if __name__ == "__main__":
After:

+import gradio as gr
 import os
+import io
 import PyPDF2
 from langchain_openai import ChatOpenAI
 from langchain.chains import LLMChain
 from langchain.memory import ConversationBufferMemory
 from langchain import PromptTemplate
+
+from gradio.components import File, Textbox, Slider
 
 def extract_text_from_pdf_binary(pdf_binary):
     text = ""
     pdf_data = io.BytesIO(pdf_binary)
     reader = PyPDF2.PdfReader(pdf_data)
+    num_pages = len(reader.pages)
+
+    for page in range(num_pages):
+        current_page = reader.pages[page]
+        page_text = current_page.extract_text()
+        if page_text:  # Check if page_text is not None or empty
             text += page_text
     return text
 
+def format_resume_to_yaml(api_key, file_content):
+    os.environ['OPENAI_API_KEY'] = api_key
+
     if not file_content:
         raise ValueError("The uploaded file is empty.")
 
     resume_text = extract_text_from_pdf_binary(file_content)
 
     template = """Format the provided resume to this YAML template:
 ---
 name: ''
 phoneNumbers:
+- ''
 websites:
+- ''
 emails:
+- ''
 dateOfBirth: ''
 addresses:
+- street: ''
+  city: ''
+  state: ''
+  zip: ''
+  country: ''
 summary: ''
 education:
+- school: ''
+  degree: ''
+  fieldOfStudy: ''
+  startDate: ''
+  endDate: ''
 workExperience:
+- company: ''
+  position: ''
+  startDate: ''
+  endDate: ''
 skills:
+- name: ''
 certifications:
+- name: ''
+{chat_history}
+{human_input}"""
 
+    prompt = PromptTemplate(
+        input_variables=["chat_history", "human_input"],
+        template=template
+    )
 
+    memory = ConversationBufferMemory(memory_key="chat_history")
+
+    llm_chain = LLMChain(
+        llm=ChatOpenAI(model="gpt-3.5-turbo"),
+        prompt=prompt,
+        verbose=True,
+        memory=memory,
+    )
 
+    res = llm_chain.predict(human_input=resume_text)
+    return res
+
+def match_resume_to_job_description(api_key, resume_file_content, job_description):
+    os.environ['OPENAI_API_KEY'] = api_key
+
+    if not resume_file_content or not job_description:
+        raise ValueError("The uploaded file or job description is empty.")
+
+    resume_text = extract_text_from_pdf_binary(resume_file_content)
+
+    prompt = f"Given the following resume text:\n{resume_text}\n\nAnd the job description:\n{job_description}\n\nEvaluate how well the resume matches the job description and provide a matching score from 0 to 100, where 100 is a perfect match."
+
+    llm = ChatOpenAI(model="gpt-3.5-turbo")
+    response = llm.predict(prompt=prompt)
+
+    return response
 
 def main():
+    input_api_key = Textbox(label="Enter your OpenAI API Key")
+    input_pdf_file = File(label="Upload your PDF resume", type="binary")
+    input_job_description = Textbox(label="Enter the job description", placeholder="Paste the job description here")
+    output_yaml = Textbox(label="Formatted Resume in YAML")
+    output_match_score = Textbox(label="Resume Match Score")
+
     iface = gr.Interface(
+        fn=[format_resume_to_yaml, match_resume_to_job_description],
+        inputs=[input_api_key, input_pdf_file, input_job_description],
+        outputs=[output_yaml, output_match_score],
+        title="Resume to YAML Formatter and Matcher",
+        description="Upload a PDF resume and enter your OpenAI API key to get it formatted to a YAML template and matched to a job description.",
     )
+
     iface.launch(debug=True, share=True)
 
 if __name__ == "__main__":
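Two details in the new version are worth flagging: gr.Interface expects fn to be a single callable rather than a list of functions, and LangChain's ChatOpenAI.predict() takes the prompt text as a positional argument, so llm.predict(prompt=prompt) would raise a TypeError. Below is a minimal sketch of how main() could hand Gradio one callable, assuming the imports and the two functions defined in the file above; the wrapper name format_and_match is illustrative and not part of the commit, and match_resume_to_job_description would also need its predict call changed to pass the prompt positionally.

# Sketch only, not part of the commit: wrap both steps behind one callable
# so gr.Interface receives a single fn and returns one value per output component.
def format_and_match(api_key, file_content, job_description):
    formatted_yaml = format_resume_to_yaml(api_key, file_content)
    match_score = match_resume_to_job_description(api_key, file_content, job_description)
    return formatted_yaml, match_score

def main():
    input_api_key = Textbox(label="Enter your OpenAI API Key")
    input_pdf_file = File(label="Upload your PDF resume", type="binary")
    input_job_description = Textbox(label="Enter the job description", placeholder="Paste the job description here")
    output_yaml = Textbox(label="Formatted Resume in YAML")
    output_match_score = Textbox(label="Resume Match Score")

    iface = gr.Interface(
        fn=format_and_match,  # single callable instead of a list
        inputs=[input_api_key, input_pdf_file, input_job_description],
        outputs=[output_yaml, output_match_score],
        title="Resume to YAML Formatter and Matcher",
        description="Upload a PDF resume and enter your OpenAI API key to get it formatted to a YAML template and matched to a job description.",
    )
    iface.launch(debug=True, share=True)

# Inside match_resume_to_job_description, the model call would then read:
#     response = llm.predict(prompt)  # positional text argument

With a wrapper like this, each submission returns the formatted YAML and the match score together, one value per output Textbox.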