barghavani committed: Update app.py
app.py CHANGED
@@ -1,86 +1,76 @@
-from langchain_openai import ChatOpenAI
-from langchain_core.prompts import PromptTemplate
-from langchain.chains import LLMChain
-from langchain.memory import ConversationBufferMemory
-
 import gradio as gr
-import os
 import io
+import os
 import PyPDF2
+from gradio.components import File, Textbox
+from langchain_openai import ChatOpenAI
+from langchain.chains import LLMChain
+from langchain.memory import ConversationBufferMemory
+from langchain import PromptTemplate
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.metrics.pairwise import cosine_similarity
 
 def extract_text_from_pdf_binary(pdf_binary):
+    """Extracts text from a PDF file binary."""
     text = ""
     pdf_data = io.BytesIO(pdf_binary)
     reader = PyPDF2.PdfReader(pdf_data)
-
-
-        current_page = reader.pages[page]
-        page_text = current_page.extract_text()
+    for page in reader.pages:
+        page_text = page.extract_text()
         if page_text:
             text += page_text
     return text
 
-def
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        # Add any additional fields as necessary.
-    }
+def calculate_resume_score(resume_text, job_description):
+    """
+    Calculates the relevance score of the resume to the job description using cosine similarity.
+
+    Parameters:
+    - resume_text (str): Text of the resume.
+    - job_description (str): Text of the job description.
+
+    Returns:
+    - score (float): Similarity score between the resume and job description.
+    """
+    vectorizer = TfidfVectorizer()
+    tfidf_matrix = vectorizer.fit_transform([resume_text, job_description])
+    score = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]
+    return score
 
-def format_resume_to_yaml(api_key, file_content,
-
+def format_resume_to_yaml(api_key, file_content, job_description):
+    """Formats the content of a resume PDF file to YAML and calculates its relevance to a job description."""
     if not file_content:
         raise ValueError("The uploaded file is empty.")
-    resume_text = extract_text_from_pdf_binary(file_content)
-
-    # Extract and parse resume text into fields
-    resume_fields = parse_resume_text_into_fields(resume_text)
-
-    # Define the YAML template here, before it's used
-    template = """---
-{name}
-{phoneNumbers}
-{websites}
-{emails}
-{dateOfBirth}
-{addresses}
-{summary}
-{education}
-{workExperience}
-{skills}
-{certifications}
-{chat_history}
-{human_input}
-"""
 
-
-
+    os.environ['OPENAI_API_KEY'] = api_key
+    resume_text = extract_text_from_pdf_binary(file_content)
 
-
-
-
-    )
+    # Additional step to calculate the resume score relative to the job description.
+    resume_score = calculate_resume_score(resume_text, job_description)
+
+    # Formatting the resume to YAML (the existing implementation continues here)...
+    # Assume llm_chain.predict and other logic here as before.
 
-
+    # For demonstration, return both formatted resume (in real use, integrate this properly) and score.
+    return "Formatted Resume in YAML (placeholder)", resume_score
 
-
-
-
-
-
+def main():
+    """Main function to launch the Gradio interface with job description input."""
+    iface = gr.Interface(
+        fn=format_resume_to_yaml,
+        inputs=[
+            Textbox(label="Enter your OpenAI API Key"),
+            File(label="Upload your PDF resume", type="binary"),
+            Textbox(label="Paste the Job Description here", lines=10)
+        ],
+        outputs=[
+            Textbox(label="Formatted Resume in YAML"),
+            Textbox(label="Resume Score")
+        ],
+        title="Resume to YAML Formatter with ATS Scoring",
+        description="Upload a PDF resume, paste the job description, and enter your OpenAI API key to get the resume formatted to a YAML template and score its relevance to the job."
     )
+    iface.launch(debug=True, share=True)
 
-
-
-    return res
+if __name__ == "__main__":
+    main()
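The committed format_resume_to_yaml still returns a placeholder string for the YAML output ("Assume llm_chain.predict and other logic here as before"). The sketch below shows one way that step could be wired up using the imports the new file already declares (ChatOpenAI, LLMChain, ConversationBufferMemory, PromptTemplate). The helper name format_resume_with_llm, the prompt wording, and the gpt-3.5-turbo model choice are illustrative assumptions, not part of this commit.

# Hypothetical helper (not in this commit): one possible wiring of the LangChain step.
from langchain_openai import ChatOpenAI
from langchain.chains import LLMChain
from langchain.memory import ConversationBufferMemory
from langchain import PromptTemplate

def format_resume_with_llm(resume_text):
    # The key list mirrors the YAML template fields defined in the previous version of app.py.
    template = """You convert resumes into YAML.
Rewrite the resume below as a YAML document with the keys:
name, phoneNumbers, websites, emails, dateOfBirth, addresses,
summary, education, workExperience, skills, certifications.

{chat_history}
Resume:
{human_input}
"""
    prompt = PromptTemplate(
        input_variables=["chat_history", "human_input"],
        template=template,
    )
    llm_chain = LLMChain(
        llm=ChatOpenAI(model="gpt-3.5-turbo", temperature=0),  # model choice is an assumption
        prompt=prompt,
        memory=ConversationBufferMemory(memory_key="chat_history"),
    )
    return llm_chain.predict(human_input=resume_text)

format_resume_to_yaml could then return this output together with resume_score, keeping the two-output shape the Gradio interface expects.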
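The new scoring path needs no API key, so it can be exercised on its own. A minimal check of the TF-IDF / cosine-similarity scorer added in this commit, with made-up sample strings, could look like this:

# Standalone check of calculate_resume_score as committed.
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def calculate_resume_score(resume_text, job_description):
    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform([resume_text, job_description])
    return cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]

# Identical texts score 1.0; texts with no shared vocabulary score 0.0.
print(calculate_resume_score("python gradio langchain", "python gradio langchain"))  # ~1.0
print(calculate_resume_score("python developer", "pastry chef"))                     # 0.0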