barghavani committed on
Commit
1c7c1d3
·
verified ·
1 Parent(s): 1d086f7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -65
app.py CHANGED
@@ -1,86 +1,76 @@
1
- from langchain_openai import ChatOpenAI
2
- from langchain_core.prompts import PromptTemplate
3
- from langchain.chains import LLMChain
4
- from langchain.memory import ConversationBufferMemory
5
-
6
  import gradio as gr
7
- import os
8
  import io
 
9
  import PyPDF2
 
 
 
 
 
 
 
10
 
11
  def extract_text_from_pdf_binary(pdf_binary):
 
12
  text = ""
13
  pdf_data = io.BytesIO(pdf_binary)
14
  reader = PyPDF2.PdfReader(pdf_data)
15
- num_pages = len(reader.pages)
16
- for page in range(num_pages):
17
- current_page = reader.pages[page]
18
- page_text = current_page.extract_text()
19
  if page_text:
20
  text += page_text
21
  return text
22
 
23
def parse_resume_text_into_fields(resume_text):
    """Return hard-coded sample resume fields (placeholder parser).

    ``resume_text`` is accepted but never read: every call produces the same
    illustrative values. Real parsing logic for the resume format in use
    still has to be written. The keys are meant to match the placeholders
    expected by the prompt template.
    """
    sample_fields = dict(
        name="John Doe",
        phoneNumbers="+1 234 567 890",
        websites="www.johndoe.com",
        emails="[email protected]",
        dateOfBirth="1990-01-01",
        addresses="123 Main St, Anytown, USA",
        summary="Experienced software engineer...",
        education="B.S. in Computer Science from XYZ University",
        workExperience="Software Engineer at ABC Corp...",
        skills="Python, Java, SQL",
        certifications="Certified Kubernetes Administrator",
        # Extend with further fields as the template grows.
    )
    return sample_fields
40
 
41
def format_resume_to_yaml(api_key, file_content, filter_option="full"):
    """Send a resume PDF through an OpenAI chat model using the YAML prompt.

    Parameters:
    - api_key (str): OpenAI API key; exported as ``OPENAI_API_KEY`` for the
      duration of the process.
    - file_content (bytes): Raw bytes of the uploaded PDF.
    - filter_option (str): Currently unused; kept for interface compatibility.

    Returns:
    - str: The model's response for the filled-in template.

    Raises:
    - ValueError: If ``file_content`` is empty.
    """
    # Validate before touching process-wide state so a bad upload has no
    # side effects.
    if not file_content:
        raise ValueError("The uploaded file is empty.")
    os.environ['OPENAI_API_KEY'] = api_key

    resume_text = extract_text_from_pdf_binary(file_content)
    resume_fields = parse_resume_text_into_fields(resume_text)

    template = """---
{name}
{phoneNumbers}
{websites}
{emails}
{dateOfBirth}
{addresses}
{summary}
{education}
{workExperience}
{skills}
{certifications}
{chat_history}
{human_input}
"""

    # Keep only parsed fields that have a real "{key}" placeholder; a bare
    # substring test would also match keys that merely appear inside other
    # words of the template.
    prompt_inputs = {
        key: value
        for key, value in resume_fields.items()
        if "{" + key + "}" in template
    }

    # Let LangChain derive the variable list from the template so that
    # {chat_history} and {human_input} are declared alongside the fields.
    prompt = PromptTemplate.from_template(template)

    # With more than one chain input the memory cannot guess which one is
    # the human turn; without input_key it raises "One input key expected"
    # when saving the conversation.
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        input_key="human_input",
    )

    llm_chain = LLMChain(
        llm=ChatOpenAI(model="gpt-3.5-turbo"),
        prompt=prompt,
        verbose=True,
        memory=memory,
    )

    # predict() takes the template variables as keyword arguments.
    return llm_chain.predict(human_input=resume_text, **prompt_inputs)
 
 
 
 
 
 
1
  import gradio as gr
 
2
  import io
3
+ import os
4
  import PyPDF2
5
+ from gradio.components import File, Textbox
6
+ from langchain_openai import ChatOpenAI
7
+ from langchain.chains import LLMChain
8
+ from langchain.memory import ConversationBufferMemory
9
+ from langchain import PromptTemplate
10
+ from sklearn.feature_extraction.text import TfidfVectorizer
11
+ from sklearn.metrics.pairwise import cosine_similarity
12
 
13
  def extract_text_from_pdf_binary(pdf_binary):
14
+ """Extracts text from a PDF file binary."""
15
  text = ""
16
  pdf_data = io.BytesIO(pdf_binary)
17
  reader = PyPDF2.PdfReader(pdf_data)
18
+ for page in reader.pages:
19
+ page_text = page.extract_text()
 
 
20
  if page_text:
21
  text += page_text
22
  return text
23
 
24
def calculate_resume_score(resume_text, job_description):
    """
    Calculates the relevance score of the resume to the job description
    using the cosine similarity of their TF-IDF vectors.

    Parameters:
    - resume_text (str): Text of the resume.
    - job_description (str): Text of the job description.

    Returns:
    - score (float): Similarity score between the resume and job description.
      0.0 is returned when either text is missing/blank or when no usable
      tokens can be extracted from the pair.
    """
    # A blank or missing document shares nothing with the other one. Returning
    # early also avoids the vectorizer failing on None or on two empty texts
    # (e.g. an image-only PDF plus an empty job-description box).
    resume_blank = not (resume_text and resume_text.strip())
    job_blank = not (job_description and job_description.strip())
    if resume_blank or job_blank:
        return 0.0

    vectorizer = TfidfVectorizer()
    try:
        tfidf_matrix = vectorizer.fit_transform([resume_text, job_description])
    except ValueError:
        # Raised by scikit-learn for an empty vocabulary, e.g. when the texts
        # contain only punctuation or single-character tokens.
        return 0.0

    # Row 0 is the resume, row 1 the job description.
    score = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]
    return float(score)
 
 
39
 
40
def format_resume_to_yaml(api_key, file_content, job_description):
    """Score an uploaded resume PDF against a job description.

    The YAML formatting step is still a stub in this revision: the first
    element of the result is a fixed placeholder string, not model output.

    Parameters:
    - api_key (str): OpenAI API key; exported as ``OPENAI_API_KEY``.
    - file_content (bytes): Raw bytes of the uploaded PDF resume.
    - job_description (str): Job description to compare the resume with.

    Returns:
    - tuple: ``(placeholder_yaml, resume_score)``.

    Raises:
    - ValueError: If ``file_content`` is empty.
    """
    if not file_content:
        raise ValueError("The uploaded file is empty.")

    # Exported for the LLM step that is not wired in yet (presumably read by
    # the OpenAI client; nothing in this function uses it directly).
    os.environ['OPENAI_API_KEY'] = api_key

    yaml_placeholder = "Formatted Resume in YAML (placeholder)"
    relevance = calculate_resume_score(
        extract_text_from_pdf_binary(file_content),
        job_description,
    )

    # TODO: replace the placeholder with the real LLM-based YAML formatting.
    return yaml_placeholder, relevance
56
 
57
def main():
    """Build and launch the Gradio interface with job description input.

    The interface takes an OpenAI API key, a PDF resume and a job
    description, and shows the two values returned by
    ``format_resume_to_yaml``: the formatted resume and its score.
    """
    iface = gr.Interface(
        fn=format_resume_to_yaml,
        inputs=[
            # Mask the key: the app is launched with share=True, so the page
            # is reachable through a public link and may be screen-shared.
            Textbox(label="Enter your OpenAI API Key", type="password"),
            File(label="Upload your PDF resume", type="binary"),
            Textbox(label="Paste the Job Description here", lines=10),
        ],
        outputs=[
            Textbox(label="Formatted Resume in YAML"),
            Textbox(label="Resume Score"),
        ],
        title="Resume to YAML Formatter with ATS Scoring",
        description="Upload a PDF resume, paste the job description, and enter your OpenAI API key to get the resume formatted to a YAML template and score its relevance to the job.",
    )
    iface.launch(debug=True, share=True)


# Start the app only when run as a script, not when imported.
if __name__ == "__main__":
    main()