Anushkabhat9
committed on
Upload 11 files
- README.md +3 -3
- app.py +460 -0
- apt.txt +5 -0
- firm-capsule-436804-b5-5f553d9f1043.json +13 -0
- logo.jpeg +0 -0
- requirements.txt +19 -0
- resume_class.py +36 -0
- resume_generation_gemini_pro.py +235 -0
- setup.sh +1 -0
- similarity_score_refined.py +146 -0
- template_image.png +0 -0
README.md
CHANGED
@@ -1,8 +1,8 @@
 ---
-title:
+title: ResumeBuilder
 emoji: π
-colorFrom:
+colorFrom: blue
-colorTo:
+colorTo: indigo
 sdk: streamlit
 sdk_version: 1.40.0
 app_file: app.py
app.py
ADDED
@@ -0,0 +1,460 @@
# -*- coding: utf-8 -*-
import streamlit as st
import os
import pandas as pd
import matplotlib.pyplot as plt
from resume_generation_gemini_pro import generate_gemini
from similarity_score_refined import similarity_main
from pdf2image import convert_from_path, convert_from_bytes
from docx import Document
import subprocess
import shutil
import io
from io import BytesIO
import tempfile
from PIL import Image, ImageDraw, ImageFont
import PyPDF2
from docx2pdf import convert
import pdfplumber
import docx
import numpy as np
import pypandoc
import streamlit.components.v1 as components
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT

# Create temporary directories
temp_dir = tempfile.mkdtemp()

# Custom CSS for styling
st.markdown("""
<style>
.main {
    background-color: #f5f5f5;
    font-family: Arial, sans-serif;
}
h1, h2 {
    color: #4B7BE5;
    text-align: center;
}
.stContainer {
    /* background-color: #000000; */
    display: flex;
    justify-content: center;
    align-items: center;
    /* max-width: 100%; */
    height: 30%;
    width: 45%;
}
.logo-container {
    /* background-color: black; */
    display: flex;
    justify-content: center;
    align-items: center;
    padding: 10px;
    /* max-width: 100%; */
}
.logo-container img {
    max-width: 60%;
    height: 40%;
}
.stButton>button {
    /* background-color: #4B7BE5; */
    /* color: white; */
    /* font-size: 18px; */
    appearance: none;
    background-color: transparent;
    border: 0.125em solid #1A1A1A;
    border-radius: 0.9375em;
    box-sizing: border-box;
    color: #3B3B3B;
    cursor: pointer;
    display: inline-block;
    font-family: Roobert,-apple-system,BlinkMacSystemFont,"Segoe UI",Helvetica,Arial,sans-serif,"Apple Color Emoji","Segoe UI Emoji","Segoe UI Symbol";
    font-size: 16px;
    font-weight: 600;
    line-height: normal;
    margin: 0;
    min-height: 3.75em;
    min-width: 0;
    outline: none;
    padding: 1em 2.3em;
    text-align: center;
    text-decoration: none;
    transition: all 300ms cubic-bezier(.23, 1, 0.32, 1);
    user-select: none;
    -webkit-user-select: none;
    touch-action: manipulation;
    will-change: transform;
}
.stButton>button:hover {
    color: #fff;
    background-color: #1A1A1A;
    box-shadow: rgba(0, 0, 0, 0.25) 0 8px 15px;
    transform: translateY(-2px);
    border: none !important;
}
/* From Uiverse.io by e-coders */

/* .stButton>button:disabled {
    pointer-events: none;
} */

.stButton>button:active, .stButton>button:focus {
    box-shadow: none;
    transform: translateY(0);
    color: #fff;
    border: none !important;
    outline: none;
}
</style>
""", unsafe_allow_html=True)

# Add ResumeMagic Logo
# st.markdown('<div class="logo-container"></div>', unsafe_allow_html=True)
# st.image("template_image.png", width=80)
# st.markdown('</div>', unsafe_allow_html=True)
st.image("template_image.png", use_container_width=True)

# Title and Description
st.title("Resume Tailoring with Google Generative AI")
st.markdown("### Upload your resume and job description to check similarity and generate a tailored resume.")


# Helper function to save uploaded files temporarily and return their paths
def save_uploaded_file(content):
    if hasattr(content, 'name'):
        file_path = os.path.join("/tmp", content.name)
        with open(file_path, "wb") as f:
            f.write(content.read())
    else:
        file_path = os.path.join("/tmp", "temp_upload")
        with open(file_path, "w") as f:
            f.write(str(content))
    return file_path

# def save_uploaded_file(uploaded_file):
#     file_path = os.path.join("/tmp", uploaded_file.name)
#     with open(file_path, "wb") as f:
#         f.write(uploaded_file.getbuffer())
#     return file_path


# Two columns for file uploaders
col1, col2 = st.columns(2)
with col1:
    uploaded_resume = st.file_uploader("Upload Current Resume (.docx or .pdf)", type=["docx", "pdf"], key="resume")
with col2:
    uploaded_job_description = st.file_uploader("Upload Job Description (.docx or .pdf)", type=["docx", "pdf"], key="job_description")

def get_score(resume_path, job_description_path):
    similarity_score = similarity_main(resume_path, job_description_path)
    if isinstance(similarity_score, str) and '%' in similarity_score:
        similarity_score = float(similarity_score.replace('%', ''))

    # Display messages based on score range
    if similarity_score < 50:
        st.markdown('<p style="color: red; font-weight: bold;">Low chance, skills gap identified!</p>', unsafe_allow_html=True)
        pie_colors = ['#FF4B4B', '#E5E5E5']
    elif 50 <= similarity_score < 70:
        st.markdown('<p style="color: red; font-weight: bold;">Good chance but you can improve further!</p>', unsafe_allow_html=True)
        pie_colors = ['#FFC107', '#E5E5E5']
    else:
        st.markdown('<p style="color: green; font-weight: bold;">Excellent! You can submit your CV.</p>', unsafe_allow_html=True)
        pie_colors = ['#4CAF50', '#E5E5E5']

    return similarity_score, pie_colors

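# Example of the banding above (a minimal sketch with a hypothetical value):
# a similarity_main result of "63.40%" parses to 63.4, lands in the 50-70
# band, and renders the amber message with pie_colors ['#FFC107', '#E5E5E5'].
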
def display_score(similarity_score, pie_colors):
    # Display Score as a Pie Chart
    st.markdown(f"### Resume - Job Match: {int(similarity_score)}%")

    # Pie chart to show similarity
    fig, ax = plt.subplots()
    # ax.pie([similarity_score, 100 - similarity_score], labels=['Match', 'Difference'], autopct='%1.1f%%', startangle=140, colors=['#4B7BE5', '#E5E5E5'])
    ax.pie([similarity_score, 100 - similarity_score], labels=['Match', 'Difference'], autopct='%1.1f%%', startangle=140, colors=pie_colors)

    ax.axis('equal')
    st.pyplot(fig)

def display_docx_content(file):
    doc = docx.Document(file)
    full_text = []
    for para in doc.paragraphs:
        full_text.append(para.text)
    return '\n'.join(full_text)


# Function to save a file from BytesIO to a temporary file
def save_bytes_to_tempfile(bytes_data, suffix):
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
        temp_file.write(bytes_data)
        return temp_file.name

def save_bytes_as_pdf(docx_bytes, output_path='output.pdf'):
    # Create a temporary directory
    with tempfile.TemporaryDirectory() as tmp_dir:
        # Write the DOCX bytes to a temporary file
        temp_file = os.path.join(tmp_dir, 'temp.docx')
        with open(temp_file, 'wb') as f:
            f.write(docx_bytes)

        # Convert the temporary DOCX to PDF
        pdf_path = os.path.join(tmp_dir, 'output.pdf')
        convert(temp_file, pdf_path)

        # Copy the PDF to the desired output location; the temporary
        # directory and its contents are removed automatically when the
        # `with` block exits.
        with open(output_path, 'wb') as f:
            with open(pdf_path, 'rb') as src_f:
                f.write(src_f.read())

def display_content_with_page_numbers(content, words_per_page=290):
    # Split content into words
    words = content.split()
    total_pages = (len(words) // words_per_page) + (1 if len(words) % words_per_page != 0 else 0)

    # Display content with page numbers
    for i in range(total_pages):
        start_index = i * words_per_page
        end_index = start_index + words_per_page
        page_content = ' '.join(words[start_index:end_index])

        st.markdown(f"#### Page {i + 1}")
        st.write(page_content)

def save_docx_as_pdf(input_path, output_path='output.pdf'):
    if input_path.lower().endswith('.docx'):
        try:
            # Convert .docx to .pdf using LibreOffice; it writes the PDF into
            # the output directory under the *input* file's base name.
            out_dir = os.path.dirname(output_path) or '.'
            subprocess.run(['libreoffice', '--headless', '--convert-to', 'pdf', input_path, '--outdir', out_dir], check=True)
            produced = os.path.join(out_dir, os.path.splitext(os.path.basename(input_path))[0] + '.pdf')
            if not os.path.exists(produced):
                raise FileNotFoundError("Conversion failed; output PDF not found.")
            shutil.move(produced, output_path)
        except (FileNotFoundError, subprocess.CalledProcessError):
            st.error("Failed to convert DOCX to PDF. Please check LibreOffice installation.")
    elif input_path.lower().endswith('.pdf'):
        shutil.copy(input_path, output_path)
    else:
        raise ValueError("Unsupported file format. Please upload a .docx or .pdf file.")

def display_pdf_page(pdf_path):
    try:
        # Open PDF file
        with open(pdf_path, 'rb') as file:
            reader = PyPDF2.PdfReader(file)

            # Extract text from the first page
            page = reader.pages[0]
            x_object = page.extract_text()

        # Convert text to image (using PIL)
        img = Image.new('RGB', (800, 1000))
        draw = ImageDraw.Draw(img)
        try:
            font = ImageFont.truetype("arial.ttf", 20)
        except OSError:
            # arial.ttf is usually absent on Linux hosts
            font = ImageFont.load_default()

        # Draw text on the image
        draw.text((10, 10), x_object[:500], fill=(255, 255, 255), font=font)

        # Display the image in Streamlit
        st.image(img)
    except Exception as e:
        st.error(f"Failed to display image: {str(e)}")

# def display_pdf_pages_as_images(pdf_path):
#     try:
#         with pdfplumber.open(pdf_path) as pdf:
#             for i, page in enumerate(pdf.pages):
#                 st.markdown(f"#### Page {i + 1}")
#                 # Convert the page to an image
#                 image = page.to_image()
#                 # Render the image using Streamlit
#                 # st.image(image.original, use_column_width=True)
#                 st.image(image.original, use_container_width=False)
#     except Exception as e:
#         st.error(f"Failed to display PDF as image: {str(e)}")

def display_pdf_pages_as_images(pdf_path):
    try:
        with pdfplumber.open(pdf_path) as pdf:
            num_pages = len(pdf.pages)
            # Create a container with columns for each page
            columns = st.columns(num_pages)

            for i, page in enumerate(pdf.pages):
                # Convert the page to an image
                image = page.to_image()
                # Display each page image in its respective column
                with columns[i]:
                    st.markdown(f"#### Page {i + 1}")
                    st.image(image.original, use_container_width=True)

    except Exception as e:
        st.error(f"Failed to display PDF as image: {str(e)}")

def display_doc_as_image2(pdf_path):
    iframe_code = f"""
    <iframe src="{pdf_path}" width="100%" height="600px"></iframe>
    """
    st.markdown(iframe_code, unsafe_allow_html=True)

def add_bold_and_normal_text(paragraph, text):
    """Adds text to the paragraph, handling bold formatting."""
    while "**" in text:
        before, bold_part, after = text.partition("**")
        if before:
            paragraph.add_run(before)
        if bold_part == "**":
            bold_text, _, text = after.partition("**")
            paragraph.add_run(bold_text).bold = True
        else:
            text = after
    if text:
        paragraph.add_run(text)

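# Quick sanity check of the "**" parser above (a minimal sketch; `p` is any
# python-docx paragraph):
#   add_bold_and_normal_text(p, "Skilled in **Python** and SQL")
# produces three runs: "Skilled in ", a bold "Python", and " and SQL".
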
# Process if files are uploaded
if uploaded_resume and uploaded_job_description:
    # Save files
    resume_path = save_uploaded_file(uploaded_resume)
    job_description_path = save_uploaded_file(uploaded_job_description)

    # Similarity Score Section
    st.markdown("---")
    # st.subheader("Check Job Match")

    if st.button("Resume-JD Matching"):
        with st.spinner("Computing Match"):
            similarity_score, pie_colors = get_score(resume_path, job_description_path)
            display_score(similarity_score, pie_colors)

            # Autoscroll
            components.html("""
                <script>
                    window.onload = function() {
                        window.scrollTo(0, document.body.scrollHeight);
                    };
                </script>
            """)

    # Generate Tailored Resume Section
    st.markdown("---")
    # st.subheader("Tailor Resume")

    if st.button("Tailor Resume"):
        with st.spinner("Generating resume..."):
            generated_resume, new_resume_path = generate_gemini(resume_path, job_description_path)
            # resume_path = save_uploaded_file(generated_resume)
            # st.markdown("Generated Tailored Resume:")
            # st.write(generated_resume)

            # Autoscroll
            components.html("""
                <script>
                    window.onload = function() {
                        window.scrollTo(0, document.body.scrollHeight);
                    };
                </script>
            """)

        # with st.spinner("Computing Match"):
        #     similarity_score, pie_colors = get_score(resume_path, job_description_path)
        #     display_score(similarity_score, pie_colors)

        if generated_resume is not None:
            # st.markdown("---")
            st.title("Uploaded Resume")
            doc = Document()

            # Split the text into lines for processing
            lines = generated_resume.splitlines()

            for line in lines:
                if line.startswith("# "):  # Top-level heading (Highest level)
                    paragraph = doc.add_heading(line[2:].strip(), level=0)  # Level 0 is the highest heading in Word
                    paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.JUSTIFY
                elif line.startswith("## "):  # Main heading (Level 1)
                    paragraph = doc.add_heading(line[3:].strip(), level=1)
                    paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.JUSTIFY
                elif line.startswith("### "):  # Subheading (Level 2)
                    paragraph = doc.add_heading(line[4:].strip(), level=2)
                    paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.JUSTIFY
                elif line.startswith("- "):  # Bullet points
                    paragraph = doc.add_paragraph(style="List Bullet")
                    add_bold_and_normal_text(paragraph, line[2:].strip())
                    paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.JUSTIFY
                elif line.startswith("* "):  # Sub-bullet points or normal list items
                    paragraph = doc.add_paragraph(style="List Bullet 2")
                    add_bold_and_normal_text(paragraph, line[2:].strip())
                    paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.JUSTIFY
                elif line.strip():  # Normal text (ignores blank lines)
                    paragraph = doc.add_paragraph()
                    add_bold_and_normal_text(paragraph, line.strip())
                    paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.JUSTIFY

            # Save the generated document as a .docx file in memory
            resume_bytes = BytesIO()
            doc.save(resume_bytes)
            resume_bytes.seek(0)

            # Save the .docx to a temporary file
            gen_docx_path = save_bytes_to_tempfile(resume_bytes.getvalue(), '.docx')

            # Convert the generated .docx to a .pdf
            gen_pdf_path = gen_docx_path.replace('.docx', '.pdf')
            save_docx_as_pdf(gen_docx_path, gen_pdf_path)
            # st.write(display_docx_content(gen_pdf_path))

            # st.markdown("### Uploaded Resume")
            save_docx_as_pdf(resume_path, '/tmp/uploaded_resume.pdf')
            display_pdf_pages_as_images('/tmp/uploaded_resume.pdf')

            st.success("Download tailored resume")
            st.download_button(
                label="Generated Resume (Word)",
                data=resume_bytes,
                file_name="tailored_resume.docx",
                mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
            )

            # Display uploaded and generated resumes side-by-side
            # col1, col2 = st.columns(2)
            # with col1:
            #     st.markdown("### Uploaded Resume")
            #     save_docx_as_pdf(resume_path, '/tmp/uploaded_resume.pdf')
            #     display_pdf_pages_as_images(resume_path)
            # with col2:
            #     st.markdown("### Tailored Resume")
            #     # display_pdf_pages_as_images(gen_pdf_path)
            #     display_content_with_page_numbers(generated_resume, 290)
            #     st.write(generated_resume)
            #     # display_content_with_page_numbers(generated_resume, 290)

            # st.success(f"Download tailored resume")
            # col1, col2 = st.columns(2)
            # with col1:
            #     st.download_button(
            #         label="Generated Resume (PDF)",
            #         data=open(gen_pdf_path, 'rb').read(),
            #         file_name="tailored_resume.pdf",
            #         mime="application/pdf"
            #     )
            # with col2:
            #     st.download_button(
            #         label="Generated Resume (Word)",
            #         data=resume_bytes,
            #         file_name="tailored_resume.docx",
            #         mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
            #     )

else:
    st.warning("Please upload both the resume and job description files.")
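# To run the app locally (a minimal sketch): install the Python packages from
# requirements.txt and the system tools from apt.txt, then `streamlit run app.py`.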
apt.txt
ADDED
@@ -0,0 +1,5 @@
poppler-utils=0.90.0
poppler
unoconv
libreoffice
pandoc
firm-capsule-436804-b5-5f553d9f1043.json
ADDED
@@ -0,0 +1,13 @@
{
  "type": "service_account",
  "project_id": "firm-capsule-436804-b5",


  "client_email": "[email protected]",

  "auth_uri": "https://accounts.google.com/o/oauth2/auth",
  "token_uri": "https://oauth2.googleapis.com/token",
  "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
  "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/genai-328%40firm-capsule-436804-b5.iam.gserviceaccount.com",
  "universe_domain": "googleapis.com"
}
logo.jpeg
ADDED
requirements.txt
ADDED
@@ -0,0 +1,19 @@
streamlit
PyPDF2
pdfplumber
pypandoc
langchain_community
langchain_google_genai
python-docx
docx2txt
faiss-gpu
google-generativeai
sentence_transformers
Transformers
openai==0.28.0
nltk
matplotlib
IPython
pdf2image
docx2pdf
Pillow
resume_class.py
ADDED
@@ -0,0 +1,36 @@
import torch
from transformers import RagTokenizer, RagTokenForGeneration

# Load the RAG model
rag_model = RagTokenForGeneration.from_pretrained("rag_model")

job_description_text = """
We are looking for a Senior Data Scientist with 7+ years of experience in machine learning, deep learning, and advanced statistical modeling. The candidate should have a strong background in Python, TensorFlow, and PyTorch. Experience with cloud platforms like AWS or GCP is mandatory. Responsibilities include leading data science teams, designing predictive models, and optimizing business strategies through data insights. A PhD in Computer Science, Statistics, or a related field is highly preferred.
"""

resume_text = """
Hardworking construction worker with 2 years of experience in residential building projects. Skilled in operating heavy machinery, reading blueprints, and ensuring site safety. Proficient in the use of tools like drills, saws, and hammers. Strong knowledge of safety regulations and experience collaborating with contractors and architects. Dedicated to delivering quality results and meeting project deadlines.
"""

query = (resume_text + " " + job_description_text +
         "Provide a score between 0 and 100% for the resume against the job description and explain your reasoning. Summarize the classification whether the candidate is a perfect fit, a good fit, a potential fit, or no fit")

tokenizer = RagTokenizer.from_pretrained("facebook/rag-sequence-base", trust_remote_code=True)

def test_new_data(query, model, tokenizer):
    inputs = tokenizer(query, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        outputs = model(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"]
        )
    logits = outputs.logits
    preds = torch.argmax(logits, axis=1)
    return preds.item()

predicted_label = test_new_data(query, rag_model, tokenizer)
print(f"Predicted Label: {predicted_label}")

inputs = tokenizer(query, return_tensors="pt", padding=True, truncation=True)
response = rag_model.generate(input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"])
print(f"Job Fitness: {response}")
resume_generation_gemini_pro.py
ADDED
@@ -0,0 +1,235 @@
# -*- coding: utf-8 -*-
"""Resume_generation_Gemini_pro.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/16z793IRwVmvKYCaOLGZFDYj-XOj8zEJL
"""

# from google.colab import drive,userdata
# drive.mount('/content/drive')

# !pip install streamlit -qq
# !pip install PyPDF2 -qq
# !pip install langchain_community -qq
# !pip install langchain_google_genai -qq
# !pip install python-docx -qq
# !pip install docx2txt -qq
# !pip install faiss-gpu -qq
# !pip install google-generativeai -qq
# !pip install --upgrade google-generativeai -qq

import docx2txt
import PyPDF2

def extract_text(file_path):
    if file_path.endswith(".docx"):
        # Extract text from DOCX file
        return docx2txt.process(file_path)

    elif file_path.endswith(".pdf"):
        # Extract text from PDF file
        text = ""
        with open(file_path, 'rb') as file:
            reader = PyPDF2.PdfReader(file)
            for page_num in range(len(reader.pages)):
                text += reader.pages[page_num].extract_text()
        return text

    else:
        raise ValueError("Unsupported file type")

# from google.colab import auth
# auth.authenticate_user()

import os

GOOGLE_APPLICATION_CREDENTIALS = os.environ["GOOGLE_APPLICATION_CREDENTIALS"]
private_key_id = os.environ.get('PRIVATE_KEY_ID')
private_key = os.environ.get('PRIVATE_KEY')
client_id = os.environ.get('CLIENT_ID')

# !pip install python-docx

import os
import streamlit as st
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores.faiss import FAISS
# from google.colab import drive
from docx import Document
import google.generativeai as genai
from datetime import datetime
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT

api_key_google = os.environ.get('GOOGLE_GEMINI_KEY')
genai.configure(api_key=api_key_google)
76 |
+
|
77 |
+
# Mount Google Drive
|
78 |
+
# drive.mount('/content/drive')
|
79 |
+
|
80 |
+
model = genai.GenerativeModel('gemini-pro')
|
81 |
+
|
82 |
+
def save_resume_to_docx(tailored_resume, file_path):
|
83 |
+
doc = Document()
|
84 |
+
doc.add_heading('Tailored Resume', level=1)
|
85 |
+
doc.add_paragraph(tailored_resume)
|
86 |
+
doc.save(file_path)
|
87 |
+
|
88 |
+
# Function to read text from a .docx file
|
89 |
+
def read_docx(file_path):
|
90 |
+
doc = Document(file_path)
|
91 |
+
return "\n".join([para.text for para in doc.paragraphs])
|
92 |
+
|
93 |
+
# def generate_resume_text(resume_text):
|
94 |
+
# prompt = f"""
|
95 |
+
# Given the following resume content:
|
96 |
+
|
97 |
+
# [Resume Start]
|
98 |
+
# {resume_text}
|
99 |
+
# [Resume End]
|
100 |
+
|
101 |
+
# Format this resume content with appropriate section titles. Only use the information provided and avoid placeholders like "[Your Name]". Ensure it retains the structure and details exactly as shown.
|
102 |
+
# """
|
103 |
+
# try:
|
104 |
+
# response = model.generate_content(prompt)
|
105 |
+
# print(response)
|
106 |
+
# # Accessing the generated text content
|
107 |
+
# return response.candidates[0].content.parts[0].text
|
108 |
+
# except Exception as e:
|
109 |
+
# print("Error in generating resume text:", e)
|
110 |
+
# return None
|
111 |
+
|
def tailor_resume(resume_text, job_description):
    # Use the generate_resume_text function to get the formatted resume content
    # formatted_resume = generate_resume_text(resume_text)
    # print("formatted resume:", resume_text)
    prompt = f"""
    Below is the candidate's original resume content:
    [Resume Start]
    {resume_text}
    [Resume End]
    Using the candidate's resume above and the job description below, create a tailored resume.
    [Job Description Start]
    {job_description}
    [Job Description End]

    Please generate a resume that:
    1. Uses real data from the candidate's resume, including name and education.
    2. Avoids placeholders like "[Your Name]" and includes actual details. This is important.
    3. In the experience section, emphasizes professional experiences and skills that are directly relevant to the job description.
    4. Keeps only a maximum of the top three accomplishments/responsibilities for each job position held, so as to make the candidate stand out in the new job role.
    5. Removes special characters from the section titles.
    6. Only includes publications if the job description is research based.
    7. Summarizes the skills and technical skills section into a brief profile.
    8. Does not include courses, certification, references, skills and technical skills sections if they are not relevant.
    9. Only includes true information about the candidate.
    10. Provides the text in markdown format that clearly identifies the headings and subheadings.
    """

    try:
        response = model.generate_content(prompt)
        print(response.candidates[0].content.parts[0].text)
        return response.candidates[0].content.parts[0].text
    except Exception as e:
        print("Error in tailoring resume:", e)
        return None

def add_bold_and_normal_text(paragraph, text):
    """Adds text to the paragraph, handling bold formatting."""
    while "**" in text:
        before, bold_part, after = text.partition("**")
        if before:
            paragraph.add_run(before)
        if bold_part == "**":
            bold_text, _, text = after.partition("**")
            paragraph.add_run(bold_text).bold = True
        else:
            text = after
    if text:
        paragraph.add_run(text)

def convert_resume_to_word(markdown_text, output_file):
    # Create a new Word document
    doc = Document()

    # Split the text into lines for processing
    lines = markdown_text.splitlines()

    for line in lines:
        if line.startswith("## "):  # Main heading (Level 1)
            paragraph = doc.add_heading(line[3:].strip(), level=1)
            paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.JUSTIFY
        elif line.startswith("### "):  # Subheading (Level 2)
            paragraph = doc.add_heading(line[4:].strip(), level=2)
            paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.JUSTIFY
        elif line.startswith("- "):  # Bullet points
            paragraph = doc.add_paragraph()
            add_bold_and_normal_text(paragraph, line[2:].strip())
            paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.JUSTIFY
        elif line.startswith("* "):  # Sub-bullet points or normal list items
            paragraph = doc.add_paragraph(style="List Bullet")
            add_bold_and_normal_text(paragraph, line[2:].strip())
            paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.JUSTIFY
        elif line.strip():  # Normal text (ignores blank lines)
            paragraph = doc.add_paragraph()
            add_bold_and_normal_text(paragraph, line.strip())
            paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.JUSTIFY

    # Save the Word document
    doc.save(output_file)
    print(f"Markdown converted and saved as {output_file}")

# Entry function for the model
def generate_gemini(current_resume, job_description):
    st.header('Resume Tailoring')

    # Load the resume and job description from Google Drive
    resume_text = extract_text(current_resume)
    job_description = extract_text(job_description)

    # Tailor resume based on job description
    tailored_resume = tailor_resume(resume_text, job_description)
    output_file = f"Tailored_Resume_{datetime.now().strftime('%Y%m%d_%H%M%S')}.docx"
    convert_resume_to_word(tailored_resume, output_file)

    st.success(f"Tailored resume saved to {output_file}")

    return tailored_resume, output_file


# Main function for Streamlit app
# def Gemini_pro_main(current_resume, job_description):
#     st.header('Resume Tailoring')
#
#     # Load the resume and job description from Google Drive
#     resume_text = extract_text(current_resume)
#     job_description = extract_text(job_description)
#
#     # Tailor resume based on job description
#     tailored_resume = tailor_resume(resume_text, job_description)
#     st.write("**Tailored Resume:**")
#     st.write(tailored_resume)
#     print(tailored_resume)
#
#     # Save the tailored resume to a .docx file
#     if tailored_resume:
#         file_path = f"Tailored_Resume_{datetime.now().strftime('%Y%m%d_%H%M%S')}.docx"
#         save_resume_to_docx(tailored_resume, file_path)
#         st.success(f"Tailored resume saved to {file_path}")

# if __name__ == '__main__':
#     main()
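# Quick standalone check of the tailoring pipeline (a minimal sketch; assumes
# GOOGLE_GEMINI_KEY is set, and both file names below are hypothetical):
#   md = tailor_resume(extract_text("resume.docx"), extract_text("jd.pdf"))
#   if md:
#       convert_resume_to_word(md, "tailored_preview.docx")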
setup.sh
ADDED
@@ -0,0 +1 @@
apt-get update && apt-get install -y poppler-utils
similarity_score_refined.py
ADDED
@@ -0,0 +1,146 @@
# -*- coding: utf-8 -*-
"""Similarity_score_refined (2).ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1c8mlCBnLbduLsI8rUGFEOYDuyBqdz2JJ
"""

# !pip install sentence_transformers
# !pip install openai==0.28
# !pip install docx2txt PyPDF2 transformers

# from google.colab import drive,userdata
# drive.mount("/content/drive")
# print("Google Drive mounted.")

import re
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import os

# Ensure you have downloaded stopwords and wordnet
import nltk
nltk.download('stopwords')
nltk.download('wordnet')

def extract_text(file_path):
    import docx2txt
    import PyPDF2
    if file_path.endswith(".docx"):
        # Extract text from DOCX file
        return docx2txt.process(file_path)

    elif file_path.endswith(".pdf"):
        # Extract text from PDF file
        text = ""
        with open(file_path, 'rb') as file:
            reader = PyPDF2.PdfReader(file)
            for page_num in range(len(reader.pages)):
                text += reader.pages[page_num].extract_text()
        return text

    else:
        raise ValueError("Unsupported file type")

def preprocess(text):
    # Lowercase the text
    text = text.lower()

    # Remove special characters and numbers
    text = re.sub(r'[^a-z\s]', '', text)

    # Tokenize the text by splitting on whitespace
    words = text.split()

    # Remove stop words
    stop_words = set(stopwords.words('english'))
    words = [word for word in words if word not in stop_words]

    # Lemmatize the words (to get root form)
    lemmatizer = WordNetLemmatizer()
    words = [lemmatizer.lemmatize(word) for word in words]

    # Join words back into a single string
    return ' '.join(words)

def calculate_tfidf(doc):
    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform([doc])  # Only fit on the individual document
    feature_names = vectorizer.get_feature_names_out()
    dense_tfidf_matrix = tfidf_matrix.todense()

    # Extract important terms from the document with a threshold
    important_terms = [feature_names[i] for i in range(len(feature_names)) if dense_tfidf_matrix[0, i] > 0.2]

    return ' '.join(important_terms)

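# Note on the 0.2 cutoff above: the vectorizer is fit on a single document, so
# every term shares the same IDF and the score reduces to an L2-normalised
# term frequency; the filter keeps terms that occur relatively often.
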
def call_chatgpt_api(prompt, api_key, model="gpt-3.5-turbo"):
    import openai
    openai.api_key = api_key
    response = openai.ChatCompletion.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt}
        ],
        max_tokens=500,
        temperature=0,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0
    )
    return response['choices'][0]['message']['content'].strip()

def calculate_similarity(resume, job_desc, model_name="sentence-transformers/all-MiniLM-L6-v2"):
    from sentence_transformers import SentenceTransformer, util
    model = SentenceTransformer(model_name)

    # Convert texts to embeddings
    embeddings1 = model.encode(resume, convert_to_tensor=True)
    embeddings2 = model.encode(job_desc, convert_to_tensor=True)

    # Calculate cosine similarity
    similarity_score = util.pytorch_cos_sim(embeddings1, embeddings2)
    return similarity_score.item()  # return as a scalar

def similarity_main(resume_path, job_description_path):

    # Extract text from files (replace with actual file paths)
    Resume_text = extract_text(resume_path)
    job_des = extract_text(job_description_path)
    api_key = os.environ.get('OPENAI_KEY')

    prompt = f"Extract the skills or competencies section from the resume. Avoid using name of the candidate:\n\n{Resume_text}"
    resume_skills = call_chatgpt_api(prompt, api_key)
    experience_prompt = f"Extract the experience of the candidate from the resume. Avoid using name of the candidate:\n\n{Resume_text}"
    resume_experience = call_chatgpt_api(experience_prompt, api_key)

    # Extract sections from job description (JD)
    jd_skills_prompt = f"Extract the skills section from the job description:\n\n{job_des}"
    jd_skills = call_chatgpt_api(jd_skills_prompt, api_key)

    jd_experience_prompt = f"Extract the experience section from the job description:\n\n{job_des}"
    jd_experience = call_chatgpt_api(jd_experience_prompt, api_key)

    resume_skills_clean = preprocess(resume_skills)
    jd_skills_clean = preprocess(jd_skills)

    resume_experience_clean = preprocess(resume_experience)
    jd_experience_clean = preprocess(jd_experience)

    filtered_resume = calculate_tfidf(resume_skills_clean)
    filtered_jd = calculate_tfidf(jd_skills_clean)
    similarity_skills = calculate_similarity(filtered_resume, filtered_jd)

    filtered_resume_ex = calculate_tfidf(resume_experience_clean)
    filtered_jd_ex = calculate_tfidf(jd_experience_clean)
    similarity_ex = calculate_similarity(filtered_resume_ex, filtered_jd_ex)

    Average_Score = (similarity_skills + similarity_ex) / 2
    percentage = f"{Average_Score * 100:.2f}%"
    return percentage

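# Minimal usage sketch (assumes OPENAI_KEY is set; both file names below are
# hypothetical):
#   print(similarity_main("resume.docx", "job_description.pdf"))  # e.g. "63.40%"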
template_image.png
ADDED