Spaces:
Sleeping
Sleeping
Rename resume_class.py to resume_score_gemini.py
Browse files- resume_class.py +0 -36
- resume_score_gemini.py +115 -0
resume_class.py
DELETED
@@ -1,36 +0,0 @@
# Load the fine-tuned RAG model from the local "rag_model" directory.
rag_model = RagTokenForGeneration.from_pretrained("rag_model")

job_description_text = """
We are looking for a Senior Data Scientist with 7+ years of experience in machine learning, deep learning, and advanced statistical modeling. The candidate should have a strong background in Python, TensorFlow, and PyTorch. Experience with cloud platforms like AWS or GCP is mandatory. Responsibilities include leading data science teams, designing predictive models, and optimizing business strategies through data insights. A PhD in Computer Science, Statistics, or a related field is highly preferred.
"""

resume_text = """
Hardworking construction worker with 2 years of experience in residential building projects. Skilled in operating heavy machinery, reading blueprints, and ensuring site safety. Proficient in the use of tools like drills, saws, and hammers. Strong knowledge of safety regulations and experience collaborating with contractors and architects. Dedicated to delivering quality results and meeting project deadlines.
"""

# Fix: the original left a dangling "+" at the end of the first line and put
# the instruction string on the next statement — a SyntaxError. Build the
# query in a single parenthesized expression instead.
query = (
    resume_text
    + " "
    + job_description_text
    + f"Provide a score between 0 and 100% for the resume against the job description and explain your reasoning. Summarize the classification whether the candidate is a perfect fit, a good fit, a potential fit, or no fit"
)

tokenizer = RagTokenizer.from_pretrained("facebook/rag-sequence-base", trust_remote_code=True)


def test_new_data(query, model, tokenizer):
    """Tokenize *query*, run it through *model*, and return the argmax class id."""
    inputs = tokenizer(query, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():  # inference only — no gradient bookkeeping needed
        outputs = model(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
        )
    logits = outputs.logits
    preds = torch.argmax(logits, axis=1)
    return preds.item()


# Fix: the original called test_new_data(query, model, tokenizer), but no
# module-level name `model` exists — only `rag_model` is defined above.
predicted_label = test_new_data(query, rag_model, tokenizer)
print(f"Predicted Label: {predicted_label}")

# Fix: `inputs` was local to test_new_data and not visible here, and the
# keyword argument was misspelled `inputs_ids`. Tokenize at module scope and
# pass the correct `input_ids` keyword. Also fix the "Jof Fitness" typo.
inputs = tokenizer(query, return_tensors="pt", padding=True, truncation=True)
response = rag_model.generate(input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"])
print(f"Job Fitness: {response}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
resume_score_gemini.py
ADDED
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os
import re

import google.generativeai as genai
import PyPDF2
from docx import Document
from docx.opc.exceptions import PackageNotFoundError
def read_file(file_path):
    """
    Read the content of a file and return it as a string.

    - ``.pdf``  : text extracted with PyPDF2.
    - ``.docx`` : text extracted with python-docx.
    - otherwise : read as a plain text file, trying several encodings if
      'utf-8' fails.

    Fix: the original docstring promised the plain-text fallback, but the
    function had no ``else`` branch and silently returned ``None`` for any
    file that was not .pdf/.docx. The fallback is implemented here.

    Raises:
        FileNotFoundError: if *file_path* does not exist.
        PackageNotFoundError: if a ``.docx`` file cannot be opened as docx.
    """
    # Check if the file exists before proceeding
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    if file_path.lower().endswith('.pdf'):
        with open(file_path, 'rb') as file:  # binary read mode for PDFs
            reader = PyPDF2.PdfReader(file)
            text = ""
            for page in reader.pages:
                # Fix: extract_text() may return None for image-only pages;
                # `or ""` keeps the concatenation from raising TypeError.
                text += page.extract_text() or ""
            return text
    elif file_path.lower().endswith('.docx'):
        # Handle docx files using python-docx
        try:
            doc = Document(file_path)
            text = ""
            for paragraph in doc.paragraphs:
                text += paragraph.text + "\n"  # newline keeps paragraphs separated
            return text
        except PackageNotFoundError:
            # More informative error when the file is not a valid docx
            raise PackageNotFoundError(f"The file {file_path} is not a valid docx file. It may be corrupted or of a different format.")
    else:
        # Plain-text fallback promised by the docstring: try utf-8 first,
        # then a BOM-aware variant, then latin-1 (which never fails).
        last_error = None
        for encoding in ('utf-8', 'utf-8-sig', 'latin-1'):
            try:
                with open(file_path, 'r', encoding=encoding) as file:
                    return file.read()
            except UnicodeDecodeError as err:
                last_error = err
        raise last_error
# Read the Gemini API key from the environment (never hard-code secrets).
# NOTE(review): os.environ.get returns None when GOOGLE_GEMINI_KEY is unset,
# so genai.configure would be called with api_key=None and requests would
# fail later — confirm the deployment always exports this variable.
api_key_google = os.environ.get('GOOGLE_GEMINI_KEY')
genai.configure(api_key=api_key_google)


# Module-level Gemini model handle shared by similarity_main().
model = genai.GenerativeModel('gemini-pro')
def similarity_main(tailored_resume_path, job_description_path):
    """
    Use Gemini Pro to evaluate the relevance score between a tailored resume and job description.

    Args:
    - tailored_resume_path (str): Path to the tailored resume file.
    - job_description_path (str): Path to the job description file.

    Returns:
    - dict: A dictionary containing the 'score' (a percentage string such as
      "85.00%") and 'reason', or None when the API call or response parsing
      fails (the error is printed for debugging).
    """
    resume_text = read_file(tailored_resume_path)
    job_description = read_file(job_description_path)
    prompt = f"""
    You are a recruitment expert evaluating how well a tailored resume aligns with a job description. Provide a realistic and concise evaluation based on the following criteria:
    1. Relevance of skills and experience: Do the candidate’s skills, accomplishments, and experience meet the job's core requirements?
    2. Domain Match: Are the candidate's experiences and achievements relevant to the industry or role?
    3. Clarity and Conciseness: Is the resume well-structured and focused on the job requirements?
    4. Highlight any gaps or mismatched qualifications realistically.
    Provide your response in this exact format:
    Score: [Score between 0 and 1]
    Reason: [One or two sentences explaining the score]
    Here is the tailored resume:
    [Resume Start]
    {resume_text}
    [Resume End]
    And the job description below:
    [Job Description Start]
    {job_description}
    [Job Description End]
    """

    try:
        # Get the response from Gemini Pro
        response = model.generate_content(prompt)
        candidates = response.candidates
        if not candidates:  # covers both None and an empty list
            raise ValueError("No candidates found in the response.")

        # Extract content text
        content_text = candidates[0].content.parts[0].text
        print(f"Response from Gemini Pro:\n{content_text}")  # Debugging

        score = None
        reason = None

        for line in content_text.split("\n"):
            stripped = line.strip()
            lowered = stripped.lower()
            if lowered.startswith("score:"):
                raw = stripped.split(":", 1)[1]
                # Fix: the prompt literally shows "Score: [Score between 0
                # and 1]", so models often echo brackets or append prose
                # ("[0.85]", "0.85 (strong fit)") that float() cannot parse.
                # Pull out the first numeric token instead.
                match = re.search(r"\d+(?:\.\d+)?", raw)
                if match is None:
                    raise ValueError(f"Invalid score format: {line}")
                value = float(match.group())
                # Fix: the original multiplied by 100 unconditionally, which
                # yields absurd values (e.g. 8500%) when the model answers on
                # a 0-100 scale. Only rescale values that look like 0-1.
                if value <= 1.0:
                    value *= 100.0
                score = f"{value:.2f}%"
            elif lowered.startswith("reason:"):
                reason = stripped.split(":", 1)[1].strip()

        # Ensure both score and reason are extracted
        if score is None:
            raise ValueError("Failed to extract score from the response.")
        if not reason:
            reason = "No reason provided."

        return {"score": score, "reason": reason}

    except Exception as e:
        # Deliberate best-effort contract: any failure (network, safety
        # block, parse error) is reported and mapped to None for the caller.
        print(f"Error in relevance checking: {e}")
        return None