Spaces:
Sleeping
Sleeping
File size: 4,708 Bytes
ce00033 153302b ce00033 153302b ce00033 153302b ce00033 153302b ce00033 153302b ce00033 153302b ce00033 9ae6215 ce00033 5547efc ce00033 49bc227 ce00033 c77ff37 ce00033 49bc227 ce00033 ee11a28 ce00033 ee11a28 ce00033 ee11a28 ce00033 49bc227 ce00033 ee11a28 ce00033 153302b ce00033 ee11a28 80bcaed ce00033 689007d ce00033 ee11a28 689007d ce00033 ee11a28 ce00033 c77ff37 ce00033 ee11a28 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 |
from docx.opc.exceptions import PackageNotFoundError
def read_file(file_path):
    """
    Return the text content of the file at ``file_path``.

    Files ending in '.pdf' are read with PyPDF2 and the text of every page is
    concatenated. Files ending in '.docx' are read with python-docx, emitting
    one line per paragraph. Any other file is read as plain text, trying
    'utf-8' first and falling back to other encodings if decoding fails.

    Args:
        file_path (str): Path of the file to read. The extension check is
            case-insensitive.

    Returns:
        str: The extracted text.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        PackageNotFoundError: If python-docx cannot open a '.docx' file.
    """
    # Check if the file exists before proceeding
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    lowered_path = file_path.lower()

    if lowered_path.endswith('.pdf'):
        with open(file_path, 'rb') as file:  # Open in binary read mode for PDFs
            reader = PyPDF2.PdfReader(file)
            # `or ""` keeps the join safe if a page yields no extractable text.
            return "".join(page.extract_text() or "" for page in reader.pages)

    if lowered_path.endswith('.docx'):
        try:
            doc = Document(file_path)
        except PackageNotFoundError as err:
            # Provide a more informative error message if the file is not a valid docx
            raise PackageNotFoundError(
                f"The file {file_path} is not a valid docx file. It may be corrupted or of a different format."
            ) from err
        # Add newline after each paragraph for paragraph separation
        return "".join(paragraph.text + "\n" for paragraph in doc.paragraphs)

    # Plain-text fallback: the docstring has always promised this branch, but
    # without it every non-PDF/non-docx file silently produced None.
    for encoding in ('utf-8', 'cp1252'):
        try:
            with open(file_path, 'r', encoding=encoding) as file:
                return file.read()
        except UnicodeDecodeError:
            continue
    # latin-1 maps every byte value to a character, so this last attempt
    # cannot raise UnicodeDecodeError.
    with open(file_path, 'r', encoding='latin-1') as file:
        return file.read()
import os
# from langchain.text_splitter import RecursiveCharacterTextSplitter
# from langchain_community.vectorstores.faiss import FAISS
from docx import Document
import google.generativeai as genai
import PyPDF2
# Read the Gemini API key from the GOOGLE_GEMINI_KEY environment variable.
# os.environ.get returns None when the variable is unset.
api_key_google = os.environ.get('GOOGLE_GEMINI_KEY')
# Configure the google.generativeai client with that key at import time.
# NOTE(review): behaviour with a None key is not visible here - confirm.
genai.configure(api_key=api_key_google)
# Module-level 'gemini-pro' model handle; similarity_main calls
# model.generate_content on it.
model = genai.GenerativeModel('gemini-pro')
def _parse_evaluation(content_text):
    """
    Extract the score and reason from a 'Score: ... / Reason: ...' reply.

    Args:
        content_text (str): Raw text returned by the model.

    Returns:
        tuple: ``(score, reason)`` where score is the parsed number scaled
        by 100 and rounded to two decimals, and reason is the text after
        'Reason:' or "No reason provided." when it is missing or empty.

    Raises:
        ValueError: If no score line is present or its value is not a number.
    """
    score = None
    reason = None
    for raw_line in content_text.splitlines():
        # Strip Markdown bold markers and padding BEFORE matching the label,
        # so replies such as "**Score:** 0.8" are recognised as well.
        cleaned = raw_line.replace("**", "").strip()
        label, separator, value = cleaned.partition(":")
        if not separator:
            continue
        label = label.strip().lower()
        value = value.strip()
        if label == "score":
            try:
                score = round(float(value) * 100, 2)
            except ValueError as err:
                raise ValueError(f"Invalid score format: {raw_line}") from err
        elif label == "reason":
            reason = value

    # Ensure both score and reason are extracted
    if score is None:
        raise ValueError("Failed to extract score from the response.")
    if not reason:
        reason = "No reason provided."
    return score, reason


def similarity_main(tailored_resume_path, job_description_path):
    """
    Use Gemini Pro to evaluate the relevance score between a tailored resume and job description.

    Args:
    - tailored_resume_path (str): Path of the tailored resume file, loaded with read_file.
    - job_description_path (str): Path of the job description file, loaded with read_file.

    Returns:
    - dict: A dictionary containing the 'score' (scaled to 0–100) and 'reason',
      or None when the model call or the parsing of its reply fails (the
      error is printed).

    Errors raised by read_file while loading either document are not caught
    here and propagate to the caller.
    """
    resume_text = read_file(tailored_resume_path)
    job_description = read_file(job_description_path)
    prompt = f"""
You are a recruitment expert evaluating how well a tailored resume aligns with a job description. Provide a realistic and concise evaluation based on the following criteria:
1. Relevance of skills and experience: Do the candidate’s skills, accomplishments, and experience meet the job's core requirements?
2. Domain Match: Are the candidate's experiences and achievements relevant to the industry or role?
3. Clarity and Conciseness: Is the resume focused on the job requirements?
4. Highlight any gaps or mismatched qualifications realistically.
Provide your response in this exact format and make sure that score is a floating point number.
Score: [Score between 0 and 1]
Reason: [One or two sentences explaining the score]
Here is the tailored resume:
[Resume Start]
{resume_text}
[Resume End]
And the job description below:
[Job Description Start]
{job_description}
[Job Description End]
"""
    try:
        # Get the response from Gemini Pro (temperature 0.0 is passed for every call)
        response = model.generate_content(prompt, generation_config={"temperature": 0.0})
        candidates = response.candidates
        if not candidates:
            raise ValueError("No candidates found in the response.")
        # Extract content text from the first part of the first candidate
        content_text = candidates[0].content.parts[0].text
        score, reason = _parse_evaluation(content_text)
        return {"score": score, "reason": reason}
    except Exception as e:
        # Deliberate best-effort boundary: report the problem and signal
        # failure with None instead of propagating.
        print(f"Error in relevance checking: {e}")
        return None