Anushkabhat9 committed: Upload 2 files
- requirements.txt +4 -1
- similarity_score_refined.py +144 -0
requirements.txt
CHANGED
@@ -5,4 +5,7 @@ langchain_google_genai
 python-docx
 docx2txt
 faiss-gpu
-google-generativeai
+google-generativeai
+sentence_transformers
+Transformers
+openai
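The three added packages back the new similarity script below: sentence_transformers for embeddings, Transformers as its backbone, and openai for the section-extraction calls. They install as usual with pip install -r requirements.txt; note that faiss-gpu assumes a CUDA-capable runtime.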
similarity_score_refined.py
ADDED
@@ -0,0 +1,144 @@
# -*- coding: utf-8 -*-
"""Similarity_score_refined (2).ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1c8mlCBnLbduLsI8rUGFEOYDuyBqdz2JJ
"""

# !pip install sentence_transformers
# !pip install openai==0.28
# !pip install docx2txt PyPDF2 transformers

# from google.colab import drive, userdata
# drive.mount("/content/drive")
# print("Google Drive mounted.")

import os
import re

from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# Ensure the stopwords and wordnet corpora have been downloaded
import nltk
nltk.download('stopwords')
nltk.download('wordnet')

def extract_text(file_path):
    import docx2txt
    import PyPDF2  # needed for the PDF branch below

    if file_path.endswith(".docx"):
        # Extract text from a DOCX file
        return docx2txt.process(file_path)

    elif file_path.endswith(".pdf"):
        # Extract text from a PDF file, page by page
        text = ""
        with open(file_path, 'rb') as file:
            reader = PyPDF2.PdfReader(file)
            for page in reader.pages:
                text += page.extract_text()
        return text

    else:
        raise ValueError("Unsupported file type")

def preprocess(text):
    # Lowercase the text
    text = text.lower()

    # Remove special characters and numbers
    text = re.sub(r'[^a-z\s]', '', text)

    # Tokenize the text by splitting on whitespace
    words = text.split()

    # Remove stop words
    stop_words = set(stopwords.words('english'))
    words = [word for word in words if word not in stop_words]

    # Lemmatize the words (to get the root form)
    lemmatizer = WordNetLemmatizer()
    words = [lemmatizer.lemmatize(word) for word in words]

    # Join the words back into a single string
    return ' '.join(words)

def calculate_tfidf(doc):
    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform([doc])  # fit on the individual document only
    feature_names = vectorizer.get_feature_names_out()
    dense_tfidf_matrix = tfidf_matrix.todense()

    # Keep only the terms whose TF-IDF weight clears the threshold
    important_terms = [feature_names[i] for i in range(len(feature_names)) if dense_tfidf_matrix[0, i] > 0.2]

    return ' '.join(important_terms)

def call_chatgpt_api(prompt, api_key, model="gpt-3.5-turbo"):
    import openai
    openai.api_key = api_key  # use the key passed in rather than a hardcoded one
    response = openai.ChatCompletion.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt}
        ],
        max_tokens=500,
        temperature=0,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0
    )
    return response['choices'][0]['message']['content'].strip()

def calculate_similarity(resume, job_desc, model_name="sentence-transformers/all-MiniLM-L6-v2"):
    from sentence_transformers import SentenceTransformer, util
    model = SentenceTransformer(model_name)

    # Convert both texts to embeddings
    embeddings1 = model.encode(resume, convert_to_tensor=True)
    embeddings2 = model.encode(job_desc, convert_to_tensor=True)

    # Calculate cosine similarity
    similarity_score = util.pytorch_cos_sim(embeddings1, embeddings2)
    return similarity_score.item()  # return as a scalar

def similarity_main(resume_path, job_description_path):
    # Extract text from the uploaded files
    resume_text = extract_text(resume_path)
    job_des = extract_text(job_description_path)

    # Read the OpenAI key from the environment instead of hardcoding a secret
    api_key = os.environ.get("OPENAI_API_KEY")

    # Extract sections from the resume
    skills_prompt = f"Extract the skills or competencies section from the resume. Avoid using the name of the candidate:\n\n{resume_text}"
    resume_skills = call_chatgpt_api(skills_prompt, api_key)
    experience_prompt = f"Extract the experience of the candidate from the resume. Avoid using the name of the candidate:\n\n{resume_text}"
    resume_experience = call_chatgpt_api(experience_prompt, api_key)

    # Extract sections from the job description (JD)
    jd_skills_prompt = f"Extract the skills section from the job description:\n\n{job_des}"
    jd_skills = call_chatgpt_api(jd_skills_prompt, api_key)

    jd_experience_prompt = f"Extract the experience section from the job description:\n\n{job_des}"
    jd_experience = call_chatgpt_api(jd_experience_prompt, api_key)

    # Clean each extracted section
    resume_skills_clean = preprocess(resume_skills)
    jd_skills_clean = preprocess(jd_skills)

    resume_experience_clean = preprocess(resume_experience)
    jd_experience_clean = preprocess(jd_experience)

    # Compare the skills sections
    filtered_resume = calculate_tfidf(resume_skills_clean)
    filtered_jd = calculate_tfidf(jd_skills_clean)
    similarity_skills = calculate_similarity(filtered_resume, filtered_jd)

    # Compare the experience sections
    filtered_resume_ex = calculate_tfidf(resume_experience_clean)
    filtered_jd_ex = calculate_tfidf(jd_experience_clean)
    similarity_ex = calculate_similarity(filtered_resume_ex, filtered_jd_ex)

    # Average the two scores and report as a percentage
    average_score = (similarity_skills + similarity_ex) / 2
    percentage = f"{average_score * 100:.2f}%"
    print(percentage)
    return percentage
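A minimal usage sketch (not part of the commit), assuming the cleaned-up similarity_main above, which reads the OpenAI key from the OPENAI_API_KEY environment variable; the file paths here are hypothetical placeholders:

    import os
    from similarity_score_refined import similarity_main

    # Assumption: the key is supplied via the environment, never hardcoded.
    os.environ.setdefault("OPENAI_API_KEY", "<your-openai-key>")

    # Hypothetical paths; any .pdf or .docx pair will do.
    score = similarity_main("resume.pdf", "job_description.docx")
    print(score)  # the averaged skills/experience similarity as a percentage string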