Spaces:
Sleeping
Sleeping
File size: 4,981 Bytes
4f4a4fe 1ca713e b8f8065 1ca713e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 |
import gradio as gr
import pandas as pd
import matplotlib.pyplot as plt
from transformers import pipeline
from sentence_transformers import SentenceTransformer, util
from langdetect import detect
# Sample job postings. The two lists are parallel: the entry at index i of
# 'Job_Description' belongs to the company at index i of 'Company'.
data = dict(
    Company=['Google', 'Amazon', 'Microsoft', 'Facebook', 'Tesla'],
    Job_Description=[
        "We are looking for a Senior Software Engineer with extensive experience in Python, Java, and cloud computing. The candidate should have experience working in an Agile environment and a deep understanding of machine learning.",
        "The Data Analyst will analyze large datasets to uncover trends, patterns, and insights. Proficiency in SQL, Python, and data visualization tools like PowerBI or Tableau is required.",
        "Hiring a Cloud Architect with experience in Azure, AWS, and cloud infrastructure design. The ideal candidate should have experience with Docker, Kubernetes, and network security.",
        "AI Research Scientist with expertise in machine learning, deep learning, and natural language processing (NLP). Experience with TensorFlow, PyTorch, and data-driven research.",
        "Looking for an Electrical Engineer with experience in circuit design, power electronics, and embedded systems. Proficiency in CAD tools and simulation software is a must.",
    ],
)

# Tabular view of the postings: one row per company, one column per key above.
df = pd.DataFrame(data)
# Sentence-embedding model; its encodings are compared with cosine similarity
# to score how well a CV matches each job description.
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# Translation pipeline used in both directions (CV -> English and
# job description -> CV language).
translator = pipeline("translation", model="facebook/nllb-200-distilled-600M")

# Maps the language code returned by langdetect's detect() to the language
# code passed to the translator as src_lang / tgt_lang. Languages missing
# from this table are treated as English by the lookup in translate_to_english.
lang_code_mapping = dict(
    ar="arb_Arab",  # Arabic
    fr="fra_Latn",  # French
    es="spa_Latn",  # Spanish
    de="deu_Latn",  # German
)
# Translate a CV to English (when it is written in another supported language)
# so it can be compared with the English job descriptions.
def translate_to_english(cv_text):
    """Return the CV in English together with the language it was written in.

    The language is detected with langdetect and mapped through
    ``lang_code_mapping``. Any language that is not in the mapping is treated
    as English and the text is returned unchanged.

    Args:
        cv_text: The CV text as entered by the user.

    Returns:
        A ``(english_text, lang_code)`` tuple. ``lang_code`` is the
        translator-style code from ``lang_code_mapping`` (e.g. ``'arb_Arab'``),
        or ``'eng_Latn'`` when no translation was performed.
    """
    # Imported locally so this function does not depend on a change to the
    # file's import block.
    from langdetect.lang_detect_exception import LangDetectException

    try:
        iso_code = detect(cv_text)
    except LangDetectException:
        # langdetect raises when the text has nothing to detect a language
        # from (e.g. only digits or punctuation). Fall back to English rather
        # than failing the whole request.
        iso_code = 'en'

    detected_lang = lang_code_mapping.get(iso_code, "eng_Latn")

    # Already English (or an unsupported language): nothing to translate.
    if detected_lang == 'eng_Latn':
        return cv_text, detected_lang

    translation = translator(cv_text, src_lang=detected_lang, tgt_lang="eng_Latn")[0]['translation_text']
    return translation, detected_lang
# If the entered CV is not in English, return the job description in the CV's language
def translate_job_description_if_needed(job_description, target_lang):
    """Translate an English job description into ``target_lang`` when required.

    Args:
        job_description: Job description text, written in English.
        target_lang: Language code of the desired output. English may be given
            either as ``'en'`` or as the translator code ``'eng_Latn'``; the
            latter is what ``translate_to_english`` reports for English CVs.

    Returns:
        The original text when the target is English (or no target is given),
        otherwise the translated text.
    """
    # The original check only recognised 'en', so a target of 'eng_Latn'
    # triggered a pointless English -> English model call.
    if not target_lang or target_lang in ('en', 'eng_Latn'):
        return job_description
    return translator(job_description, src_lang="eng_Latn", tgt_lang=target_lang)[0]['translation_text']
# Find the top 3 job descriptions matching the CV using semantic similarity
def find_top_matches(cv_text):
    """Rank the job descriptions against a CV and report the three best matches.

    The CV is translated to English when needed, embedded together with every
    job description in ``df``, and compared by cosine similarity.

    Args:
        cv_text: CV text as entered by the user.

    Returns:
        A ``(summary_html, figure)`` tuple. ``summary_html`` lists the company
        and job description of each of the three best matches (descriptions
        are translated into the CV's language when it is not English).
        ``figure`` is a matplotlib figure with one bar per match showing its
        similarity score. For an empty or whitespace-only CV the function
        returns ``("Error: CV is empty", None)``.
    """
    # Whitespace-only input has no text to detect a language from, so it is
    # rejected the same way as an empty string.
    if not cv_text or not cv_text.strip():
        return "Error: CV is empty", None

    # Translate the CV to English if it is in another supported language.
    cv_text, detected_lang = translate_to_english(cv_text)

    # Encode both the CV and the job descriptions.
    descriptions = df['Job_Description'].tolist()
    descriptions_embeddings = model.encode(descriptions, convert_to_tensor=True)
    cv_embedding = model.encode([cv_text], convert_to_tensor=True)

    # Cosine similarity between the CV and every job description.
    similarities = util.pytorch_cos_sim(cv_embedding, descriptions_embeddings)[0]

    # Bring the scores to the CPU and use plain Python ints as indices so the
    # pandas lookup and the NumPy conversion also work when the model runs on
    # a GPU.
    similarities = similarities.detach().cpu()
    top_3_indices = similarities.argsort(descending=True)[:3].tolist()
    top_3_matches = df.iloc[top_3_indices]
    top_3_similarities = similarities[top_3_indices].numpy()

    # Draw on a fresh figure for every call. Plotting through the implicit
    # global pyplot figure made bars from earlier requests pile up.
    fig, ax = plt.subplots()
    ax.bar(top_3_matches['Company'], top_3_similarities, color='skyblue')
    ax.set_ylabel('Similarity Score')
    ax.set_xlabel('Company')
    ax.set_title('Top 3 Job Description Matches')
    # Drop the figure from pyplot's registry so figures do not accumulate
    # across requests; the returned Figure object can still be rendered.
    plt.close(fig)

    # translate_to_english reports 'eng_Latn' for CVs that needed no
    # translation; only other languages require translated descriptions.
    needs_translation = detected_lang != 'eng_Latn'

    summary_parts = []
    for _, row in top_3_matches.iterrows():
        job_desc = row['Job_Description']
        if needs_translation:
            job_desc = translate_job_description_if_needed(job_desc, detected_lang)
        summary_parts.append(f"<strong>Company:</strong> {row['Company']}<br>")
        summary_parts.append(f"<strong>Job Description :</strong> {job_desc}<br><br>")

    return "".join(summary_parts), fig
# Define the Gradio interface: one multi-line text box for the CV, and two
# outputs matching the (summary_html, plot) pair returned by find_top_matches.
demo = gr.Interface(
    fn=find_top_matches,
    inputs=gr.Textbox(lines=15, placeholder="Enter your CV text here...", label="CV Text"),
    outputs=[
        gr.HTML(label="Job Summaries"),
        gr.Plot(label="Top 3 Matching Job Descriptions"),
    ],
    # The original title started with a stray apostrophe.
    title="Match CV to Job Description",
    description="Upload your CV to find the top 3 job descriptions that match from the available companies using semantic similarity.",
)

# Launch the Gradio app
demo.launch()