# Spaces status: Sleeping
import gradio as gr
import matplotlib.pyplot as plt
import pandas as pd
from langdetect import detect
from langdetect.lang_detect_exception import LangDetectException
from matplotlib.figure import Figure
from sentence_transformers import SentenceTransformer, util
from transformers import pipeline
# Sample job postings used as the matching corpus: one description per company,
# with the two lists aligned by position.
data = {
    'Company': ['Google', 'Amazon', 'Microsoft', 'Facebook', 'Tesla'],
    'Job_Description': [
        "We are looking for a Senior Software Engineer with extensive experience in Python, Java, and cloud computing. The candidate should have experience working in an Agile environment and a deep understanding of machine learning.",
        "The Data Analyst will analyze large datasets to uncover trends, patterns, and insights. Proficiency in SQL, Python, and data visualization tools like PowerBI or Tableau is required.",
        "Hiring a Cloud Architect with experience in Azure, AWS, and cloud infrastructure design. The ideal candidate should have experience with Docker, Kubernetes, and network security.",
        "AI Research Scientist with expertise in machine learning, deep learning, and natural language processing (NLP). Experience with TensorFlow, PyTorch, and data-driven research.",
        "Looking for an Electrical Engineer with experience in circuit design, power electronics, and embedded systems. Proficiency in CAD tools and simulation software is a must.",
    ],
}

# Load the job descriptions into a pandas DataFrame (columns: Company, Job_Description).
df = pd.DataFrame(data)
# Sentence-embedding model used to score semantic similarity between the CV
# and each job description.
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# Translation pipeline (NLLB-200); languages are selected per call through the
# src_lang / tgt_lang arguments using NLLB codes such as "eng_Latn".
translator = pipeline(task="translation", model="facebook/nllb-200-distilled-600M")
# Maps the language codes returned by langdetect to the language codes passed
# to the NLLB translator. Languages missing from this table are treated as
# English by translate_to_english.
lang_code_mapping = {
    'ar': 'arb_Arab',  # Arabic
    'fr': 'fra_Latn',  # French
    'es': 'spa_Latn',  # Spanish
    'de': 'deu_Latn',  # German
}
def translate_to_english(cv_text):
    """Return the CV in English together with the NLLB code of its language.

    The language is detected with ``langdetect`` and mapped to an NLLB code
    through ``lang_code_mapping``. Languages without a mapping entry, and text
    whose language cannot be detected, are treated as English and returned
    untranslated.

    Args:
        cv_text: CV text as entered by the user.

    Returns:
        A ``(text, lang_code)`` tuple: the English text (translated only when
        the detected language is mapped and is not English) and the NLLB code
        of the detected language (``"eng_Latn"`` as the fallback).
    """
    try:
        iso_code = detect(cv_text)
    except LangDetectException:
        # langdetect raises when the text has no usable features (for example
        # only digits or punctuation); fall back to English instead of
        # crashing the request.
        iso_code = None

    detected_lang = lang_code_mapping.get(iso_code, "eng_Latn")

    # Already English (or unmapped): nothing to translate.
    if detected_lang == 'eng_Latn':
        return cv_text, detected_lang

    # NOTE(review): long CVs may exceed the translation pipeline's generation
    # length limit and come back truncated -- confirm whether chunking or an
    # explicit max_length is needed.
    translation = translator(
        cv_text, src_lang=detected_lang, tgt_lang="eng_Latn"
    )[0]['translation_text']
    return translation, detected_lang
def translate_job_description_if_needed(job_description, target_lang):
    """Translate an English job description into ``target_lang`` when needed.

    Args:
        job_description: Job description text, written in English.
        target_lang: Language code of the CV. The caller in this file passes
            NLLB codes (e.g. ``"fra_Latn"``); ``"eng_Latn"`` and the legacy
            ``"en"`` both mean English.

    Returns:
        The description unchanged when the target is English, otherwise the
        translator's output for ``target_lang``.
    """
    # The detected language arrives as an NLLB code, so comparing against "en"
    # alone never matched and English CVs were sent through a pointless
    # English-to-English translation. "en" is still accepted for callers that
    # pass it.
    if target_lang in ('en', 'eng_Latn'):
        return job_description
    return translator(
        job_description, src_lang="eng_Latn", tgt_lang=target_lang
    )[0]['translation_text']
def find_top_matches(cv_text):
    """Find the three job descriptions most similar to a CV.

    The CV is translated to English when its detected language is mapped and
    not English, embedded together with every job description, and ranked by
    cosine similarity. Job descriptions in the summary are translated back to
    the CV's language when it is not English.

    Args:
        cv_text: CV text entered by the user.

    Returns:
        A ``(html, figure)`` tuple: an HTML summary of the top matches and a
        matplotlib ``Figure`` with a bar chart of their similarity scores.
        For empty or whitespace-only input, ``("Error: CV is empty", None)``.
    """
    # Whitespace-only text carries no content and would make language
    # detection fail, so treat it like an empty CV.
    if not cv_text or not cv_text.strip():
        return "Error: CV is empty", None

    # Translate the CV to English when its detected language is not English.
    cv_text, detected_lang = translate_to_english(cv_text)

    # Encode the CV and all job descriptions, then score each description.
    descriptions = df['Job_Description'].tolist()
    descriptions_embeddings = model.encode(descriptions, convert_to_tensor=True)
    cv_embedding = model.encode([cv_text], convert_to_tensor=True)
    similarities = util.pytorch_cos_sim(cv_embedding, descriptions_embeddings)[0]

    # Convert to plain Python values: tolist() works whatever device the
    # tensors live on (calling .numpy() directly fails on a GPU tensor), and
    # pandas gets an ordinary list of positions.
    scores = similarities.tolist()
    top_3_indices = similarities.argsort(descending=True)[:3].tolist()
    top_3_matches = df.iloc[top_3_indices]
    top_3_similarities = [scores[i] for i in top_3_indices]

    # Draw on a fresh Figure per call. Plotting through the global pyplot
    # state made bars from earlier requests pile up on the same axes.
    fig = Figure()
    ax = fig.subplots()
    ax.bar(top_3_matches['Company'].tolist(), top_3_similarities, color='skyblue')
    ax.set_ylabel('Similarity Score')
    ax.set_xlabel('Company')
    ax.set_title('Top 3 Job Description Matches')

    # Build the HTML summary, one entry per matched company.
    summary_parts = []
    for _, row in top_3_matches.iterrows():
        # Shown in the CV's language when that language is not English.
        job_desc_translated = translate_job_description_if_needed(
            row['Job_Description'], detected_lang
        )
        summary_parts.append(
            f"<strong>Company:</strong> {row['Company']}<br>"
            f"<strong>Job Description :</strong> {job_desc_translated}<br><br>"
        )

    return "".join(summary_parts), fig
# Gradio interface: a CV text box in, an HTML summary plus a similarity bar
# chart out.
demo = gr.Interface(
    fn=find_top_matches,
    inputs=gr.Textbox(lines=15, placeholder="Enter your CV text here...", label="CV Text"),
    outputs=[
        gr.HTML(label="Job Summaries"),
        gr.Plot(label="Top 3 Matching Job Descriptions"),
    ],
    # Stray leading apostrophe removed from the title; the description now
    # says "Paste" because the input is a text box, not a file upload.
    title="Match CV to Job Description",
    description="Paste your CV to find the top 3 job descriptions that match from the available companies using semantic similarity.",
)

# Launch the Gradio interface.
demo.launch()