|
import gradio as gr |
|
from transformers import pipeline |
|
|
|
import numpy as np |
|
import pickle |
|
|
|
|
|
|
|
|
|
|
|
|
|
# Sample requirement sentences in the style of job postings.
# NOTE(review): not referenced anywhere in the visible part of this file;
# presumably kept as example inputs for a demo UI — confirm before removing.
examples = [
    "High proficiency in Python and AI/ML frameworks, i.e. Pytorch.",
    "Experience with Unreal and/or Unity and/or native IOS/Android 3D development",
]
|
|
|
|
|
def aggregate_span(results):
    """Merge adjacent spans into single multi-word spans.

    Two consecutive spans are merged when the later one starts exactly one
    position after the earlier one ends (``start == end + 1``). The merged
    span joins both ``"word"`` values with a single space and takes the
    ``"end"`` of the later span; every other key keeps the value it had in
    the first span of the run.

    Args:
        results: Iterable of dicts, each with at least the keys ``"word"``,
            ``"start"`` and ``"end"``, in the order they should be scanned.

    Returns:
        A new list of merged span dicts. Empty input yields an empty list.
        The input dicts are never modified; the returned dicts are copies.
    """
    merged = []
    for span in results:
        if merged and span["start"] == merged[-1]["end"] + 1:
            # Directly follows the span being built: extend it in place.
            merged[-1]["word"] += " " + span["word"]
            merged[-1]["end"] = span["end"]
        else:
            # Start a new run; copy so the caller's dict is left untouched.
            merged.append(dict(span))
    return merged
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os |
|
|
|
def load_skills_from_date(base_folder, date):
    """Collect the unique skills listed in the ``.txt`` files of one date folder.

    Every regular file ending in ``.txt`` directly inside
    ``<base_folder>/<date>`` is read as UTF-8, one skill per line. Lines are
    stripped of surrounding whitespace and blank lines are ignored.

    Args:
        base_folder: Directory that contains one sub-folder per date.
        date: Name of the sub-folder to read.

    Returns:
        A sorted list of the distinct skill strings, or an empty list when
        the date folder does not exist. Sorting keeps the result stable
        across runs (plain set iteration order is not).
    """
    date_folder = os.path.join(base_folder, date)
    if not os.path.isdir(date_folder):
        return []

    all_skills = set()
    for file_name in os.listdir(date_folder):
        if not file_name.endswith(".txt"):
            continue
        file_path = os.path.join(date_folder, file_name)
        # A directory may also be named "*.txt"; opening it would raise.
        if not os.path.isfile(file_path):
            continue
        with open(file_path, "r", encoding="utf-8") as f:
            stripped_lines = (line.strip() for line in f)
            all_skills.update(skill for skill in stripped_lines if skill)

    return sorted(all_skills)
|
|
|
def visualize3D(reduced_embeddings, labels, skills, n_clusters, output_folder, date):
    """Build a 3D Plotly Express scatter figure of the embeddings.

    Args:
        reduced_embeddings: Array indexed as ``[:, k]``; columns 0, 1 and 2
            supply the x, y and z coordinates.
        labels: Passed to Plotly as the ``color`` of each point.
        skills: Passed to Plotly as the ``text`` of each point.
        n_clusters: Only used in the figure title.
        output_folder: Accepted but not used by this function.
        date: Only used in the figure title.

    Returns:
        The figure object returned by ``px.scatter_3d``.
    """
    xs = reduced_embeddings[:, 0]
    ys = reduced_embeddings[:, 1]
    zs = reduced_embeddings[:, 2]
    plot_title = f"KMeans Clustering with {n_clusters} Clusters ({date})"

    return px.scatter_3d(
        x=xs,
        y=ys,
        z=zs,
        color=labels,
        text=skills,
        title=plot_title,
    )
|
|
|
|
|
|
|
import plotly.express as px |
|
import numpy as np |
|
|
|
# Snapshot to display; selects both the embeddings and the metadata file.
specific_date = "03-01-2024"

embeddings = np.load(f"./vectorstore/{specific_date}_embeddings.npy")

# NOTE(security): pickle.load can execute arbitrary code while loading.
# Only point this at metadata files produced by a trusted pipeline.
with open(f"./vectorstore/{specific_date}_metadata.pkl", "rb") as f:
    metadata = pickle.load(f)

# Labels and skills both come from the stored metadata.
# NOTE(review): presumably index-aligned with the embeddings rows — confirm.
labels, skills = metadata["labels"], metadata["skills"]

fig = visualize3D(
    embeddings,
    labels,
    skills,
    n_clusters=5,
    output_folder="./plots",
    date=specific_date,
)
fig.update_layout(height=900)

# The figure is built once above; the UI only renders it.
with gr.Blocks() as demo:
    gr.Markdown("# 3D Visualization of Skills in ML Job Postings", elem_id="title")
    gr.Plot(fig)

demo.launch()