File size: 2,319 Bytes
c59a66a 762e05d c59a66a 762e05d c59a66a 762e05d c59a66a 762e05d c59a66a 762e05d c59a66a 762e05d c59a66a 762e05d c59a66a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
import gradio as gr
from transformers import pipeline
from embedding_gen import load_skills_from_date, visualize3D
import numpy as np
import pickle
# Hugging Face pipelines, each loaded from the model id given below.
# NOTE(review): token_skill_classifier is not referenced anywhere else in the
# visible part of this file - confirm whether it is still needed.
token_skill_classifier = pipeline(
    model="jjzha/jobbert_skill_extraction",
    aggregation_strategy="first",
)
token_knowledge_classifier = pipeline(
    model="Robzy/jobbert_knowledge_extraction",
    aggregation_strategy="first",
)

# Sample sentences offered as clickable examples in the Gradio interface.
examples = [
    "High proficiency in Python and AI/ML frameworks, i.e. Pytorch.",
    "Experience with Unreal and/or Unity and/or native IOS/Android 3D development",
]
def aggregate_span(results):
    """Merge entity spans that are separated by exactly one character.

    Two consecutive spans are joined when the later span's ``"start"`` equals
    the earlier span's ``"end"`` + 1. The joined span's ``"word"`` is the two
    words separated by a single space and its ``"end"`` is the later span's
    ``"end"``; every other key keeps the value from the first span of the run.

    Args:
        results: Sequence of dicts, each with at least ``"word"``, ``"start"``
            and ``"end"`` keys, in the order they should be scanned.

    Returns:
        A new list of merged span dicts. An empty input yields an empty list.
        The dicts passed in are not modified.
    """
    # Guard: indexing results[0] on an empty sequence would raise IndexError.
    if not results:
        return []

    merged = []
    # Work on shallow copies so the caller's dicts are left untouched.
    current = dict(results[0])
    for span in results[1:]:
        if span["start"] == current["end"] + 1:
            # Adjacent span: extend the current run.
            current["word"] += " " + span["word"]
            current["end"] = span["end"]
        else:
            # Gap (or overlap): close the current run and start a new one.
            merged.append(current)
            current = dict(span)
    merged.append(current)
    return merged
def ner(text):
    """Run the knowledge classifier on *text* and package the hits.

    Every hit with a truthy ``"entity_group"`` has that key removed and an
    ``"entity"`` key set to ``"Knowledge"``. A non-empty list of hits is then
    passed through ``aggregate_span``.

    Args:
        text: The sentence to analyse.

    Returns:
        A dict with the original ``"text"`` and the list of ``"entities"``.
    """
    spans = token_knowledge_classifier(text)

    for span in spans:
        if not span.get("entity_group"):
            continue
        span["entity"] = "Knowledge"
        span.pop("entity_group")

    # aggregate_span is only called when there is at least one hit.
    entities = aggregate_span(spans) if len(spans) > 0 else spans
    return {"text": text, "entities": entities}
import plotly.express as px
import numpy as np
specific_date = "03-01-2024"  # Example date folder to process

# NOTE(review): this value of ``skills`` is overwritten by metadata["skills"]
# a few lines below; the call is kept in case it has side effects - confirm.
skills = load_skills_from_date('./tags', specific_date)

# Embeddings and their metadata are stored side by side, keyed by the date.
embeddings = np.load(f"./vectorstore/{specific_date}_embeddings.npy")
with open(f"./vectorstore/{specific_date}_metadata.pkl", "rb") as f:
    # NOTE(review): pickle.load can execute arbitrary code; this is only safe
    # as long as the vectorstore files come from a trusted source.
    metadata = pickle.load(f)
labels = metadata["labels"]
skills = metadata["skills"]

# Build the figure shown in the UI, then fix its height.
fig = visualize3D(
    embeddings,
    labels,
    skills,
    n_clusters=5,
    output_folder="./plots",
    date=specific_date,
)
fig.update_layout(height=900)
# Assemble the page: a text-highlighting interface backed by ner(), followed
# by the pre-built figure.
with gr.Blocks() as demo:
    gr.Interface(
        fn=ner,
        inputs=gr.Textbox(placeholder="Enter sentence here..."),
        outputs=["highlight"],
        examples=examples,
        title="In-demand skills in machine learning (ML) industry",
    )
    # gr.Markdown("Embedding visualisation of sought skills in ML job posting in Stockholm, Sweden on LinkedIn")
    gr.Plot(fig)

# Start the app (the stray trailing "|" after this call was a syntax error).
demo.launch()