File size: 831 Bytes
13b4b59
 
 
 
383af42
724a34b
383af42
 
13b4b59
 
eabfbb5
13b4b59
 
bec338a
13b4b59
 
efb4b53
bec338a
cd4a4f2
bec338a
13b4b59
bec338a
13b4b59
 
eabfbb5
13b4b59
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import spacy
import streamlit as st
from spacy_streamlit import visualize_ner

# Load the small English pipeline, downloading it on first use if it is not
# installed. spaCy raises OSError when a model package cannot be found, so
# only that case triggers the download; any other failure (and Ctrl-C)
# propagates instead of being silently swallowed by a bare `except`.
MODEL_NAME = "en_core_web_sm"
try:
    nlp = spacy.load(MODEL_NAME)
except OSError:
    spacy.cli.download(MODEL_NAME)
    nlp = spacy.load(MODEL_NAME)

# Page title; st.write renders the string as Markdown.
st.write("""
# Extract the tech keywords on Data Science jobs using NER.
""")

# Insert the rule-based entity ruler ahead of the statistical "ner" component
# and populate it with the patterns stored next to this script.
ruler = nlp.add_pipe("entity_ruler", before="ner")
ruler.from_disk("patterns.jsonl")

# Default text shown in the input box. The emoji were previously stored as
# mis-decoded UTF-8 bytes (mojibake); they are restored to the real characters.
description = "Built with love 💙 Python, Spacy, Streamlit and Huggingface 🤗"
text = st.text_area(
    label='Job Description',
    value=description,
    placeholder='Please enter a job description',
)

# Run the full pipeline on whatever is currently in the text area and render
# the recognised entities.
doc = nlp(text)
# The label list comes from the statistical "ner" component only.
# NOTE(review): labels that exist solely in patterns.jsonl are not part of
# this list — confirm that is the intended set for the visualizer.
visualize_ner(doc, labels=nlp.get_pipe("ner").labels)

# List the entities whose entity id is 'SKILLS' as a JSON array.
# (presumably that id is assigned by the patterns in patterns.jsonl — verify)
st.subheader('Tech Keywords')
skill_spans = (span for span in doc.ents if span.ent_id_ == 'SKILLS')
st.json([
    {
        'label': span.label_,
        'text': span.text,
        # Positions are taken straight from Span.start / Span.end.
        'start': span.start,
        'end': span.end,
    }
    for span in skill_spans
])