Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,21 +1,31 @@
|
|
1 |
import streamlit as st
|
2 |
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
|
|
|
3 |
|
4 |
-
# โหลด Tokenizer และ Model
|
5 |
model_name = "Nucha/Nucha_ITSkillNER_BERT"
|
|
|
|
|
6 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
7 |
-
model = AutoModelForTokenClassification.from_pretrained(model_name)
|
8 |
|
9 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
ner_pipeline = pipeline("ner", model=model, tokenizer=tokenizer)
|
11 |
|
12 |
-
#
|
13 |
ENTITY_COLORS = {
|
14 |
-
"HSKILL": "#FFD700", #
|
15 |
-
"SSKILL": "#87CEFA", #
|
16 |
}
|
17 |
|
18 |
-
# ฟังก์ชันรวม Entity (B-* และ I-*)
|
19 |
def merge_entities(entities):
|
20 |
merged = []
|
21 |
current_entity = None
|
@@ -23,13 +33,13 @@ def merge_entities(entities):
|
|
23 |
for entity in entities:
|
24 |
word, label, start, end = entity["word"], entity["entity"].split("-")[-1], entity["start"], entity["end"]
|
25 |
|
26 |
-
if entity["entity"].startswith("B-"):
|
27 |
if current_entity:
|
28 |
merged.append(current_entity)
|
29 |
current_entity = {"word": word, "entity": label, "start": start, "end": end}
|
30 |
-
elif entity["entity"].startswith("I-") and current_entity:
|
31 |
current_entity["word"] += " " + word
|
32 |
-
current_entity["end"] = end
|
33 |
else:
|
34 |
if current_entity:
|
35 |
merged.append(current_entity)
|
@@ -40,85 +50,59 @@ def merge_entities(entities):
|
|
40 |
|
41 |
return merged
|
42 |
|
43 |
-
# ฟังก์ชันสร้าง Named Entity Annotation แสดงแบบ Tagging
|
44 |
def generate_ner_html(text, entities):
|
45 |
-
all_entities = sorted(entities, key=lambda e: e["start"])
|
46 |
-
|
47 |
last_idx = 0
|
48 |
annotated_text = ""
|
49 |
|
50 |
for entity in all_entities:
|
51 |
start, end, word, entity_type = entity["start"], entity["end"], entity["word"], entity["entity"]
|
52 |
-
color = ENTITY_COLORS.get(entity_type,
|
53 |
|
54 |
-
# เพิ่มข้อความก่อน Entity
|
55 |
annotated_text += text[last_idx:start]
|
56 |
-
|
57 |
-
# เพิ่ม Entity แบบมีไฮไลต์และ Tagging
|
58 |
annotated_text += f'''
|
59 |
<span style="background-color: {color}; padding: 3px 6px; border-radius: 5px; font-weight: bold;">
|
60 |
{word} <span style="background: white; color: black; font-size: 12px; padding: 1px 4px; border-radius: 3px;">{entity_type}</span>
|
61 |
</span>
|
62 |
'''
|
63 |
-
last_idx = end
|
64 |
|
65 |
-
# เพิ่มข้อความที่เหลือหลังจาก Entity สุดท้าย
|
66 |
annotated_text += text[last_idx:]
|
67 |
-
|
68 |
return f'<div style="font-size:16px; line-height:1.6;">{annotated_text}</div>'
|
69 |
|
70 |
st.set_page_config(layout="wide")
|
71 |
|
72 |
-
# UI ด้วย Streamlit
|
73 |
col1, col2, col3 = st.columns([1, 1, 1])
|
74 |
|
75 |
with col1:
|
76 |
st.header("Input")
|
77 |
-
|
78 |
-
# แสดงผล Named Entity Annotation
|
79 |
st.markdown("##### Job Description")
|
80 |
|
81 |
default_text = """
|
82 |
Job Description:
|
83 |
-
We are seeking a talented Software Engineering to join our dynamic team at Technology Innovations Inc
|
84 |
-
Key Responsibilities:
|
85 |
-
Develop high-quality software design and architecture Identify, prioritize, and execute tasks in the software development life cycle Review and debug code Collaborate with other developers and engineers to ensure software quality
|
86 |
-
Required Qualifications:
|
87 |
-
Bachelor s degree in Computer Science or related field Proven experience as a Software Engineer or similar role Familiarity with Agile development methodologies Proficiency in programming languages such as Python or C# Strong problem-solving skills and the ability to a team work Preferred Qualifications: english language communication
|
88 |
"""
|
89 |
text = st.text_area("Enter text for NER analysis:", value=default_text, height=400)
|
90 |
analyze_button = st.button("Analyze")
|
91 |
|
92 |
with col2:
|
93 |
st.header("Result")
|
94 |
-
|
95 |
-
# แสดงผล Named Entity Annotation
|
96 |
st.markdown("##### Named Entity Annotation (Tagging)")
|
97 |
|
98 |
if analyze_button:
|
99 |
ner_results = ner_pipeline(text)
|
100 |
-
|
101 |
if ner_results:
|
102 |
-
# รวม Entity ที่มี B- และ I- ให้เป็นคำเดียวกัน
|
103 |
merged_entities = merge_entities([
|
104 |
{"word": entity['word'], "entity": entity['entity'], "start": entity['start'], "end": entity['end']}
|
105 |
for entity in ner_results
|
106 |
])
|
107 |
-
|
108 |
-
|
109 |
ner_html = generate_ner_html(text, merged_entities)
|
110 |
st.markdown(ner_html, unsafe_allow_html=True)
|
111 |
-
|
112 |
else:
|
113 |
st.write("No entities found.")
|
114 |
-
|
115 |
with col3:
|
116 |
st.header("")
|
117 |
-
|
118 |
-
# แสดงข้อมูล Entity ในรูปแบบตาราง
|
119 |
st.markdown("##### Extracted Entities")
|
120 |
-
|
121 |
if analyze_button:
|
122 |
st.table(merged_entities)
|
123 |
-
|
124 |
-
|
|
|
1 |
import streamlit as st
|
2 |
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
|
3 |
+
from accelerate import init_empty_weights, load_checkpoint_and_dispatch
|
4 |
|
|
|
5 |
model_name = "Nucha/Nucha_ITSkillNER_BERT"

# Load the tokenizer from the Hub.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the model together with its weights in a single step.
# The previous approach called from_pretrained() inside init_empty_weights()
# and then passed the Hub repo id to load_checkpoint_and_dispatch(), which
# expects a local checkpoint file or folder rather than a repo id.
# from_pretrained() already downloads and materialises the weights.
model = AutoModelForTokenClassification.from_pretrained(model_name)

# Build the NER pipeline.
ner_pipeline = pipeline("ner", model=model, tokenizer=tokenizer)
|
22 |
|
23 |
+
# Mapping สีของ Entity
|
24 |
ENTITY_COLORS = {
|
25 |
+
"HSKILL": "#FFD700", # ทักษะเชิงเทคนิค
|
26 |
+
"SSKILL": "#87CEFA", # ทักษะเชิงอ่อน
|
27 |
}
|
28 |
|
|
|
29 |
def merge_entities(entities):
|
30 |
merged = []
|
31 |
current_entity = None
|
|
|
33 |
for entity in entities:
|
34 |
word, label, start, end = entity["word"], entity["entity"].split("-")[-1], entity["start"], entity["end"]
|
35 |
|
36 |
+
if entity["entity"].startswith("B-"):
|
37 |
if current_entity:
|
38 |
merged.append(current_entity)
|
39 |
current_entity = {"word": word, "entity": label, "start": start, "end": end}
|
40 |
+
elif entity["entity"].startswith("I-") and current_entity:
|
41 |
current_entity["word"] += " " + word
|
42 |
+
current_entity["end"] = end
|
43 |
else:
|
44 |
if current_entity:
|
45 |
merged.append(current_entity)
|
|
|
50 |
|
51 |
return merged
|
52 |
|
|
|
53 |
def generate_ner_html(text, entities):
    """Render ``text`` as an HTML snippet with every entity highlighted.

    Args:
        text: The raw string that the entity offsets index into.
        entities: Iterable of dicts carrying ``start``, ``end``, ``word`` and
            ``entity`` keys. They are processed in ascending ``start`` order.

    Returns:
        A ``<div>`` string in which each entity is wrapped in a coloured
        ``<span>`` followed by a small label showing its entity type. Text
        outside the entities is copied through HTML-escaped.
    """
    # Local import keeps this block self-contained.
    import html

    pieces = []
    last_idx = 0

    for entity in sorted(entities, key=lambda e: e["start"]):
        start, end = entity["start"], entity["end"]
        entity_type = entity["entity"]
        # Unknown entity types fall back to light grey.
        color = ENTITY_COLORS.get(entity_type, "#D3D3D3")

        # The result is rendered with unsafe_allow_html=True, so everything
        # that originates from user input or the model must be escaped.
        safe_word = html.escape(str(entity["word"]))
        safe_type = html.escape(str(entity_type))

        # Plain text between the previous entity and this one.
        pieces.append(html.escape(text[last_idx:start]))

        # Highlighted entity plus its tag, emitted on a single line so that
        # Markdown cannot mistake indented markup for a code block.
        pieces.append(
            f' <span style="background-color: {color}; padding: 3px 6px; '
            f'border-radius: 5px; font-weight: bold;">'
            f'{safe_word} <span style="background: white; color: black; '
            f'font-size: 12px; padding: 1px 4px; border-radius: 3px;">'
            f'{safe_type}</span></span> '
        )
        last_idx = end

    # Whatever follows the last entity.
    pieces.append(html.escape(text[last_idx:]))

    annotated_text = "".join(pieces)
    return f'<div style="font-size:16px; line-height:1.6;">{annotated_text}</div>'
|
72 |
|
73 |
st.set_page_config(layout="wide")
|
74 |
|
|
|
75 |
col1, col2, col3 = st.columns([1, 1, 1])
|
76 |
|
77 |
with col1:
|
78 |
st.header("Input")
|
|
|
|
|
79 |
st.markdown("##### Job Description")
|
80 |
|
81 |
default_text = """
|
82 |
Job Description:
|
83 |
+
We are seeking a talented Software Engineering to join our dynamic team at Technology Innovations Inc...
|
|
|
|
|
|
|
|
|
84 |
"""
|
85 |
text = st.text_area("Enter text for NER analysis:", value=default_text, height=400)
|
86 |
analyze_button = st.button("Analyze")
|
87 |
|
88 |
with col2:
|
89 |
st.header("Result")
|
|
|
|
|
90 |
st.markdown("##### Named Entity Annotation (Tagging)")
|
91 |
|
92 |
if analyze_button:
|
93 |
ner_results = ner_pipeline(text)
|
|
|
94 |
if ner_results:
|
|
|
95 |
merged_entities = merge_entities([
|
96 |
{"word": entity['word'], "entity": entity['entity'], "start": entity['start'], "end": entity['end']}
|
97 |
for entity in ner_results
|
98 |
])
|
|
|
|
|
99 |
ner_html = generate_ner_html(text, merged_entities)
|
100 |
st.markdown(ner_html, unsafe_allow_html=True)
|
|
|
101 |
else:
|
102 |
st.write("No entities found.")
|
103 |
+
|
104 |
with col3:
    st.header("")
    st.markdown("##### Extracted Entities")

    # merged_entities is only bound when the analysis actually returned
    # entities, so guard on ner_results as well; otherwise clicking "Analyze"
    # on text without entities raises NameError here.
    if analyze_button and ner_results:
        st.table(merged_entities)
|
|
|
|