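# NOTE(review): "Spaces: Sleeping" looks like page-status text captured along
# with this file; it is not part of the program.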
import html

import streamlit as st
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
# Hugging Face Hub id of the token-classification checkpoint used by this app.
model_name = "Nucha/Nucha_ITSkillNER_BERT"


@st.cache_resource(show_spinner=False)
def _load_ner_components(name):
    """Load the tokenizer, model and NER pipeline for *name* and cache them.

    Streamlit re-executes the script on every widget interaction; caching
    keeps the checkpoint from being re-loaded on each rerun.
    ``show_spinner=False`` avoids rendering anything before the script
    reaches ``st.set_page_config``.

    Args:
        name: Model identifier passed to ``from_pretrained``.

    Returns:
        A ``(tokenizer, model, ner_pipeline)`` tuple.
    """
    loaded_tokenizer = AutoTokenizer.from_pretrained(name)
    loaded_model = AutoModelForTokenClassification.from_pretrained(name)
    ner = pipeline("ner", model=loaded_model, tokenizer=loaded_tokenizer)
    return loaded_tokenizer, loaded_model, ner


# The original module-level names are kept so existing references still work.
tokenizer, model, ner_pipeline = _load_ner_components(model_name)
# Highlight colour (CSS hex value) for each entity label.
ENTITY_COLORS = dict(
    HSKILL="#FFD700",  # gold
    SSKILL="#87CEFA",  # light sky blue
)
def merge_entities(entities, max_gap=1):
    """Merge token-level ``B-*`` / ``I-*`` predictions into whole entities.

    An ``I-`` token extends the entity currently being built only when it
    carries the same label and starts at most ``max_gap`` characters after
    that entity ends. Tokens that are neighbours in ``entities`` need not be
    neighbours in the text, so without the distance check an unrelated token
    far away could be glued onto the previous entity.

    Args:
        entities: Iterable of dicts with ``"word"``, ``"entity"`` (tag such
            as ``"B-HSKILL"``), ``"start"`` and ``"end"`` keys, in text order.
        max_gap: Largest allowed character gap between an entity and the
            ``I-`` token continuing it (1 allows a single separator).
            ``None`` disables the distance check.

    Returns:
        A list of dicts with ``"word"``, ``"entity"`` (label without the
        ``B-``/``I-`` prefix), ``"start"`` and ``"end"`` keys.
    """
    merged = []
    current = None

    for entity in entities:
        tag = entity["entity"]
        word, start, end = entity["word"], entity["start"], entity["end"]
        label = tag.split("-")[-1]

        extends_current = (
            current is not None
            and tag.startswith("I-")
            and label == current["entity"]
        )
        if extends_current and max_gap is not None:
            prev_end = current["end"]
            # Offsets may be missing; then fall back to list adjacency.
            if start is not None and prev_end is not None:
                extends_current = start - prev_end <= max_gap

        if extends_current:
            if word.startswith("##"):
                # Sub-word continuation piece: glue it on without the marker.
                current["word"] += word[2:]
            elif start is not None and start == current["end"]:
                # No character between the pieces in the text (e.g. "C" + "#").
                current["word"] += word
            else:
                current["word"] += " " + word
            current["end"] = end
        else:
            # "B-" tags, untagged labels, and "I-" tokens that cannot continue
            # the open entity all start a new one.
            if current is not None:
                merged.append(current)
            current = {"word": word, "entity": label, "start": start, "end": end}

    if current is not None:
        merged.append(current)
    return merged
def generate_ner_html(text, entities, colors=None, default_color="#D3D3D3"):
    """Render *text* as an HTML fragment with each entity highlighted and tagged.

    The text and the entity labels are HTML-escaped, because the fragment is
    meant to be rendered as raw HTML and *text* is user input.

    Args:
        text: The analysed source string.
        entities: Dicts with ``"start"``, ``"end"`` and ``"entity"`` keys;
            ``"word"`` is used only when the offsets select no text.
        colors: Optional mapping of entity label to CSS colour. Defaults to
            the module-level ``ENTITY_COLORS``.
        default_color: Colour used for labels missing from the mapping.

    Returns:
        A ``<div>`` string containing the annotated text.
    """
    palette = ENTITY_COLORS if colors is None else colors
    parts = []
    cursor = 0  # index in ``text`` up to which output has been produced

    for entity in sorted(entities, key=lambda e: e["start"]):
        start, end, label = entity["start"], entity["end"], entity["entity"]
        if start < cursor:
            # Overlaps a span already rendered; emitting it would duplicate text.
            continue

        color = palette.get(label, default_color)
        # Show the original text rather than the tokenizer's version of it.
        shown = text[start:end] or entity.get("word", "")

        parts.append(html.escape(text[cursor:start]))
        parts.append(
            f'<span style="background-color: {color}; padding: 3px 6px; '
            f'border-radius: 5px; font-weight: bold;">'
            f'{html.escape(shown)} '
            f'<span style="background: white; color: black; font-size: 12px; '
            f'padding: 1px 4px; border-radius: 3px;">{html.escape(label)}</span>'
            f'</span>'
        )
        cursor = end

    # Whatever follows the last entity.
    parts.append(html.escape(text[cursor:]))
    body = "".join(parts)
    return f'<div style="font-size:16px; line-height:1.6;">{body}</div>'
st.set_page_config(layout="wide")

# Sample job posting pre-filled in the input box.
default_text = """
Job Description:
We are seeking a talented Software Engineering to join our dynamic team at Technology Innovations Inc. You will be responsible for designing, developing, and maintaining software applications that meet the needs of our clients.
Key Responsibilities:
Develop high-quality software design and architecture Identify, prioritize, and execute tasks in the software development life cycle Review and debug code Collaborate with other developers and engineers to ensure software quality
Required Qualifications:
Bachelor s degree in Computer Science or related field Proven experience as a Software Engineer or similar role Familiarity with Agile development methodologies Proficiency in programming languages such as Python or C# Strong problem-solving skills and the ability to a team work Preferred Qualifications: english language communication
"""

# Streamlit UI: three equal-width columns (input, annotated result, table).
col1, col2, col3 = st.columns([1, 1, 1])

# Bound up front so the table column never reads an undefined name when the
# analysis finds no entities.
merged_entities = []

with col1:
    st.header("Input")
    st.markdown("##### Named Entity Annotation (Tagging)")
    text = st.text_area("Enter text for NER analysis:", value=default_text, height=400)
    analyze_button = st.button("Analyze")

with col2:
    st.header("Result")
    # Annotated (highlighted and tagged) view of the input text.
    st.markdown("##### Named Entity Annotation (Tagging)")
    if analyze_button:
        # Blank input cannot contain entities, so skip inference for it.
        ner_results = ner_pipeline(text) if text.strip() else []
        if ner_results:
            # Keep only the fields the merge step and the table need, then
            # combine B-/I- tokens into whole entities.
            merged_entities = merge_entities([
                {key: entity[key] for key in ("word", "entity", "start", "end")}
                for entity in ner_results
            ])
            ner_html = generate_ner_html(text, merged_entities)
            st.markdown(ner_html, unsafe_allow_html=True)
        else:
            st.write("No entities found.")

with col3:
    st.header("")
    # Tabular view of the extracted entities.
    st.markdown("##### Extracted Entities")
    if analyze_button and merged_entities:
        st.table(merged_entities)