Spaces:

Nucha
/

NuchaITSkillNER

Sleeping

File size: 5,319 Bytes

c15480b
8ff3ba1
7abeff1
8c3e6c9
6c2499f
8ff3ba1
 
 
8c3e6c9
c616fef
8ff3ba1
36fd8c7
 
cb27ba4
385f7b0
36fd8c7
 
2a9db15
 
 
 
 
 
cb27ba4
2a9db15
cb27ba4
 
2a9db15
cb27ba4
 
 
 
2a9db15
 
 
cb27ba4
2a9db15
 
 
 
 
 
53e75d7
cb27ba4
 
5481c65
 
53e75d7
5481c65
 
 
0ded3c8
5481c65
 
53e75d7
 
 
 
 
cb27ba4
53e75d7
 
5481c65
 
 
53e75d7
7faf30e
53e75d7
2a9db15
f310734
 
c15480b
7d1e8e1
8ff3ba1
827a18f
 
2be6d6d
 
 
 
cb27ba4
d1e7bdd
a0d334a
 
 
 
 
 
b9eee80
a0d334a
 
ccba021
d1e7bdd
 
cb27ba4
 
827a18f
6b5852a
827a18f
6f6166a
156ce5d
7faf30e
 
 
 
2a9db15
 
cb27ba4
7faf30e
2a9db15
 
cb27ba4
b9eee80
cb27ba4
 
f5cdda6
7faf30e
 
cdd6218
f5cdda6
cdd6218
 
 
 
 
f5cdda6

import streamlit as st
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline

# Checkpoint identifier passed to both from_pretrained() calls below.
model_name = "Nucha/Nucha_ITSkillNER_BERT"

# Load the tokenizer first, then the token-classification model, from the
# same checkpoint.
tokenizer, model = (
    AutoTokenizer.from_pretrained(model_name),
    AutoModelForTokenClassification.from_pretrained(model_name),
)

# Token-level NER pipeline built from the objects loaded above.
ner_pipeline = pipeline("ner", tokenizer=tokenizer, model=model)

# Highlight (background) colour used for each entity label.
ENTITY_COLORS = dict(
    HSKILL="#FFD700",  # gold
    SSKILL="#87CEFA",  # light sky blue
)

def merge_entities(entities):
    """Merge token-level BIO predictions (B-* / I-*) into whole entities.

    Args:
        entities: Iterable of dicts with the keys ``"word"``, ``"entity"``
            (a tag such as ``"B-HSKILL"`` or ``"I-HSKILL"``), ``"start"``
            and ``"end"`` (character offsets into the analysed text).

    Returns:
        A list of new dicts with the same four keys, where ``"entity"`` is
        the tag with its ``B-``/``I-`` prefix removed and ``"word"`` /
        ``"end"`` cover every token merged into the entity. The input dicts
        are not modified.

    An ``I-*`` token extends the entity currently being built. Any other
    token (``B-*``, an ``I-*`` with nothing to continue, or an unprefixed
    tag) closes the current entity and starts a new one.
    """
    merged = []
    current_entity = None

    for entity in entities:
        tag = entity["entity"]
        word, start, end = entity["word"], entity["start"], entity["end"]

        if tag.startswith("I-") and current_entity is not None:
            if word.startswith("##"):
                # Subword continuation piece (the "##" marker that
                # WordPiece-style tokenizers emit): glue it onto the
                # previous piece and drop the marker.
                current_entity["word"] += word[2:]
            elif start is not None and start == current_entity["end"]:
                # Token begins exactly where the entity currently ends, so
                # there was no whitespace between them in the text.
                current_entity["word"] += word
            else:
                current_entity["word"] += " " + word
            current_entity["end"] = end  # extend the span to this token
            continue

        # Anything else starts a fresh entity; flush the one in progress.
        if current_entity is not None:
            merged.append(current_entity)
        current_entity = {
            "word": word,
            "entity": tag.split("-")[-1],
            "start": start,
            "end": end,
        }

    if current_entity is not None:
        merged.append(current_entity)

    return merged

def generate_ner_html(text, entities, colors=None, default_color="#D3D3D3"):
    """Render ``text`` as HTML with each entity highlighted and tagged.

    Args:
        text: The analysed text; entity offsets index into this string.
        entities: Iterable of dicts with the keys ``"start"``, ``"end"``,
            ``"word"`` and ``"entity"`` (the label shown in the tag).
        colors: Optional mapping from entity label to a CSS colour. When
            omitted, the module-level ``ENTITY_COLORS`` mapping is used.
        default_color: CSS colour used for labels missing from the mapping.

    Returns:
        One ``<div>`` string. Text outside entities is emitted as-is (HTML
        escaped); each entity becomes a coloured ``<span>`` holding the
        entity's ``"word"`` followed by a small label badge.
    """
    import html  # function-scope import: the block needs nothing else from the file header

    palette = ENTITY_COLORS if colors is None else colors

    pieces = []
    last_idx = 0

    # Walk the entities left to right so the plain text between them can be
    # copied through in order.
    for entity in sorted(entities, key=lambda e: e["start"]):
        start, end = entity["start"], entity["end"]
        entity_type = entity["entity"]
        # An unknown label gets the fallback colour instead of raising.
        color = palette.get(entity_type, default_color)

        # Plain text before this entity. Escaped because the result is meant
        # to be rendered as raw HTML and ``text`` is user input.
        pieces.append(html.escape(text[last_idx:start]))

        shown_word = html.escape(entity["word"])
        shown_label = html.escape(entity_type)
        # Markup is kept on one line without indentation; indented lines can
        # be interpreted as code blocks when the HTML is passed through a
        # Markdown renderer.
        pieces.append(
            f'<span style="background-color: {color}; padding: 3px 6px; border-radius: 5px; font-weight: bold;">'
            f'{shown_word} '
            f'<span style="background: white; color: black; font-size: 12px; padding: 1px 4px; border-radius: 3px;">{shown_label}</span>'
            '</span>'
        )
        last_idx = end  # resume copying after this entity

    # Remaining text after the last entity.
    pieces.append(html.escape(text[last_idx:]))

    annotated_text = "".join(pieces)
    return f'<div style="font-size:16px; line-height:1.6;">{annotated_text}</div>'

st.set_page_config(layout="wide")

# Streamlit UI: three equal-width columns (input, annotated result, table).
col1, col2, col3 = st.columns([1, 1, 1])

# Defined up front so the table column always has a value to read, even on
# the first page load or when the analysis finds nothing.
merged_entities = []

with col1:
    st.header("Input")

    st.markdown("##### Named Entity Annotation (Tagging)")

    default_text = """Job Description:
We are seeking a talented Software Engineering to join our dynamic team at Technology Innovations Inc. You will be responsible for designing, developing, and maintaining software applications that meet the needs of our clients.
Key Responsibilities:
Develop high-quality software design and architecture
Identify, prioritize, and execute tasks in the software development life cycle
Review and debug code
Collaborate with other developers and engineers to ensure software quality
Required Qualifications:
Bachelor s degree in Computer Science or related field
Proven experience as a Software Engineer or similar role
Familiarity with Agile development methodologies
Proficiency in programming languages such as Python or C#
Strong problem-solving skills and the ability to  a team work
Preferred Qualifications: english language communication
"""
    text = st.text_area("Enter text for NER analysis:", value=default_text, height=400)
    analyze_button = st.button("Analyze")

with col2:
    st.header("Result")

    if analyze_button:
        ner_results = ner_pipeline(text)

        if ner_results:
            # Join B-/I- tokens into whole entities, passing along only the
            # four fields merge_entities reads.
            merged_entities = merge_entities([
                {"word": entity['word'], "entity": entity['entity'], "start": entity['start'], "end": entity['end']}
                for entity in ner_results
            ])

            # Show the text with highlighted, tagged entities.
            st.markdown("##### Named Entity Annotation (Tagging)")
            ner_html = generate_ner_html(text, merged_entities)
            st.markdown(ner_html, unsafe_allow_html=True)

        else:
            st.write("No entities found.")

with col3:
    st.header("")

    # Show the extracted entities as a table once there is something to show.
    st.markdown("##### Extracted Entities")
    if merged_entities:
        st.table(merged_entities)