Spaces:

Nucha
/

NuchaITSkillNER

Sleeping

File size: 5,390 Bytes

c15480b
8ff3ba1
7abeff1
8c3e6c9
6c2499f
8ff3ba1
 
 
8c3e6c9
c616fef
8ff3ba1
36fd8c7
 
cb27ba4
385f7b0
36fd8c7
 
2a9db15
 
 
 
 
 
cb27ba4
2a9db15
cb27ba4
 
2a9db15
cb27ba4
 
 
 
2a9db15
 
 
cb27ba4
2a9db15
 
 
 
 
 
53e75d7
cb27ba4
 
5481c65
 
53e75d7
5481c65
 
 
0ded3c8
5481c65
 
53e75d7
 
 
 
 
cb27ba4
53e75d7
 
5481c65
 
 
53e75d7
7faf30e
53e75d7
2a9db15
f310734
 
c15480b
7d1e8e1
8ff3ba1
827a18f
 
2be6d6d
 
 
 
f4abe81
 
 
 
ef36361
f4abe81
ef36361
f4abe81
cb27ba4
827a18f
6b5852a
827a18f
6f6166a
156ce5d
06b9ec4
 
 
7faf30e
 
 
 
2a9db15
 
cb27ba4
7faf30e
2a9db15
 
06b9ec4
cb27ba4
 
f5cdda6
7faf30e
 
cdd6218
f5cdda6
cdd6218
 
 
 
325248e
 
 
f5cdda6

import streamlit as st
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline

# Hugging Face model identifier handed to the `from_pretrained` loaders below.
model_name = "Nucha/Nucha_ITSkillNER_BERT"


# Streamlit re-executes this script from the top on every widget interaction.
# Caching the loader keeps one tokenizer/model/pipeline per process instead of
# reloading the weights on every button click.
# show_spinner=False: the cached call should not draw anything itself, because
# st.set_page_config() is only invoked further down in this file.
@st.cache_resource(show_spinner=False)
def load_ner_pipeline(name):
    """Build the token-classification ("ner") pipeline for checkpoint *name*.

    Args:
        name: Model identifier understood by ``AutoTokenizer.from_pretrained``
            and ``AutoModelForTokenClassification.from_pretrained``.

    Returns:
        The object returned by ``transformers.pipeline("ner", ...)``.
    """
    loaded_tokenizer = AutoTokenizer.from_pretrained(name)
    loaded_model = AutoModelForTokenClassification.from_pretrained(name)
    return pipeline("ner", model=loaded_model, tokenizer=loaded_tokenizer)


# NER pipeline shared by every rerun of the script.
ner_pipeline = load_ner_pipeline(model_name)

# Kept as module-level names for any code that still refers to them directly.
tokenizer = ner_pipeline.tokenizer
model = ner_pipeline.model

# Highlight colour used for each entity type.
ENTITY_COLORS = {
    "HSKILL": "#FFD700",  # gold
    "SSKILL": "#87CEFA",  # light blue
}

# Merge token-level predictions (B-* followed by I-*) into whole entities.
def merge_entities(entities):
    """Collapse per-token NER predictions into one record per entity.

    Args:
        entities: Iterable of dicts with the keys ``"word"``, ``"entity"``
            (a tag such as ``"B-HSKILL"`` / ``"I-HSKILL"``), ``"start"`` and
            ``"end"`` (character offsets). Tokens are expected in text order.

    Returns:
        A list of new dicts with the keys ``"word"``, ``"entity"`` (the tag
        with its ``B-``/``I-`` prefix removed), ``"start"`` and ``"end"``.
        The input dicts are not modified.

    Merging rules:
        * A ``B-`` tag opens a new entity.
        * An ``I-`` tag extends the open entity only when it carries the same
          label; otherwise it opens a new entity of its own.
        * A ``##`` continuation piece that starts exactly where the open
          entity ends extends it even if it is tagged ``B-``, because it is
          the remainder of the same word.
        * Pieces are glued without a space when they are ``##`` pieces or are
          character-adjacent (e.g. ``"C"`` + ``"#"``); otherwise a single
          space separates them.
    """
    merged = []
    current = None

    for token in entities:
        tag = token["entity"]
        label = tag.split("-")[-1]
        start, end = token["start"], token["end"]

        raw_word = token["word"]
        # "##xyz" marks a word-piece continuation; the marker is not text.
        is_subword = raw_word.startswith("##") and len(raw_word) > 2
        word = raw_word[2:] if is_subword else raw_word

        same_label = current is not None and current["entity"] == label
        # Offsets may be missing (None); never treat that as adjacency.
        adjacent = same_label and start is not None and start == current["end"]

        if same_label and (tag.startswith("I-") or (is_subword and adjacent)):
            glue = "" if (is_subword or adjacent) else " "
            current["word"] += glue + word
            current["end"] = end  # the entity now reaches to this token's end
            continue

        # Anything else closes the open entity (if any) and starts a new one.
        if current is not None:
            merged.append(current)
        current = {"word": word, "entity": label, "start": start, "end": end}

    if current is not None:
        merged.append(current)

    return merged

# Build the tagged Named Entity Annotation view as an HTML string.
def generate_ner_html(text, entities, colors=None, default_color="#D3D3D3"):
    """Render *text* as HTML with every entity highlighted and labelled.

    Args:
        text: The string the entity offsets refer to.
        entities: Iterable of dicts with the keys ``"start"``, ``"end"``
            (character offsets into *text*), ``"word"`` and ``"entity"``
            (the label shown in the tag).
        colors: Optional mapping from label to CSS colour. When omitted, the
            module-level ``ENTITY_COLORS`` mapping is used.
        default_color: CSS colour used for labels missing from the mapping,
            so an unexpected label no longer raises ``KeyError``.

    Returns:
        A single ``<div>`` element as a string. All text taken from *text*
        and from the entity records is HTML-escaped, since the result is
        meant to be rendered as raw HTML.
    """
    import html  # local import: keeps this block self-contained

    palette = ENTITY_COLORS if colors is None else colors

    pieces = []
    cursor = 0  # index in `text` up to which output has been produced

    # Process entities in order of their start offset.
    for entity in sorted(entities, key=lambda e: e["start"]):
        start, end, label = entity["start"], entity["end"], entity["entity"]

        # A span starting before the cursor overlaps one already emitted;
        # rendering it would duplicate part of the text.
        if start < cursor:
            continue

        color = palette.get(label, default_color)

        # Show the source text itself rather than the tokenizer's rendition
        # of it; fall back to the record's word if the slice is empty.
        surface = text[start:end] or entity["word"]

        # Plain text between the previous entity and this one.
        pieces.append(html.escape(text[cursor:start]))

        # The highlighted entity followed by its label tag.
        pieces.append(
            f'<span style="background-color: {color}; padding: 3px 6px; '
            f'border-radius: 5px; font-weight: bold;">'
            f'{html.escape(surface)} '
            f'<span style="background: white; color: black; font-size: 12px; '
            f'padding: 1px 4px; border-radius: 3px;">{html.escape(label)}</span>'
            f'</span>'
        )
        cursor = end

    # Whatever follows the last entity.
    pieces.append(html.escape(text[cursor:]))

    annotated_text = "".join(pieces)
    return f'<div style="font-size:16px; line-height:1.6;">{annotated_text}</div>'

st.set_page_config(layout="wide")

# Streamlit UI: three equally wide columns (input, annotated result, table).
col1, col2, col3 = st.columns([1, 1, 1])

# Entities found by the current run. Bound up front so the table column can
# always read it, including when the pipeline returns nothing.
merged_entities = []

with col1:
    st.header("Input")

    # Sub-heading shown above the input box.
    st.markdown("##### Named Entity Annotation (Tagging)")

    default_text = """
        Job Description:
        We are seeking a talented Software Engineering to join our dynamic team at Technology Innovations Inc. You will be responsible for designing, developing, and maintaining software applications that meet the needs of our clients.
        Key Responsibilities:
        Develop high-quality software design and architecture Identify, prioritize, and execute tasks in the software development life cycle Review and debug code Collaborate with other developers and engineers to ensure software quality
        Required Qualifications:
        Bachelor s degree in Computer Science or related field Proven experience as a Software Engineer or similar role Familiarity with Agile development methodologies Proficiency in programming languages such as Python or C# Strong problem-solving skills and the ability to  a team work Preferred Qualifications: english language communication
    """
    text = st.text_area("Enter text for NER analysis:", value=default_text, height=400)
    analyze_button = st.button("Analyze")

with col2:
    st.header("Result")

    # Annotated view of the analysed text.
    st.markdown("##### Named Entity Annotation (Tagging)")

    if analyze_button:
        # Do not send blank input to the model.
        ner_results = ner_pipeline(text) if text.strip() else []

        if ner_results:
            # Join B-/I- tagged tokens into whole entities. Only the four
            # fields the merge step needs are passed on.
            merged_entities = merge_entities([
                {"word": entity['word'], "entity": entity['entity'], "start": entity['start'], "end": entity['end']}
                for entity in ner_results
            ])

            ner_html = generate_ner_html(text, merged_entities)
            st.markdown(ner_html, unsafe_allow_html=True)
        else:
            st.write("No entities found.")

with col3:
    st.header("")

    # Extracted entities as a table.
    st.markdown("##### Extracted Entities")

    # Only draw the table when there is something to show; the "no entities"
    # message is already displayed in the result column.
    if analyze_button and merged_entities:
        st.table(merged_entities)