"""Streamlit demo: tag IT-skill entities in free text with a token-classification model."""

import html

import streamlit as st
from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline

# Page configuration has to be the first Streamlit command of the script. It
# is issued before the cached loader below because that loader may render a
# spinner while the model is being loaded.
st.set_page_config(layout="wide")

# Hugging Face model identifier used for both the tokenizer and the model.
model_name = "Nucha/Nucha_ITSkillNER_BERT"


@st.cache_resource
def _load_ner_components(name):
    """Load the tokenizer, the model and the NER pipeline for *name*.

    Streamlit re-executes the whole script on every widget interaction;
    caching the result keeps the model from being reloaded on each rerun.

    Returns:
        A ``(tokenizer, model, ner_pipeline)`` tuple.
    """
    loaded_tokenizer = AutoTokenizer.from_pretrained(name)
    loaded_model = AutoModelForTokenClassification.from_pretrained(name)
    return (
        loaded_tokenizer,
        loaded_model,
        pipeline("ner", model=loaded_model, tokenizer=loaded_tokenizer),
    )


tokenizer, model, ner_pipeline = _load_ner_components(model_name)

# Highlight colour for each entity type.
ENTITY_COLORS = {
    "HSKILL": "#FFD700",  # gold
    "SSKILL": "#87CEFA",  # light blue
}

# Fallback highlight for entity types that have no entry in ENTITY_COLORS.
DEFAULT_ENTITY_COLOR = "#D3D3D3"


def merge_entities(entities):
    """Merge token-level ``B-*`` / ``I-*`` predictions into whole entities.

    Args:
        entities: Iterable of dicts with the keys ``"word"``, ``"entity"``
            (the tag, e.g. ``"B-HSKILL"``), ``"start"`` and ``"end"``.
            Any additional keys are ignored.

    Returns:
        A list of dicts with the keys ``"word"``, ``"entity"`` (the tag with
        its ``B-`` / ``I-`` prefix removed), ``"start"`` and ``"end"``.
        An ``I-*`` token extends the entity currently being built; every
        other token starts a new one.
    """
    merged = []
    current = None
    for entity in entities:
        tag = entity["entity"]
        word = entity["word"]

        if tag.startswith("I-") and current is not None:
            # Continuation token. Sub-word pieces marked with "##" and tokens
            # that start exactly where the entity currently ends are glued on
            # without a space so the merged word matches the source text.
            if word.startswith("##"):
                current["word"] += word[2:]
            elif entity["start"] == current["end"]:
                current["word"] += word
            else:
                current["word"] += " " + word
            current["end"] = entity["end"]
            continue

        # A "B-" tag, an "I-" tag with nothing to extend, or an unprefixed
        # tag: close the entity in progress and open a new one.
        if current is not None:
            merged.append(current)
        current = {
            "word": word,
            "entity": tag.split("-")[-1],
            "start": entity["start"],
            "end": entity["end"],
        }

    if current is not None:
        merged.append(current)
    return merged


def _escape_segment(segment):
    """Escape *segment* for HTML and turn its newlines into ``<br>`` tags."""
    return html.escape(segment).replace("\n", "<br>")


def generate_ner_html(text, entities):
    """Return *text* as an HTML fragment with every entity highlighted and tagged.

    Args:
        text: The analysed text; entity offsets index into this string.
        entities: Dicts with the keys ``"start"``, ``"end"``, ``"word"`` and
            ``"entity"``, as produced by ``merge_entities``.

    Returns:
        An HTML string. Plain text and entity words are HTML-escaped because
        the fragment is rendered with ``unsafe_allow_html=True``.
    """
    # NOTE(review): the inline markup was missing from the source as received
    # (the string literals were left unterminated and ``color`` was unused).
    # The styling below is a reconstruction; confirm it against the intended
    # design.
    parts = []
    last_idx = 0
    for entity in sorted(entities, key=lambda e: e["start"]):
        start, end = entity["start"], entity["end"]
        if start < last_idx:
            # Overlaps the previous highlight; emitting it would duplicate text.
            continue
        entity_type = entity["entity"]
        color = ENTITY_COLORS.get(entity_type, DEFAULT_ENTITY_COLOR)

        # Plain text between the previous entity and this one.
        parts.append(_escape_segment(text[last_idx:start]))
        # The entity itself: highlighted word followed by its type tag.
        parts.append(
            f'<span style="background-color: {color}; padding: 2px 6px; '
            f'border-radius: 4px;">{_escape_segment(entity["word"])} '
            f'<span style="font-size: 0.75em; font-weight: bold;">'
            f"{html.escape(entity_type)}</span></span>"
        )
        last_idx = end

    # Whatever follows the last entity.
    parts.append(_escape_segment(text[last_idx:]))
    annotated_text = "".join(parts)
    return f'<div style="line-height: 2;">{annotated_text}</div>'


# --- Streamlit UI ---------------------------------------------------------
col1, col2, col3 = st.columns([1, 1, 1])

# Shared by the result column and the table column; stays empty unless an
# analysis actually finds entities.
merged_entities = []

with col1:
    st.header("Input")
    st.markdown("##### Named Entity Annotation (Tagging)")
    # NOTE(review): any line breaks the original literal may have had are not
    # recoverable from the source as received; the text is kept exactly as
    # shown there.
    default_text = (
        " Job Description: We are seeking a talented Software Engineering to join "
        "our dynamic team at Technology Innovations Inc. You will be responsible "
        "for designing, developing, and maintaining software applications that "
        "meet the needs of our clients. Key Responsibilities: Develop "
        "high-quality software design and architecture Identify, prioritize, and "
        "execute tasks in the software development life cycle Review and debug "
        "code Collaborate with other developers and engineers to ensure software "
        "quality Required Qualifications: Bachelor s degree in Computer Science "
        "or related field Proven experience as a Software Engineer or similar "
        "role Familiarity with Agile development methodologies Proficiency in "
        "programming languages such as Python or C# Strong problem-solving skills "
        "and the ability to a team work Preferred Qualifications: english "
        "language communication "
    )
    text = st.text_area("Enter text for NER analysis:", value=default_text, height=400)
    analyze_button = st.button("Analyze")

with col2:
    st.header("Result")
    st.markdown("##### Named Entity Annotation (Tagging)")
    if analyze_button:
        # Blank input cannot contain entities, so the model is not run for it.
        ner_results = ner_pipeline(text) if text.strip() else []
        if ner_results:
            # Join B-/I- tokens that belong to the same entity.
            merged_entities = merge_entities(ner_results)
            ner_html = generate_ner_html(text, merged_entities)
            st.markdown(ner_html, unsafe_allow_html=True)
        else:
            st.write("No entities found.")

with col3:
    # Empty header keeps this column vertically aligned with the other two.
    st.header("")
    st.markdown("##### Extracted Entities")
    # Tabular view of the merged entities; skipped when nothing was found.
    if analyze_button and merged_entities:
        st.table(merged_entities)