NuchaITSkillNER / app.py
Nucha's picture
Update app.py
fa8c83f verified
import html

import streamlit as st
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
# Hugging Face Hub identifier of the fine-tuned token-classification model.
model_name = "Nucha/Nucha_ITSkillNER_BERT"


# Streamlit re-executes this script on every widget interaction; caching the
# loaded objects keeps the tokenizer/model from being rebuilt on each rerun.
# The spinner is disabled so that no UI element is emitted while loading.
@st.cache_resource(show_spinner=False)
def _load_ner_components(name):
    """Load the tokenizer, model and NER pipeline for ``name``.

    Args:
        name: Model identifier passed to ``from_pretrained``.

    Returns:
        A ``(tokenizer, model, pipeline)`` tuple; the pipeline is built from
        the returned tokenizer and model.
    """
    loaded_tokenizer = AutoTokenizer.from_pretrained(name)
    loaded_model = AutoModelForTokenClassification.from_pretrained(name)
    loaded_pipeline = pipeline("ner", model=loaded_model, tokenizer=loaded_tokenizer)
    return loaded_tokenizer, loaded_model, loaded_pipeline


# Module-level names are kept so the rest of the script can use them as before.
tokenizer, model, ner_pipeline = _load_ner_components(model_name)
# Background colour used to highlight each entity label in the rendered HTML.
ENTITY_COLORS = dict(
    HSKILL="#FFD700",  # gold
    SSKILL="#87CEFA",  # light blue
)
def merge_entities(entities):
    """Merge token-level BIO predictions into whole entity spans.

    Args:
        entities: Iterable of dicts with the keys ``"word"``, ``"entity"``
            (a tag such as ``"B-HSKILL"`` / ``"I-HSKILL"``), ``"start"`` and
            ``"end"``, in reading order.

    Returns:
        A list of dicts with the keys ``"word"``, ``"entity"`` (the tag with
        its ``B-``/``I-`` prefix removed), ``"start"`` and ``"end"``, one per
        merged entity.

    Merging rules:
        * A ``##`` word-piece that starts exactly where the open entity ends
          is glued onto it without a space, whatever its tag.
        * An ``I-`` token extends the open entity only when its label matches;
          it is joined without a space when the two are character-adjacent
          and with a single space otherwise.
        * Anything else closes the open entity and starts a new one.
    """
    merged = []
    current = None

    for token in entities:
        tag = token["entity"]
        label = tag.split("-")[-1]
        word, start, end = token["word"], token["start"], token["end"]

        # Strip the word-piece continuation marker so it never reaches the UI.
        is_subword = word.startswith("##")
        if is_subword:
            word = word[2:]

        # Offsets may be None; only treat tokens as adjacent when they are known.
        touches_current = (
            current is not None and start is not None and start == current["end"]
        )

        if is_subword and touches_current:
            current["word"] += word
            current["end"] = end
        elif tag.startswith("I-") and current is not None and label == current["entity"]:
            separator = "" if touches_current else " "
            current["word"] += separator + word
            current["end"] = end
        else:
            # "B-" tags, orphan or mismatching "I-" tags and untagged labels
            # all begin a fresh entity.
            if current is not None:
                merged.append(current)
            current = {"word": word, "entity": label, "start": start, "end": end}

    if current is not None:
        merged.append(current)
    return merged
def generate_ner_html(text, entities):
    """Render ``text`` as HTML with every entity highlighted and tagged.

    Args:
        text: The raw input string the entity offsets refer to.
        entities: Iterable of dicts with the keys ``"start"``, ``"end"``,
            ``"word"`` and ``"entity"``; ``start``/``end`` are character
            offsets into ``text``.

    Returns:
        A single ``<div>`` string. Text between entities is copied from
        ``text``; each entity is replaced by a coloured ``<span>`` holding
        its ``word`` followed by a small label badge.

    All text taken from ``text`` or from the entity dicts is HTML-escaped,
    because the caller renders the result as raw HTML. Labels missing from
    ``ENTITY_COLORS`` fall back to a neutral grey instead of raising
    ``KeyError``.
    """
    fallback_color = "#D3D3D3"
    pieces = []
    last_idx = 0

    # Walk the entities left to right so the plain text between them can be
    # sliced out of the original (unescaped) string using the raw offsets.
    for entity in sorted(entities, key=lambda e: e["start"]):
        start, end = entity["start"], entity["end"]
        entity_type = entity["entity"]
        color = ENTITY_COLORS.get(entity_type, fallback_color)

        # Plain text preceding this entity.
        pieces.append(html.escape(text[last_idx:start]))

        # Highlighted entity followed by its label badge.
        word_html = html.escape(entity["word"])
        label_html = html.escape(entity_type)
        pieces.append(
            "\n"
            f'<span style="background-color: {color}; padding: 3px 6px; border-radius: 5px; font-weight: bold;">\n'
            f'{word_html} <span style="background: white; color: black; font-size: 12px; padding: 1px 4px; border-radius: 3px;">{label_html}</span>\n'
            "</span>\n"
        )
        last_idx = end

    # Whatever remains after the last entity.
    pieces.append(html.escape(text[last_idx:]))

    annotated_text = "".join(pieces)
    return f'<div style="font-size:16px; line-height:1.6;">{annotated_text}</div>'
# Use the wide page layout.
st.set_page_config(layout="wide")

# Streamlit UI: three equal-width columns (input | annotated result | table).
col1, col2, col3 = st.columns([1, 1, 1])

# Bound up front so the table column can always read it, even when the
# analysis finds nothing (previously that path raised NameError).
merged_entities = []

with col1:
    st.header("Input")
    st.markdown("##### Named Entity Annotation (Tagging)")
    # Sample job posting pre-filled in the text area.
    default_text = """
Job Description:
We are seeking a talented Software Engineering to join our dynamic team at Technology Innovations Inc. You will be responsible for designing, developing, and maintaining software applications that meet the needs of our clients.
Key Responsibilities:
Develop high-quality software design and architecture Identify, prioritize, and execute tasks in the software development life cycle Review and debug code Collaborate with other developers and engineers to ensure software quality
Required Qualifications:
Bachelor s degree in Computer Science or related field Proven experience as a Software Engineer or similar role Familiarity with Agile development methodologies Proficiency in programming languages such as Python or C# Strong problem-solving skills and the ability to a team work Preferred Qualifications: english language communication
"""
    text = st.text_area("Enter text for NER analysis:", value=default_text, height=400)
    analyze_button = st.button("Analyze")

with col2:
    st.header("Result")
    # Annotated text with highlighted entities.
    st.markdown("##### Named Entity Annotation (Tagging)")
    if analyze_button:
        # Skip the model call entirely when there is nothing to analyse.
        ner_results = ner_pipeline(text) if text.strip() else []
        if ner_results:
            # Collapse B-*/I-* token predictions into whole entities, keeping
            # only the fields the merge and rendering steps use.
            merged_entities = merge_entities([
                {
                    "word": entity["word"],
                    "entity": entity["entity"],
                    "start": entity["start"],
                    "end": entity["end"],
                }
                for entity in ner_results
            ])
            ner_html = generate_ner_html(text, merged_entities)
            st.markdown(ner_html, unsafe_allow_html=True)
        else:
            st.write("No entities found.")

with col3:
    st.header("")
    # Extracted entities shown as a table.
    st.markdown("##### Extracted Entities")
    if analyze_button and merged_entities:
        st.table(merged_entities)