Spaces:
Sleeping
Sleeping
File size: 5,390 Bytes
c15480b 8ff3ba1 7abeff1 8c3e6c9 6c2499f 8ff3ba1 8c3e6c9 c616fef 8ff3ba1 36fd8c7 cb27ba4 385f7b0 36fd8c7 2a9db15 cb27ba4 2a9db15 cb27ba4 2a9db15 cb27ba4 2a9db15 cb27ba4 2a9db15 53e75d7 cb27ba4 5481c65 53e75d7 5481c65 0ded3c8 5481c65 53e75d7 cb27ba4 53e75d7 5481c65 53e75d7 7faf30e 53e75d7 2a9db15 f310734 c15480b 7d1e8e1 8ff3ba1 827a18f 2be6d6d f4abe81 ef36361 f4abe81 ef36361 f4abe81 cb27ba4 827a18f 6b5852a 827a18f 6f6166a 156ce5d 06b9ec4 7faf30e 2a9db15 cb27ba4 7faf30e 2a9db15 06b9ec4 cb27ba4 f5cdda6 7faf30e cdd6218 f5cdda6 cdd6218 325248e f5cdda6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 |
import streamlit as st
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
# Load the tokenizer, model and NER pipeline.
model_name = "Nucha/Nucha_ITSkillNER_BERT"


# Streamlit re-executes this script on every widget interaction; caching the
# loaded objects avoids re-reading the model weights on each rerun.
# show_spinner=False keeps the cached call from emitting a UI element before
# st.set_page_config() runs further down the script.
@st.cache_resource(show_spinner=False)
def _load_ner_components(name):
    """Load and return ``(tokenizer, model, ner_pipeline)`` for *name*.

    Args:
        name: Model identifier passed to ``from_pretrained``.

    Returns:
        A tuple of the tokenizer, the token-classification model and a
        ``"ner"`` pipeline built from the two.
    """
    loaded_tokenizer = AutoTokenizer.from_pretrained(name)
    loaded_model = AutoModelForTokenClassification.from_pretrained(name)
    loaded_pipeline = pipeline(
        "ner", model=loaded_model, tokenizer=loaded_tokenizer
    )
    return loaded_tokenizer, loaded_model, loaded_pipeline


# Keep the original module-level names so the rest of the script is unchanged.
tokenizer, model, ner_pipeline = _load_ner_components(model_name)
# Highlight colour used for each entity type.
ENTITY_COLORS = dict(
    HSKILL="#FFD700",  # gold
    SSKILL="#87CEFA",  # light sky blue
)
# Merge token-level predictions (B-* followed by I-*) into whole entities.
def merge_entities(entities):
    """Merge token-level BIO predictions into whole-entity spans.

    A token whose tag starts with ``I-`` extends the span currently being
    built; any other token (``B-*``, or an ``I-*`` with no open span) closes
    the open span and starts a new one.

    Args:
        entities: Iterable of dicts with the keys ``"word"``, ``"entity"``
            (a tag such as ``"B-HSKILL"`` or ``"I-HSKILL"``), ``"start"``
            and ``"end"``.

    Returns:
        A list of dicts with the keys ``"word"``, ``"entity"`` (the tag with
        its ``B-``/``I-`` prefix removed), ``"start"`` and ``"end"``, one per
        merged span, in input order.
    """
    merged = []
    current = None
    for token in entities:
        tag = token["entity"]
        word, start, end = token["word"], token["start"], token["end"]

        if tag.startswith("I-") and current is not None:
            if word.startswith("##"):
                # WordPiece continuation: glue it on and drop the marker.
                current["word"] += word[2:]
            elif start is not None and start == current["end"]:
                # Offsets touch, so there was no whitespace in the source
                # text between the two tokens (e.g. "C" + "#").
                current["word"] += word
            else:
                current["word"] += " " + word
            current["end"] = end  # the span now reaches to this token's end
            continue

        # B-* tag, or a token that cannot extend anything: start a new span.
        if current is not None:
            merged.append(current)
        current = {
            "word": word,
            # Split once so only the leading B-/I- marker is removed.
            "entity": tag.split("-", 1)[-1],
            "start": start,
            "end": end,
        }

    if current is not None:
        merged.append(current)
    return merged
# Build the HTML that shows the text with each entity highlighted and tagged.
def generate_ner_html(text, entities):
    """Render *text* as HTML with every entity highlighted and labelled.

    Args:
        text: The analysed text; ``start``/``end`` of each entity are used as
            slice offsets into it.
        entities: Iterable of dicts with the keys ``"start"``, ``"end"``,
            ``"word"`` and ``"entity"`` (the entity type).

    Returns:
        An HTML string: a ``<div>`` containing the text, with one coloured
        ``<span>`` per entity showing its word and its type.
    """
    # Local import keeps this function self-contained.
    import html

    # Neutral colour for entity types that have no entry in ENTITY_COLORS.
    # The previous default, ENTITY_COLORS[entity_type], was evaluated eagerly
    # and raised KeyError for exactly the types it was meant to cover.
    fallback_color = "#D3D3D3"

    pieces = []
    last_idx = 0
    for entity in sorted(entities, key=lambda e: e["start"]):
        start, end = entity["start"], entity["end"]
        entity_type = entity["entity"]
        color = ENTITY_COLORS.get(entity_type, fallback_color)

        # Plain text between the previous entity and this one. Everything
        # taken from the input is escaped because the result is rendered as
        # raw HTML.
        pieces.append(html.escape(text[last_idx:start]))

        # The entity itself: highlighted word followed by its type tag.
        pieces.append(
            "\n"
            f'<span style="background-color: {color}; padding: 3px 6px; border-radius: 5px; font-weight: bold;">\n'
            f'{html.escape(entity["word"])} <span style="background: white; color: black; font-size: 12px; padding: 1px 4px; border-radius: 3px;">{html.escape(entity_type)}</span>\n'
            "</span>\n"
        )
        last_idx = end  # continue after this entity

    # Remaining text after the last entity.
    pieces.append(html.escape(text[last_idx:]))
    annotated_text = "".join(pieces)
    return f'<div style="font-size:16px; line-height:1.6;">{annotated_text}</div>'
st.set_page_config(layout="wide")

# Streamlit UI: three equal-width columns (input, annotated result, table).
col1, col2, col3 = st.columns([1, 1, 1])

# Defined up front so the table column can always read it, even when the
# model finds nothing (previously that path raised NameError).
merged_entities = []

with col1:
    st.header("Input")
    st.markdown("##### Named Entity Annotation (Tagging)")
    default_text = """
Job Description:
We are seeking a talented Software Engineering to join our dynamic team at Technology Innovations Inc. You will be responsible for designing, developing, and maintaining software applications that meet the needs of our clients.
Key Responsibilities:
Develop high-quality software design and architecture Identify, prioritize, and execute tasks in the software development life cycle Review and debug code Collaborate with other developers and engineers to ensure software quality
Required Qualifications:
Bachelor s degree in Computer Science or related field Proven experience as a Software Engineer or similar role Familiarity with Agile development methodologies Proficiency in programming languages such as Python or C# Strong problem-solving skills and the ability to a team work Preferred Qualifications: english language communication
"""
    text = st.text_area("Enter text for NER analysis:", value=default_text, height=400)
    analyze_button = st.button("Analyze")

with col2:
    st.header("Result")
    # Annotated text with the entities highlighted.
    st.markdown("##### Named Entity Annotation (Tagging)")
    if analyze_button:
        ner_results = ner_pipeline(text)
        if ner_results:
            # Keep only the fields needed downstream, then merge B-/I- tokens
            # into whole entities.
            merged_entities = merge_entities([
                {"word": entity['word'], "entity": entity['entity'], "start": entity['start'], "end": entity['end']}
                for entity in ner_results
            ])
            ner_html = generate_ner_html(text, merged_entities)
            st.markdown(ner_html, unsafe_allow_html=True)
        else:
            st.write("No entities found.")

with col3:
    st.header("")
    # Extracted entities as a table.
    st.markdown("##### Extracted Entities")
    if analyze_button and merged_entities:
        st.table(merged_entities)
|