Spaces:

Nucha
/

NuchaITSkillNER

Running

App Files Files Community

Nucha committed 5 days ago

Commit

45bf2d1

verified ·

1 Parent(s): e73aae4

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -41

app.py CHANGED Viewed

@@ -1,21 +1,31 @@
 import streamlit as st
 from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
-# โหลด Tokenizer และ Model
 model_name = "Nucha/Nucha_ITSkillNER_BERT"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForTokenClassification.from_pretrained(model_name)
-# สร้าง NER Pipeline
 ner_pipeline = pipeline("ner", model=model, tokenizer=tokenizer)
-# กำหนดสีของ Entity แต่ละประเภท
 ENTITY_COLORS = {
-    "HSKILL": "#FFD700",  # สีทอง
-    "SSKILL": "#87CEFA",  # สีฟ้าอ่อน
 }
-# ฟังก์ชันรวม Entity (B-* และ I-*)
 def merge_entities(entities):
     merged = []
     current_entity = None
@@ -23,13 +33,13 @@ def merge_entities(entities):
     for entity in entities:
         word, label, start, end = entity["word"], entity["entity"].split("-")[-1], entity["start"], entity["end"]
-        if entity["entity"].startswith("B-"):  # ถ้าเป็น Entity ตัวแรก
             if current_entity:
                 merged.append(current_entity)
             current_entity = {"word": word, "entity": label, "start": start, "end": end}
-        elif entity["entity"].startswith("I-") and current_entity:  # ถ้าเป็น Entity ที่ต่อเนื่อง
             current_entity["word"] += " " + word
-            current_entity["end"] = end  # ปรับตำแหน่ง end ใหม่
         else:
             if current_entity:
                 merged.append(current_entity)
@@ -40,85 +50,59 @@ def merge_entities(entities):
     return merged
-# ฟังก์ชันสร้าง Named Entity Annotation แสดงแบบ Tagging
 def generate_ner_html(text, entities):
-    all_entities = sorted(entities, key=lambda e: e["start"])  # เรียงตามตำแหน่งเริ่มต้น
     last_idx = 0
     annotated_text = ""
     for entity in all_entities:
         start, end, word, entity_type = entity["start"], entity["end"], entity["word"], entity["entity"]
-        color = ENTITY_COLORS.get(entity_type, ENTITY_COLORS[entity_type])
-        # เพิ่มข้อความก่อน Entity
         annotated_text += text[last_idx:start]
-        # เพิ่ม Entity แบบมีไฮไลต์และ Tagging
         annotated_text += f'''
             <span style="background-color: {color}; padding: 3px 6px; border-radius: 5px; font-weight: bold;">
                 {word} <span style="background: white; color: black; font-size: 12px; padding: 1px 4px; border-radius: 3px;">{entity_type}</span>
             </span>
         '''
-        last_idx = end  # อัปเดตตำแหน่งล่าสุด
-    # เพิ่มข้อความที่เหลือหลังจาก Entity สุดท้าย
     annotated_text += text[last_idx:]
     return f'<div style="font-size:16px; line-height:1.6;">{annotated_text}</div>'
 st.set_page_config(layout="wide")
-# UI ด้วย Streamlit
 col1, col2, col3 = st.columns([1, 1, 1])
 with col1:
     st.header("Input")
-    # แสดงผล Named Entity Annotation
     st.markdown("##### Job Description")
     default_text = """
         Job Description:
-        We are seeking a talented Software Engineering to join our dynamic team at Technology Innovations Inc. You will be responsible for designing, developing, and maintaining software applications that meet the needs of our clients.
-        Key Responsibilities:
-        Develop high-quality software design and architecture Identify, prioritize, and execute tasks in the software development life cycle Review and debug code Collaborate with other developers and engineers to ensure software quality
-        Required Qualifications:
-        Bachelor s degree in Computer Science or related field Proven experience as a Software Engineer or similar role Familiarity with Agile development methodologies Proficiency in programming languages such as Python or C# Strong problem-solving skills and the ability to  a team work Preferred Qualifications: english language communication
     """
     text = st.text_area("Enter text for NER analysis:", value=default_text, height=400)
     analyze_button = st.button("Analyze")
 with col2:
     st.header("Result")
-    # แสดงผล Named Entity Annotation
     st.markdown("##### Named Entity Annotation (Tagging)")
     if analyze_button:
         ner_results = ner_pipeline(text)
         if ner_results:
-            # รวม Entity ที่มี B- และ I- ให้เป็นคำเดียวกัน
             merged_entities = merge_entities([
                 {"word": entity['word'], "entity": entity['entity'], "start": entity['start'], "end": entity['end']}
                 for entity in ner_results
             ])
             ner_html = generate_ner_html(text, merged_entities)
             st.markdown(ner_html, unsafe_allow_html=True)
         else:
             st.write("No entities found.")
 with col3:
     st.header("")
-    # แสดงข้อมูล Entity ในรูปแบบตาราง
     st.markdown("##### Extracted Entities")
     if analyze_button:
         st.table(merged_entities)

 import streamlit as st
 from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
+from accelerate import init_empty_weights, load_checkpoint_and_dispatch
 # Hugging Face Hub id of the fine-tuned token-classification checkpoint.
 model_name = "Nucha/Nucha_ITSkillNER_BERT"
 # Load the tokenizer that belongs to the checkpoint.
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 # Load the model with its real weights. `from_pretrained` resolves the Hub id
 # and materialises the parameters itself. Wrapping it in `init_empty_weights()`
 # and then calling `load_checkpoint_and_dispatch(model, model_name, ...)` does
 # not work: that helper expects a local checkpoint file or folder, not a Hub
 # repo id.
 model = AutoModelForTokenClassification.from_pretrained(model_name)
 # Build the NER pipeline used by the UI below.
 ner_pipeline = pipeline("ner", model=model, tokenizer=tokenizer)
 # Highlight colour for each entity type.
 ENTITY_COLORS = {
     "HSKILL": "#FFD700",  # hard (technical) skills
     "SSKILL": "#87CEFA",  # soft skills
 }
 def merge_entities(entities):
     """Merge token-level B-*/I-* predictions into whole entity spans.

     Args:
         entities: Iterable of dicts with the keys "word", "entity" (a tag
             such as "B-HSKILL" or "I-HSKILL"), "start" and "end", in text
             order.

     Returns:
         A list of dicts with the keys "word", "entity" (the tag without its
         B-/I- prefix), "start" and "end", one per merged span.
     """
     merged = []
     current_entity = None
     for entity in entities:
         tag = entity["entity"]
         word = entity["word"]
         if tag.startswith("B-"):
             # A new span starts: flush the one in progress first.
             if current_entity:
                 merged.append(current_entity)
             current_entity = {
                 "word": word,
                 "entity": tag.split("-")[-1],
                 "start": entity["start"],
                 "end": entity["end"],
             }
         elif tag.startswith("I-") and current_entity:
             if word.startswith("##"):
                 # WordPiece continuation (BERT tokenizers prefix it with
                 # "##"): glue it to the previous piece without a space.
                 current_entity["word"] += word[2:]
             else:
                 current_entity["word"] += " " + word
             current_entity["end"] = entity["end"]
         else:
             # Any other tag (or an I-* with no open span) closes the span.
             # Resetting prevents the same span being appended twice.
             if current_entity:
                 merged.append(current_entity)
             current_entity = None
     # Flush the span still open when the input ends.
     if current_entity:
         merged.append(current_entity)
     return merged
 def generate_ner_html(text, entities):
     """Render *text* as an HTML fragment with entity spans highlighted.

     Args:
         text: The raw input string the entities were extracted from.
         entities: Iterable of dicts with the keys "start", "end", "word" and
             "entity"; "start"/"end" are character offsets into *text*.

     Returns:
         An HTML string: a <div> holding the text, where each entity is
         wrapped in a coloured <span> followed by a small label showing its
         type. Types missing from ENTITY_COLORS get a light-grey background.
     """
     # Local import keeps this block self-contained.
     from html import escape

     all_entities = sorted(entities, key=lambda e: e["start"])
     last_idx = 0
     parts = []
     for entity in all_entities:
         start, end, word, entity_type = entity["start"], entity["end"], entity["word"], entity["entity"]
         color = ENTITY_COLORS.get(entity_type, "#D3D3D3")
         # The caller renders this with unsafe_allow_html=True, so every piece
         # of user- or model-supplied text must be escaped.
         parts.append(escape(text[last_idx:start]))
         parts.append(f'''
             <span style="background-color: {color}; padding: 3px 6px; border-radius: 5px; font-weight: bold;">
                 {escape(str(word))} <span style="background: white; color: black; font-size: 12px; padding: 1px 4px; border-radius: 3px;">{escape(str(entity_type))}</span>
             </span>
         ''')
         # Never move backwards, so overlapping spans cannot duplicate text.
         last_idx = max(last_idx, end)
     # Text after the last entity.
     parts.append(escape(text[last_idx:]))
     annotated_text = "".join(parts)
     return f'<div style="font-size:16px; line-height:1.6;">{annotated_text}</div>'
 st.set_page_config(layout="wide")
 # Three equal-width columns: input, annotated result, extracted-entity table.
 col1, col2, col3 = st.columns([1, 1, 1])
 # Bound up front so the table column never reads an undefined name when the
 # pipeline returns no entities.
 merged_entities = []
 with col1:
     st.header("Input")
     st.markdown("##### Job Description")
     default_text = """
         Job Description:
         We are seeking a talented Software Engineering to join our dynamic team at Technology Innovations Inc...
     """
     text = st.text_area("Enter text for NER analysis:", value=default_text, height=400)
     analyze_button = st.button("Analyze")
 with col2:
     st.header("Result")
     st.markdown("##### Named Entity Annotation (Tagging)")
     if analyze_button:
         ner_results = ner_pipeline(text)
         if ner_results:
             # Collapse token-level B-*/I-* predictions into whole entities.
             merged_entities = merge_entities([
                 {"word": entity['word'], "entity": entity['entity'], "start": entity['start'], "end": entity['end']}
                 for entity in ner_results
             ])
             ner_html = generate_ner_html(text, merged_entities)
             st.markdown(ner_html, unsafe_allow_html=True)
         else:
             st.write("No entities found.")
 with col3:
     st.header("")
     st.markdown("##### Extracted Entities")
     # Only show the table when there is something to show.
     if analyze_button and merged_entities:
         st.table(merged_entities)