Nucha committed on
Commit
45bf2d1
·
verified ·
1 Parent(s): e73aae4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -41
app.py CHANGED
@@ -1,21 +1,31 @@
1
  import streamlit as st
2
  from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
 
3
 
4
- # โหลด Tokenizer และ Model
5
  model_name = "Nucha/Nucha_ITSkillNER_BERT"
 
 
6
  tokenizer = AutoTokenizer.from_pretrained(model_name)
7
- model = AutoModelForTokenClassification.from_pretrained(model_name)
8
 
9
- # สร้าง NER Pipeline
 
 
 
 
 
 
 
 
 
 
10
  ner_pipeline = pipeline("ner", model=model, tokenizer=tokenizer)
11
 
12
- # กำหนดสีของ Entity แต่ละประเภท
13
  ENTITY_COLORS = {
14
- "HSKILL": "#FFD700", # สีทอง
15
- "SSKILL": "#87CEFA", # สีฟ้าอ่อน
16
  }
17
 
18
- # ฟังก์ชันรวม Entity (B-* และ I-*)
19
  def merge_entities(entities):
20
  merged = []
21
  current_entity = None
@@ -23,13 +33,13 @@ def merge_entities(entities):
23
  for entity in entities:
24
  word, label, start, end = entity["word"], entity["entity"].split("-")[-1], entity["start"], entity["end"]
25
 
26
- if entity["entity"].startswith("B-"): # ถ้าเป็น Entity ตัวแรก
27
  if current_entity:
28
  merged.append(current_entity)
29
  current_entity = {"word": word, "entity": label, "start": start, "end": end}
30
- elif entity["entity"].startswith("I-") and current_entity: # ถ้าเป็น Entity ที่ต่อเนื่อง
31
  current_entity["word"] += " " + word
32
- current_entity["end"] = end # ปรับตำแหน่ง end ใหม่
33
  else:
34
  if current_entity:
35
  merged.append(current_entity)
@@ -40,85 +50,59 @@ def merge_entities(entities):
40
 
41
  return merged
42
 
43
- # ฟังก์ชันสร้าง Named Entity Annotation แสดงแบบ Tagging
44
  def generate_ner_html(text, entities):
45
- all_entities = sorted(entities, key=lambda e: e["start"]) # เรียงตามตำแหน่งเริ่มต้น
46
-
47
  last_idx = 0
48
  annotated_text = ""
49
 
50
  for entity in all_entities:
51
  start, end, word, entity_type = entity["start"], entity["end"], entity["word"], entity["entity"]
52
- color = ENTITY_COLORS.get(entity_type, ENTITY_COLORS[entity_type])
53
 
54
- # เพิ่มข้อความก่อน Entity
55
  annotated_text += text[last_idx:start]
56
-
57
- # เพิ่ม Entity แบบมีไฮไลต์และ Tagging
58
  annotated_text += f'''
59
  <span style="background-color: {color}; padding: 3px 6px; border-radius: 5px; font-weight: bold;">
60
  {word} <span style="background: white; color: black; font-size: 12px; padding: 1px 4px; border-radius: 3px;">{entity_type}</span>
61
  </span>
62
  '''
63
- last_idx = end # อัปเดตตำแหน่งล่าสุด
64
 
65
- # เพิ่มข้อความที่เหลือหลังจาก Entity สุดท้าย
66
  annotated_text += text[last_idx:]
67
-
68
  return f'<div style="font-size:16px; line-height:1.6;">{annotated_text}</div>'
69
 
70
  st.set_page_config(layout="wide")
71
 
72
- # UI ด้วย Streamlit
73
  col1, col2, col3 = st.columns([1, 1, 1])
74
 
75
  with col1:
76
  st.header("Input")
77
-
78
- # แสดงผล Named Entity Annotation
79
  st.markdown("##### Job Description")
80
 
81
  default_text = """
82
  Job Description:
83
- We are seeking a talented Software Engineering to join our dynamic team at Technology Innovations Inc. You will be responsible for designing, developing, and maintaining software applications that meet the needs of our clients.
84
- Key Responsibilities:
85
- Develop high-quality software design and architecture Identify, prioritize, and execute tasks in the software development life cycle Review and debug code Collaborate with other developers and engineers to ensure software quality
86
- Required Qualifications:
87
- Bachelor s degree in Computer Science or related field Proven experience as a Software Engineer or similar role Familiarity with Agile development methodologies Proficiency in programming languages such as Python or C# Strong problem-solving skills and the ability to a team work Preferred Qualifications: english language communication
88
  """
89
  text = st.text_area("Enter text for NER analysis:", value=default_text, height=400)
90
  analyze_button = st.button("Analyze")
91
 
92
  with col2:
93
  st.header("Result")
94
-
95
- # แสดงผล Named Entity Annotation
96
  st.markdown("##### Named Entity Annotation (Tagging)")
97
 
98
  if analyze_button:
99
  ner_results = ner_pipeline(text)
100
-
101
  if ner_results:
102
- # รวม Entity ที่มี B- และ I- ให้เป็นคำเดียวกัน
103
  merged_entities = merge_entities([
104
  {"word": entity['word'], "entity": entity['entity'], "start": entity['start'], "end": entity['end']}
105
  for entity in ner_results
106
  ])
107
-
108
-
109
  ner_html = generate_ner_html(text, merged_entities)
110
  st.markdown(ner_html, unsafe_allow_html=True)
111
-
112
  else:
113
  st.write("No entities found.")
114
-
115
  with col3:
116
  st.header("")
117
-
118
- # แสดงข้อมูล Entity ในรูปแบบตาราง
119
  st.markdown("##### Extracted Entities")
120
-
121
  if analyze_button:
122
  st.table(merged_entities)
123
-
124
-
 
1
  import streamlit as st
2
  from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
3
+ from accelerate import init_empty_weights, load_checkpoint_and_dispatch
4
 
 
model_name = "Nucha/Nucha_ITSkillNER_BERT"

# Load the tokenizer that belongs to the fine-tuned checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the model with its real weights directly from the Hub.
# The previous approach called from_pretrained() inside init_empty_weights()
# and then passed the Hub repo id to load_checkpoint_and_dispatch(), which
# expects a local checkpoint file/folder rather than a repo id, so start-up
# failed. from_pretrained() already downloads and materialises the weights.
model = AutoModelForTokenClassification.from_pretrained(model_name)

# Build the token-classification (NER) pipeline used by the UI below.
ner_pipeline = pipeline("ner", model=model, tokenizer=tokenizer)

# Highlight colour for each entity type shown in the annotated output.
ENTITY_COLORS = {
    "HSKILL": "#FFD700",  # hard (technical) skill
    "SSKILL": "#87CEFA",  # soft skill
}
28
 
 
29
  def merge_entities(entities):
30
  merged = []
31
  current_entity = None
 
33
  for entity in entities:
34
  word, label, start, end = entity["word"], entity["entity"].split("-")[-1], entity["start"], entity["end"]
35
 
36
+ if entity["entity"].startswith("B-"):
37
  if current_entity:
38
  merged.append(current_entity)
39
  current_entity = {"word": word, "entity": label, "start": start, "end": end}
40
+ elif entity["entity"].startswith("I-") and current_entity:
41
  current_entity["word"] += " " + word
42
+ current_entity["end"] = end
43
  else:
44
  if current_entity:
45
  merged.append(current_entity)
 
50
 
51
  return merged
52
 
 
def generate_ner_html(text, entities):
    """Render ``text`` as an HTML snippet with every entity highlighted.

    Args:
        text: The raw text the entities were extracted from.
        entities: Iterable of dicts carrying ``start`` / ``end`` character
            offsets into ``text``, the display ``word`` and the ``entity``
            type label.

    Returns:
        A ``<div>`` string in which each entity is wrapped in a coloured
        ``<span>`` followed by a small tag showing its type. Plain text and
        entity words are HTML-escaped.
    """
    # Local import keeps this block self-contained.
    from html import escape

    pieces = []
    last_idx = 0

    # Walk the entities left to right so the plain text in between can be
    # copied in order.
    for entity in sorted(entities, key=lambda e: e["start"]):
        start, end = entity["start"], entity["end"]
        entity_type = entity["entity"]
        # Unknown entity types fall back to light grey.
        color = ENTITY_COLORS.get(entity_type, "#D3D3D3")

        # Text between the previous entity and this one. It is escaped because
        # the caller renders the result with unsafe_allow_html=True.
        pieces.append(escape(text[last_idx:start]))

        # Highlighted entity followed by its type tag.
        pieces.append(
            f'\n<span style="background-color: {color}; padding: 3px 6px; '
            f'border-radius: 5px; font-weight: bold;">\n'
            f'{escape(entity["word"])} '
            f'<span style="background: white; color: black; font-size: 12px; '
            f'padding: 1px 4px; border-radius: 3px;">{escape(entity_type)}</span>\n'
            f'</span>\n'
        )

        # Never move backwards: an overlapping/nested span must not cause
        # already-emitted text to be repeated in the tail.
        last_idx = max(last_idx, end)

    # Remaining text after the last entity.
    pieces.append(escape(text[last_idx:]))

    annotated_text = "".join(pieces)
    return f'<div style="font-size:16px; line-height:1.6;">{annotated_text}</div>'
72
 
st.set_page_config(layout="wide")

# Three equal-width columns: input, annotated result, entity table.
col1, col2, col3 = st.columns([1, 1, 1])

# Defined up front so the table column never hits a NameError when the
# pipeline returns no entities for the submitted text.
merged_entities = []

with col1:
    st.header("Input")
    st.markdown("##### Job Description")

    # NOTE(review): the literal's exact whitespace was reconstructed from a
    # rendered diff view - confirm against the real file.
    default_text = (
        "\nJob Description:\n"
        "We are seeking a talented Software Engineering to join our dynamic team at Technology Innovations Inc...\n"
    )
    text = st.text_area("Enter text for NER analysis:", value=default_text, height=400)
    analyze_button = st.button("Analyze")

with col2:
    st.header("Result")
    st.markdown("##### Named Entity Annotation (Tagging)")

    if analyze_button:
        ner_results = ner_pipeline(text)

        if ner_results:
            # Keep only the fields merge_entities() reads, then merge the
            # B-*/I-* token pieces into whole entities.
            merged_entities = merge_entities([
                {"word": entity['word'], "entity": entity['entity'], "start": entity['start'], "end": entity['end']}
                for entity in ner_results
            ])

            ner_html = generate_ner_html(text, merged_entities)
            st.markdown(ner_html, unsafe_allow_html=True)
        else:
            st.write("No entities found.")

with col3:
    # Empty header keeps this column vertically aligned with the others.
    st.header("")
    st.markdown("##### Extracted Entities")

    # Only draw the table when there is something to show.
    if analyze_button and merged_entities:
        st.table(merged_entities)