Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,10 +1,4 @@
|
|
1 |
import streamlit as st
|
2 |
-
|
3 |
-
import matplotlib
|
4 |
-
matplotlib.use('Agg') # ใช้ Agg เพื่อรองรับ Headless Environment
|
5 |
-
import matplotlib.pyplot as plt
|
6 |
-
|
7 |
-
import numpy as np
|
8 |
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
|
9 |
|
10 |
# โหลด Tokenizer และ Model
|
@@ -17,8 +11,8 @@ ner_pipeline = pipeline("ner", model=model, tokenizer=tokenizer)
|
|
17 |
|
18 |
# กำหนดสีของ Entity แต่ละประเภท
|
19 |
ENTITY_COLORS = {
|
20 |
-
"
|
21 |
-
"
|
22 |
}
|
23 |
|
24 |
# ฟังก์ชันรวม Entity (B-* และ I-*)
|
@@ -27,47 +21,35 @@ def merge_entities(entities):
|
|
27 |
current_entity = None
|
28 |
|
29 |
for entity in entities:
|
30 |
-
word, label = entity["word"], entity["entity"]
|
31 |
|
32 |
-
if
|
33 |
-
if current_entity:
|
34 |
merged.append(current_entity)
|
35 |
-
current_entity = {"word": word, "entity": label
|
36 |
-
elif
|
37 |
-
current_entity["word"] += " " + word
|
|
|
38 |
else:
|
39 |
if current_entity:
|
40 |
merged.append(current_entity)
|
41 |
-
current_entity = {"word": word, "entity": label}
|
42 |
|
43 |
if current_entity:
|
44 |
merged.append(current_entity)
|
45 |
|
46 |
return merged
|
47 |
|
48 |
-
# ฟังก์ชันแยกเป็น Hard Skill (hskill) และ Soft Skill (sskill)
|
49 |
-
def categorize_entities(entities):
|
50 |
-
hskill = []
|
51 |
-
sskill = []
|
52 |
-
|
53 |
-
for entity in entities:
|
54 |
-
if entity["entity"] in ["HSKILL"]: # รวม B-* และ I-* เป็นหมวดเดียว
|
55 |
-
hskill.append(entity)
|
56 |
-
else:
|
57 |
-
sskill.append(entity)
|
58 |
-
|
59 |
-
return hskill, sskill
|
60 |
-
|
61 |
-
def highlight_entities(text, hskill, sskill):
|
62 |
# ฟังก์ชันสร้าง Named Entity Annotation แสดงแบบ Tagging
|
63 |
-
|
|
|
64 |
|
65 |
last_idx = 0
|
66 |
annotated_text = ""
|
67 |
|
68 |
for entity in all_entities:
|
69 |
start, end, word, entity_type = entity["start"], entity["end"], entity["word"], entity["entity"]
|
70 |
-
color = ENTITY_COLORS
|
71 |
|
72 |
# เพิ่มข้อความก่อน Entity
|
73 |
annotated_text += text[last_idx:start]
|
@@ -75,7 +57,7 @@ def highlight_entities(text, hskill, sskill):
|
|
75 |
# เพิ่ม Entity แบบมีไฮไลต์และ Tagging
|
76 |
annotated_text += f'''
|
77 |
<span style="background-color: {color}; padding: 3px 6px; border-radius: 5px; font-weight: bold;">
|
78 |
-
{word} <span style="background: white; color: black; font-size: 12px; padding: 1px 4px; border-radius: 3px;">{entity_type
|
79 |
</span>
|
80 |
'''
|
81 |
last_idx = end # อัปเดตตำแหน่งล่าสุด
|
@@ -85,91 +67,50 @@ def highlight_entities(text, hskill, sskill):
|
|
85 |
|
86 |
return f'<div style="font-size:16px; line-height:1.6;">{annotated_text}</div>'
|
87 |
|
88 |
-
|
89 |
# UI ด้วย Streamlit
|
90 |
-
col1, col2
|
91 |
|
92 |
with col1:
|
93 |
st.header("Input")
|
94 |
-
default_text="""Job Description:
|
95 |
We are seeking a talented Software Engineer to join our dynamic team at Tech Innovations Inc. You will be responsible for designing, developing, and maintaining software applications that meet the needs of our clients.
|
96 |
-
|
97 |
Key Responsibilities:
|
98 |
-
|
99 |
Develop high-quality software design and architecture
|
100 |
Identify, prioritize, and execute tasks in the software development life cycle
|
101 |
Review and debug code
|
102 |
Collaborate with other developers and engineers to ensure software quality
|
103 |
Required Qualifications:
|
104 |
-
|
105 |
Bachelor’s degree in Computer Science or related field
|
106 |
Proven experience as a Software Engineer or similar role
|
107 |
Familiarity with Agile development methodologies
|
108 |
Proficiency in programming languages such as Java, Python, or C#
|
109 |
Strong problem-solving skills and the ability to work in a team
|
110 |
Preferred Qualifications:
|
111 |
-
|
112 |
-
|
113 |
-
"""
|
114 |
-
text = st.text_area("Enter text for NER analysis:", value=default_text, height=400, max_chars=None, key=None, help=None, placeholder=None)
|
115 |
analyze_button = st.button("Analyze")
|
116 |
|
117 |
-
st.write("""**Example Inputs:**
|
118 |
-
- Experience with cloud services (AWS, Azure)
|
119 |
-
- Knowledge of databases (SQL, NoSQL)
|
120 |
-
- Familiarity with front-end technologies (HTML, CSS, JavaScript)""")
|
121 |
-
|
122 |
with col2:
|
123 |
st.header("Result")
|
124 |
|
125 |
-
# ใช้ st.markdown กับ CSS เพื่อปรับขนาดฟอนต์
|
126 |
-
st.markdown("<span style='font-size: 14px;'>Press button [Analyze]</span>", unsafe_allow_html=True)
|
127 |
-
|
128 |
-
if analyze_button:
|
129 |
-
ner_results = ner_pipeline(text)
|
130 |
-
|
131 |
-
|
132 |
-
# Display results in a structured output block
|
133 |
-
if ner_results:
|
134 |
-
output_data = [{"Entity": entity['word'], "Label": entity['entity'], "Score": f"{entity['score']:.4f}"} for entity in ner_results]
|
135 |
-
st.table(output_data) # Display as a table
|
136 |
-
else:
|
137 |
-
st.write("No entities found.")
|
138 |
-
|
139 |
-
# ใช้ st.markdown กับ CSS เพื่อปรับขนาดฟอนต์
|
140 |
-
st.markdown("<span style='font-size: 14px;'>JSON</span>", unsafe_allow_html=True)
|
141 |
-
|
142 |
-
st.write(ner_results)
|
143 |
-
|
144 |
-
with col3:
|
145 |
-
st.header("Annotation")
|
146 |
-
|
147 |
if analyze_button:
|
148 |
ner_results = ner_pipeline(text)
|
149 |
|
150 |
if ner_results:
|
151 |
# รวม Entity ที่มี B- และ I- ให้เป็นคำเดียวกัน
|
152 |
merged_entities = merge_entities([
|
153 |
-
{"word": entity['word'], "entity": entity['entity']}
|
154 |
for entity in ner_results
|
155 |
])
|
156 |
|
157 |
-
#
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
st.markdown("### Annotated Text (Tagged)")
|
162 |
-
highlighted_html = highlight_entities(text, hskill, sskill)
|
163 |
-
st.markdown(highlighted_html, unsafe_allow_html=True)
|
164 |
|
165 |
-
# แสดงข้อมูล
|
166 |
-
st.markdown("###
|
167 |
-
st.table(
|
168 |
-
|
169 |
-
# แสดงข้อมูล Soft Skill
|
170 |
-
st.markdown("### Soft Skills (sskill)")
|
171 |
-
st.table(sskill)
|
172 |
|
173 |
else:
|
174 |
st.write("No entities found.")
|
175 |
-
|
|
|
1 |
import streamlit as st
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
|
3 |
|
4 |
# Load the tokenizer and model
|
|
|
11 |
|
12 |
# กำหนดสีของ Entity แต่ละประเภท
|
13 |
ENTITY_COLORS = {
    "HSKILL": "#FFD700",  # gold
    "SKILL": "#87CEFA",  # light sky blue
    # Fallback color for any label not listed above. The annotation
    # renderer evaluates ENTITY_COLORS["OTHER"] as the default of its
    # .get() lookup, so this key must exist or every render raises KeyError.
    "OTHER": "#D3D3D3",  # light gray
}
|
17 |
|
18 |
# Function that merges entity tokens (B-* and I-* tags)
|
|
|
21 |
current_entity = None
|
22 |
|
23 |
for entity in entities:
|
24 |
+
word, label, start, end = entity["word"], entity["entity"].split("-")[-1], entity["start"], entity["end"]
|
25 |
|
26 |
+
if entity["entity"].startswith("B-"): # ถ้าเป็น Entity ตัวแรก
|
27 |
+
if current_entity:
|
28 |
merged.append(current_entity)
|
29 |
+
current_entity = {"word": word, "entity": label, "start": start, "end": end}
|
30 |
+
elif entity["entity"].startswith("I-") and current_entity: # ถ้าเป็น Entity ที่ต่อเนื่อง
|
31 |
+
current_entity["word"] += " " + word
|
32 |
+
current_entity["end"] = end # ปรับตำแหน่ง end ใหม่
|
33 |
else:
|
34 |
if current_entity:
|
35 |
merged.append(current_entity)
|
36 |
+
current_entity = {"word": word, "entity": label, "start": start, "end": end}
|
37 |
|
38 |
if current_entity:
|
39 |
merged.append(current_entity)
|
40 |
|
41 |
return merged
|
42 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
# Function that builds the named-entity annotation, displayed as tags
|
44 |
+
def generate_ner_html(text, entities):
|
45 |
+
all_entities = sorted(entities, key=lambda e: e["start"]) # เรียงตามตำแหน่งเริ่มต้น
|
46 |
|
47 |
last_idx = 0
|
48 |
annotated_text = ""
|
49 |
|
50 |
for entity in all_entities:
|
51 |
start, end, word, entity_type = entity["start"], entity["end"], entity["word"], entity["entity"]
|
52 |
+
color = ENTITY_COLORS.get(entity_type, ENTITY_COLORS["OTHER"])
|
53 |
|
54 |
# เพิ่มข้อความก่อน Entity
|
55 |
annotated_text += text[last_idx:start]
|
|
|
57 |
# เพิ่ม Entity แบบมีไฮไลต์และ Tagging
|
58 |
annotated_text += f'''
|
59 |
<span style="background-color: {color}; padding: 3px 6px; border-radius: 5px; font-weight: bold;">
|
60 |
+
{word} <span style="background: white; color: black; font-size: 12px; padding: 1px 4px; border-radius: 3px;">{entity_type}</span>
|
61 |
</span>
|
62 |
'''
|
63 |
last_idx = end # อัปเดตตำแหน่งล่าสุด
|
|
|
67 |
|
68 |
return f'<div style="font-size:16px; line-height:1.6;">{annotated_text}</div>'
|
69 |
|
|
|
70 |
# UI built with Streamlit
|
71 |
+
col1, col2 = st.columns([1, 1])
|
72 |
|
73 |
with col1:
|
74 |
st.header("Input")
|
75 |
+
default_text = """Job Description:
|
76 |
We are seeking a talented Software Engineer to join our dynamic team at Tech Innovations Inc. You will be responsible for designing, developing, and maintaining software applications that meet the needs of our clients.
|
|
|
77 |
Key Responsibilities:
|
|
|
78 |
Develop high-quality software design and architecture
|
79 |
Identify, prioritize, and execute tasks in the software development life cycle
|
80 |
Review and debug code
|
81 |
Collaborate with other developers and engineers to ensure software quality
|
82 |
Required Qualifications:
|
|
|
83 |
Bachelor’s degree in Computer Science or related field
|
84 |
Proven experience as a Software Engineer or similar role
|
85 |
Familiarity with Agile development methodologies
|
86 |
Proficiency in programming languages such as Java, Python, or C#
|
87 |
Strong problem-solving skills and the ability to work in a team
|
88 |
Preferred Qualifications:
|
89 |
+
"""
|
90 |
+
text = st.text_area("Enter text for NER analysis:", value=default_text, height=400)
|
|
|
|
|
91 |
analyze_button = st.button("Analyze")
|
92 |
|
|
|
|
|
|
|
|
|
|
|
93 |
with col2:
|
94 |
st.header("Result")
|
95 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
96 |
if analyze_button:
|
97 |
ner_results = ner_pipeline(text)
|
98 |
|
99 |
if ner_results:
|
100 |
# Merge B- and I- tagged entities into a single word
|
101 |
merged_entities = merge_entities([
|
102 |
+
{"word": entity['word'], "entity": entity['entity'], "start": entity['start'], "end": entity['end']}
|
103 |
for entity in ner_results
|
104 |
])
|
105 |
|
106 |
+
# แสดงผล Named Entity Annotation
|
107 |
+
st.markdown("### Named Entity Annotation (Tagging)")
|
108 |
+
ner_html = generate_ner_html(text, merged_entities)
|
109 |
+
st.markdown(ner_html, unsafe_allow_html=True)
|
|
|
|
|
|
|
110 |
|
111 |
+
# Show the entity data as a table
|
112 |
+
st.markdown("### Extracted Entities")
|
113 |
+
st.table(merged_entities)
|
|
|
|
|
|
|
|
|
114 |
|
115 |
else:
|
116 |
st.write("No entities found.")
|
|