Spaces:
Sleeping
Sleeping
Commit
·
9985fd7
1
Parent(s):
a952df1
nen ui
Browse files- README.md +30 -35
- app.py +39 -268
- app_logic.py +100 -0
- app_ui.py +139 -0
README.md
CHANGED
@@ -14,6 +14,25 @@ pinned: false
|
|
14 |
## Overview
|
15 |
medKGC is a medical text knowledge graph construction and review system. It supports entity recognition, relation extraction, and visualization of medical reports, providing a convenient review interface.
|
16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
## Core Features
|
18 |
|
19 |
### 1. Data Processing
|
@@ -80,36 +99,16 @@ def find_relations_with_entities(entities, entities_data):
|
|
80 |
"""Rebuild relations based on entity text matching"""
|
81 |
```
|
82 |
|
83 |
-
##
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
```bash
|
94 |
-
git clone https://github.com/your-repo/medKGC.git
|
95 |
-
```
|
96 |
-
|
97 |
-
2. Install dependencies
|
98 |
-
```bash
|
99 |
-
pip install -r requirements.txt
|
100 |
-
```
|
101 |
-
|
102 |
-
3. Run application
|
103 |
-
```bash
|
104 |
-
streamlit run app.py
|
105 |
-
```
|
106 |
-
|
107 |
-
## Future Plans
|
108 |
-
1. [ ] Add relation editing functionality
|
109 |
-
2. [ ] Support custom entity types
|
110 |
-
3. [ ] Add data export functionality
|
111 |
-
4. [ ] Integrate machine learning models
|
112 |
-
5. [ ] Add annotation functionality
|
113 |
|
114 |
## Contributing
|
115 |
Contributions are welcome through:
|
@@ -118,8 +117,4 @@ Welcome to contribute through:
|
|
118 |
3. Improve documentation and comments
|
119 |
|
120 |
## License
|
121 |
-
MIT License
|
122 |
-
|
123 |
-
---
|
124 |
-
|
125 |
-
[Chinese version above]
|
|
|
14 |
## Overview
|
15 |
medKGC is a medical text knowledge graph construction and review system. It supports entity recognition, relation extraction, and visualization of medical reports, providing a convenient review interface.
|
16 |
|
17 |
+
## Deployment
|
18 |
+
|
19 |
+
### Installation
|
20 |
+
1. Create conda environment
|
21 |
+
```bash
|
22 |
+
conda create -n medkgc python=3.10
|
23 |
+
conda activate medkgc
|
24 |
+
```
|
25 |
+
|
26 |
+
2. Install dependencies
|
27 |
+
```bash
|
28 |
+
pip install -r requirements.txt
|
29 |
+
```
|
30 |
+
|
31 |
+
3. Run application
|
32 |
+
```bash
|
33 |
+
streamlit run app.py
|
34 |
+
```
|
35 |
+
|
36 |
## Core Features
|
37 |
|
38 |
### 1. Data Processing
|
|
|
99 |
"""Rebuild relations based on entity text matching"""
|
100 |
```
|
101 |
|
102 |
+
## TODO
|
103 |
+
1. [ ] Add data export functionality
|
104 |
+
2. [ ] Named Entity Recognition
|
105 |
+
1. [ ] 增加输入框
|
106 |
+
2. [ ] 调用llms
|
107 |
+
3. [ ] Relation Extraction
|
108 |
+
1. [ ] Add relation editing functionality
|
109 |
+
4. [ ] 数据在哪里
|
110 |
+
1. [ ] 从某个地方读取,git上
|
111 |
+
2. [ ] 存到某个地方,存有点麻烦(commit吗)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
112 |
|
113 |
## Contributing
|
114 |
Contributions are welcome through:
|
|
|
117 |
3. Improve documentation and comments
|
118 |
|
119 |
## License
|
120 |
+
MIT License
|
|
|
|
|
|
|
|
app.py
CHANGED
@@ -1,265 +1,33 @@
|
|
1 |
import streamlit as st
|
2 |
-
import
|
3 |
-
from
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
with open('dev.json', 'r') as f:
|
18 |
-
return json.load(f)
|
19 |
-
|
20 |
-
|
21 |
-
def save_data(data):
|
22 |
-
"""Save data to dev.json"""
|
23 |
-
with open('dev.json', 'w') as f:
|
24 |
-
json.dump(data, f, indent=4)
|
25 |
-
|
26 |
-
|
27 |
-
def get_label_color(label):
|
28 |
-
"""Return color based on label type"""
|
29 |
-
color_map = {
|
30 |
-
'OBS-DP': '#FF6B6B', # Red - Observation definitely present
|
31 |
-
'ANAT-DP': '#4ECDC4', # Cyan - Anatomy definitely present
|
32 |
-
'OBS-U': '#FFD93D', # Yellow - Observation uncertain
|
33 |
-
'OBS-DA': '#95A5A6', # Gray - Observation definitely absent
|
34 |
-
}
|
35 |
-
return color_map.get(label, '#666666') # Default color
|
36 |
-
|
37 |
-
|
38 |
-
def create_graph(entities, relations):
|
39 |
-
"""Create entity relationship graph, merge nodes with same text"""
|
40 |
-
# Track created nodes using dict, key is entity text
|
41 |
-
nodes_dict = {}
|
42 |
-
nodes = []
|
43 |
-
|
44 |
-
# First create all unique nodes
|
45 |
-
for entity in entities:
|
46 |
-
if entity.text not in nodes_dict:
|
47 |
-
# Create new node
|
48 |
-
node = Node(
|
49 |
-
id=entity.text,
|
50 |
-
label=f"{entity.text}\n({entity.labels[0]})",
|
51 |
-
size=25,
|
52 |
-
color=get_label_color(entity.labels[0])
|
53 |
-
)
|
54 |
-
nodes.append(node)
|
55 |
-
nodes_dict[entity.text] = node
|
56 |
-
|
57 |
-
# Create edges using node text as source and target
|
58 |
-
edges = []
|
59 |
-
for relation in relations:
|
60 |
-
# Check if source and target exist
|
61 |
-
if relation.source.text in nodes_dict and relation.target.text in nodes_dict:
|
62 |
-
edge = Edge(
|
63 |
-
source=relation.source.text,
|
64 |
-
target=relation.target.text,
|
65 |
-
label=relation.label,
|
66 |
-
color="#666666" # Unified edge color
|
67 |
-
)
|
68 |
-
edges.append(edge)
|
69 |
-
|
70 |
-
config = Config(
|
71 |
-
width=750,
|
72 |
-
height=500,
|
73 |
-
directed=True,
|
74 |
-
physics=True,
|
75 |
-
hierarchical=False,
|
76 |
-
nodeHighlightBehavior=True,
|
77 |
-
highlightColor="#F7A7A6",
|
78 |
-
)
|
79 |
-
|
80 |
-
return agraph(nodes=nodes, edges=edges, config=config)
|
81 |
-
|
82 |
-
|
83 |
-
def word_to_char_position(text, word_index):
|
84 |
-
"""Convert word position to character position"""
|
85 |
-
words = text.split()
|
86 |
-
char_start = 0
|
87 |
-
|
88 |
-
# If word_index out of range, return text end
|
89 |
-
if word_index >= len(words):
|
90 |
-
return len(text)
|
91 |
-
|
92 |
-
# Traverse all words before target word
|
93 |
-
for i in range(word_index):
|
94 |
-
char_start += len(words[i]) + 1 # +1 for space
|
95 |
-
|
96 |
-
return char_start
|
97 |
-
|
98 |
-
|
99 |
-
def word_to_char_span(text, start_ix, end_ix):
|
100 |
-
"""Convert word start and end positions to character span"""
|
101 |
-
char_start = word_to_char_position(text, start_ix)
|
102 |
-
# If start equals end, it's a single word
|
103 |
-
if start_ix == end_ix:
|
104 |
-
char_end = char_start + len(text.split()[start_ix])
|
105 |
-
else:
|
106 |
-
# If multiple words, calculate to end position
|
107 |
-
char_end = word_to_char_position(
|
108 |
-
text, end_ix) + len(text.split()[end_ix])
|
109 |
-
return char_start, char_end
|
110 |
-
|
111 |
-
|
112 |
-
def entities2Selection(text, entities_data):
|
113 |
-
"""Convert entities data to Selection objects list"""
|
114 |
-
selections = []
|
115 |
-
for entity_id, entity in entities_data.items():
|
116 |
-
# Convert word positions to char positions
|
117 |
-
char_start, char_end = word_to_char_span(
|
118 |
-
text,
|
119 |
-
entity['start_ix'],
|
120 |
-
entity['end_ix']
|
121 |
-
)
|
122 |
-
|
123 |
-
selection = Selection(
|
124 |
-
start=char_start,
|
125 |
-
end=char_end,
|
126 |
-
text=entity['tokens'],
|
127 |
-
labels=[entity['label']],
|
128 |
-
)
|
129 |
-
selections.append(selection)
|
130 |
-
return selections
|
131 |
-
|
132 |
-
|
133 |
-
def selection2entities(selections):
|
134 |
-
"""Convert Selection objects list to entities data"""
|
135 |
-
entities = {}
|
136 |
-
for i, selection in enumerate(selections, 1):
|
137 |
-
entities[str(i)] = {
|
138 |
-
"tokens": selection.text,
|
139 |
-
"label": selection.labels[0],
|
140 |
-
"start_ix": selection.start,
|
141 |
-
"end_ix": selection.end,
|
142 |
-
"relations": [] # Initialize empty relations list
|
143 |
-
}
|
144 |
-
return entities
|
145 |
-
|
146 |
-
|
147 |
-
def find_relations_with_entities(entities, entities_data):
|
148 |
-
"""Find relations between current entities based on original entities_data"""
|
149 |
-
# Create text to entity mapping
|
150 |
-
text_to_entity = {e.text: e for e in entities}
|
151 |
-
|
152 |
-
# Create tokens to entity_id mapping
|
153 |
-
tokens_to_id = {entity['tokens']: entity_id
|
154 |
-
for entity_id, entity in entities_data.items()}
|
155 |
-
|
156 |
-
# Create id to tokens mapping
|
157 |
-
id_to_tokens = {entity_id: entity['tokens']
|
158 |
-
for entity_id, entity in entities_data.items()}
|
159 |
-
|
160 |
-
relations = []
|
161 |
-
# Iterate through each entity in current entities
|
162 |
-
for source_text, source_entity in text_to_entity.items():
|
163 |
-
# Find corresponding entity ID in original data
|
164 |
-
for entity_id, entity in entities_data.items():
|
165 |
-
if entity['tokens'] == source_text:
|
166 |
-
# Iterate through all relations of this entity
|
167 |
-
for relation in entity.get('relations', []):
|
168 |
-
target_id = relation[1]
|
169 |
-
# Get target entity text
|
170 |
-
target_text = id_to_tokens.get(target_id)
|
171 |
-
# If target entity exists in current entities
|
172 |
-
if target_text and target_text in text_to_entity:
|
173 |
-
relations.append(Relation(
|
174 |
-
source=source_entity,
|
175 |
-
target=text_to_entity[target_text],
|
176 |
-
label=relation[0]
|
177 |
-
))
|
178 |
-
|
179 |
-
return relations
|
180 |
-
|
181 |
-
|
182 |
-
def setup_report_selection():
|
183 |
-
"""Setup report selection columns and return selected report"""
|
184 |
-
col1, col2 = st.columns(2)
|
185 |
-
|
186 |
-
with col1:
|
187 |
-
st.subheader("Reports to Review")
|
188 |
-
unreviewed_reports = [
|
189 |
-
report_id for report_id, content in st.session_state.reports_json.items()
|
190 |
-
if 'reviewed' not in content
|
191 |
-
]
|
192 |
-
selected_report = st.selectbox(
|
193 |
-
"Select Report",
|
194 |
-
unreviewed_reports,
|
195 |
-
key="unreviewed"
|
196 |
-
)
|
197 |
-
|
198 |
-
with col2:
|
199 |
-
st.subheader("Reviewed Reports")
|
200 |
-
reviewed_reports = [
|
201 |
-
report_id for report_id, content in st.session_state.reports_json.items()
|
202 |
-
if content.get('reviewed', False)
|
203 |
-
]
|
204 |
-
st.selectbox(
|
205 |
-
"Completed Reports",
|
206 |
-
reviewed_reports if reviewed_reports else ['None'],
|
207 |
-
key="reviewed"
|
208 |
-
)
|
209 |
-
|
210 |
-
return selected_report
|
211 |
-
|
212 |
-
|
213 |
-
def display_report_content(report_data):
|
214 |
-
"""Display the report text content"""
|
215 |
-
st.subheader("Report Content:")
|
216 |
-
st.markdown(report_data['text'])
|
217 |
-
|
218 |
-
|
219 |
-
def display_entities(report_text, entities):
|
220 |
-
"""Setup and display entity annotation interface"""
|
221 |
-
|
222 |
-
st.subheader("Entity Annotation:")
|
223 |
-
selections = label_select(
|
224 |
-
body=report_text,
|
225 |
-
labels=list(set(e.labels[0] for e in entities)),
|
226 |
-
selections=entities,
|
227 |
-
)
|
228 |
-
st.write(selections)
|
229 |
-
return selections, entities
|
230 |
-
|
231 |
-
|
232 |
-
def display_relationship_graph(entities, entities_data):
|
233 |
-
st.subheader("Entity Relationship Graph:")
|
234 |
-
relations = find_relations_with_entities(entities, entities_data)
|
235 |
-
create_graph(entities, relations)
|
236 |
-
|
237 |
-
|
238 |
-
def handle_review_submission(selected_report, selections, entities_data):
|
239 |
-
"""Handle the review submission process"""
|
240 |
-
if st.button("Mark as Reviewed"):
|
241 |
-
updated_entities = selection2entities(selections)
|
242 |
-
|
243 |
-
for entity_id, entity in updated_entities.items():
|
244 |
-
if entity_id in entities_data:
|
245 |
-
entity['relations'] = entities_data[entity_id]['relations']
|
246 |
-
|
247 |
-
st.session_state.reports_json[selected_report]['reviewed'] = {
|
248 |
-
'entities': updated_entities
|
249 |
-
}
|
250 |
-
|
251 |
-
save_data(st.session_state.reports_json)
|
252 |
-
st.success("Review status saved!")
|
253 |
-
st.rerun()
|
254 |
-
|
255 |
|
256 |
def main():
|
257 |
"""Main application"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
258 |
st.title("Medical Report Review System")
|
259 |
|
260 |
-
#
|
261 |
-
|
262 |
-
st.session_state.reports_json = load_data()
|
263 |
|
264 |
# Setup report selection
|
265 |
selected_report = setup_report_selection()
|
@@ -267,25 +35,28 @@ def main():
|
|
267 |
if selected_report:
|
268 |
report_data = st.session_state.reports_json[selected_report]
|
269 |
entities_data = report_data['entities']
|
|
|
|
|
|
|
|
|
|
|
|
|
270 |
|
271 |
# Display report content
|
272 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
273 |
|
274 |
-
# Setup entity annotation
|
275 |
-
selections = entities2Selection(report_data['text'], entities_data)
|
276 |
# Display entities
|
277 |
-
|
278 |
-
|
279 |
-
# Display NEN
|
280 |
-
|
281 |
-
# 使用当前选择或原始实体创建关系图
|
282 |
-
current_entities = selections if st.button(
|
283 |
-
"Update Graph") else selections
|
284 |
-
display_relationship_graph(current_entities, entities_data)
|
285 |
|
286 |
# Handle review submission
|
287 |
handle_review_submission(selected_report, selections, entities_data)
|
288 |
|
289 |
-
|
290 |
if __name__ == "__main__":
|
291 |
main()
|
|
|
1 |
import streamlit as st
|
2 |
+
from app_logic import load_data, entities2Selection
|
3 |
+
from app_ui import (
|
4 |
+
setup_report_selection,
|
5 |
+
display_report_content,
|
6 |
+
display_entities,
|
7 |
+
display_relationship_graph,
|
8 |
+
handle_review_submission
|
9 |
+
)
|
10 |
+
|
11 |
+
def initialize_session_state():
    """Seed ``st.session_state`` with the keys the app reads.

    ``reports_json`` is filled from ``load_data()`` and ``selected_entity``
    starts as ``None``. Keys that already exist are left untouched, so the
    data file is only read when ``reports_json`` is missing.
    """
    state = st.session_state

    reports_missing = 'reports_json' not in state
    if reports_missing:
        state.reports_json = load_data()

    entity_missing = 'selected_entity' not in state
    if entity_missing:
        state.selected_entity = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
|
18 |
def main():
    """Run the Streamlit review page.

    Configures a wide page, initialises session state, lets the user pick an
    unreviewed report, and shows it in two columns: report text plus the
    relationship graph on the left, the entity annotation widget on the
    right. A "Mark as Reviewed" button is rendered below the columns.
    """
    # Switch the page to wide mode (must be the first Streamlit call).
    st.set_page_config(
        page_title="Medical Report Review System",
        layout="wide",
        initial_sidebar_state="expanded"
    )

    st.title("Medical Report Review System")

    # Initialize session state
    initialize_session_state()

    # Setup report selection
    selected_report = setup_report_selection()

    if selected_report:
        report_data = st.session_state.reports_json[selected_report]
        entities_data = report_data['entities']

        # Setup entity annotation
        selections_og = entities2Selection(report_data['text'], entities_data)

        # Two-column layout with equal widths.
        col1, col2 = st.columns([2, 2])

        # Display entities. The right-hand column is filled first so the
        # reviewer's current selections exist before the graph is drawn;
        # the on-screen position is still decided by the column object.
        with col2:
            selections = display_entities(report_data['text'], selections_og)

        # Display report content
        with col1:
            display_report_content(report_data)
            # Display relationship graph.
            # Previously both branches assigned selections_og, so the
            # "Update Graph" button had no effect. A button is only True on
            # the rerun triggered by its click, so the snapshot taken on
            # click is kept in session state (per report) to survive later
            # reruns.
            graph_key = f"graph_entities_{selected_report}"
            if st.button("Update Graph", key="update_graph"):
                st.session_state[graph_key] = selections
            current_entities = st.session_state.get(graph_key, selections_og)
            display_relationship_graph(current_entities, entities_data)

        # Handle review submission
        handle_review_submission(selected_report, selections, entities_data)
|
60 |
|
|
|
61 |
if __name__ == "__main__":
|
62 |
main()
|
app_logic.py
ADDED
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
from dataclasses import dataclass
|
3 |
+
from streamlit_text_label import Selection
|
4 |
+
|
5 |
+
@dataclass
class Relation:
    """A labelled, directed link between two annotated text selections."""

    # Selection the relation starts from.
    source: Selection
    # Selection the relation points to.
    target: Selection
    # Relation label, used as the edge label in the graph.
    label: str
|
10 |
+
|
11 |
+
def load_data():
    """Load the report collection from ``dev.json`` in the working directory.

    Returns:
        The decoded JSON document.

    Raises:
        FileNotFoundError: if ``dev.json`` does not exist.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    # Without an explicit encoding, open() uses the locale's default and can
    # mis-decode (or fail on) non-ASCII report text on some platforms.
    with open('dev.json', 'r', encoding='utf-8') as f:
        return json.load(f)
|
15 |
+
|
16 |
+
def save_data(data):
    """Write *data* to ``dev.json`` as JSON indented by four spaces.

    Args:
        data: any JSON-serialisable object.

    Raises:
        TypeError: if *data* contains a value ``json`` cannot serialise. In
            that case the existing ``dev.json`` is left untouched.
    """
    # Serialise before opening the file: mode 'w' truncates immediately, so
    # an error raised part-way through json.dump would leave dev.json empty
    # or half-written and destroy the previous review data.
    payload = json.dumps(data, indent=4)
    with open('dev.json', 'w', encoding='utf-8') as f:
        f.write(payload)
|
20 |
+
|
21 |
+
def get_label_color(label):
    """Return the hex colour assigned to an entity label.

    Labels without an entry in the palette get the neutral grey ``#666666``.
    """
    fallback = '#666666'
    palette = dict([
        ('OBS-DP', '#FF6B6B'),   # red: observation definitely present
        ('ANAT-DP', '#4ECDC4'),  # cyan: anatomy definitely present
        ('OBS-U', '#FFD93D'),    # yellow: observation uncertain
        ('OBS-DA', '#95A5A6'),   # grey: observation definitely absent
    ])
    if label in palette:
        return palette[label]
    return fallback
|
30 |
+
|
31 |
+
def word_to_char_position(text, word_index):
    """Return the character offset at which word ``word_index`` starts.

    Words are the whitespace-delimited runs of *text*, numbered from 0 (the
    same tokens ``text.split()`` yields).

    Args:
        text: the full report text.
        word_index: zero-based index of the word to locate.

    Returns:
        The offset of the word's first character in *text*; ``len(text)``
        when *word_index* is past the last word; ``0`` for a negative index.
    """
    import re  # local import: the module header does not import re

    if word_index < 0:
        return 0

    # Locate the word in the original string instead of summing
    # len(word) + 1. The old arithmetic assumed exactly one space between
    # words and no leading whitespace, so double spaces or newlines shifted
    # every later offset.
    for position, match in enumerate(re.finditer(r'\S+', text)):
        if position == word_index:
            return match.start()
    return len(text)
|
40 |
+
|
41 |
+
def word_to_char_span(text, start_ix, end_ix):
    """Convert an inclusive word-index range into a character span.

    Args:
        text: the full report text.
        start_ix: zero-based index of the first word of the range.
        end_ix: zero-based index of the last word of the range (inclusive).

    Returns:
        ``(char_start, char_end)`` where ``char_start`` is the offset of the
        first character of word ``start_ix`` and ``char_end`` is one past the
        last character of word ``end_ix``. An index past the last word maps
        to ``len(text)`` and a negative index maps to ``0``.
    """
    import re  # local import: the module header does not import re

    # Character span of every whitespace-delimited word, taken from the
    # original string so runs of spaces or newlines cannot shift offsets.
    spans = [match.span() for match in re.finditer(r'\S+', text)]

    def _bound(word_index, edge):
        # edge 0 -> start of the word, edge 1 -> one past its end.
        if word_index < 0:
            return 0
        if word_index >= len(spans):
            # Previously text.split()[word_index] raised IndexError here.
            return len(text)
        return spans[word_index][edge]

    return _bound(start_ix, 0), _bound(end_ix, 1)
|
49 |
+
|
50 |
+
def entities2Selection(text, entities_data):
    """Build one ``Selection`` per entry of *entities_data*.

    Each entity's ``start_ix``/``end_ix`` are passed to
    ``word_to_char_span`` to obtain the character span inside *text*; the
    entity's ``tokens`` become the selection text and its ``label`` becomes
    the selection's only label. The result follows the mapping's iteration
    order.
    """
    def _as_selection(record):
        first_char, last_char = word_to_char_span(
            text, record['start_ix'], record['end_ix'])
        return Selection(
            start=first_char,
            end=last_char,
            text=record['tokens'],
            labels=[record['label']],
        )

    return [_as_selection(record) for record in entities_data.values()]
|
67 |
+
|
68 |
+
def selection2entities(selections):
    """Turn a list of selections into an entities mapping.

    Entries are keyed by their 1-based position as a string ("1", "2", ...)
    and each starts with an empty ``relations`` list.

    NOTE(review): ``start_ix``/``end_ix`` are filled with the selection's
    ``start``/``end`` as-is, while ``entities2Selection`` passes those same
    keys to ``word_to_char_span`` as word indices. Confirm which unit
    consumers of the reviewed data expect.
    """
    return {
        str(number): {
            "tokens": picked.text,
            "label": picked.labels[0],
            "start_ix": picked.start,
            "end_ix": picked.end,
            "relations": [],
        }
        for number, picked in enumerate(selections, 1)
    }
|
80 |
+
|
81 |
+
def find_relations_with_entities(entities, entities_data):
    """Rebuild relations between the current entities by text matching.

    Args:
        entities: current selections; each must expose ``.text``.
        entities_data: original mapping of entity id to a dict with
            ``tokens`` and, optionally, ``relations``. Each relation is
            indexed as ``relation[0]`` (label) and ``relation[1]`` (target
            entity id).

    Returns:
        A list of ``Relation`` objects. A relation is kept only when both its
        source text and its target's text belong to a current entity. When
        several current entities share a text, the last one is used, as
        before.
    """
    current_by_text = {e.text: e for e in entities}
    id_to_tokens = {entity_id: record['tokens']
                    for entity_id, record in entities_data.items()}

    # Group the original records by their text once, instead of rescanning
    # all of entities_data for every current entity. Insertion order is kept,
    # so relations come out in the same order as before.
    records_by_tokens = {}
    for record in entities_data.values():
        records_by_tokens.setdefault(record['tokens'], []).append(record)

    relations = []
    for source_text, source_entity in current_by_text.items():
        for record in records_by_tokens.get(source_text, ()):
            for relation in record.get('relations', []):
                target_text = id_to_tokens.get(relation[1])
                # Skip relations whose target is no longer selected.
                if target_text and target_text in current_by_text:
                    relations.append(Relation(
                        source=source_entity,
                        target=current_by_text[target_text],
                        label=relation[0]
                    ))
    return relations
|
app_ui.py
ADDED
@@ -0,0 +1,139 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from streamlit_agraph import agraph, Node, Edge, Config
|
3 |
+
from streamlit_text_label import label_select
|
4 |
+
from app_logic import *
|
5 |
+
|
6 |
+
|
7 |
+
def display_entity_selections(selections):
    """Show one button per selection, spread round-robin over four columns.

    Clicking a button stores that selection in
    ``st.session_state.selected_entity``.
    """
    st.subheader("Selected Entities:")

    # One set of four columns; buttons are dealt into them in turn so they
    # line up horizontally, four per row.
    per_row = 4
    columns = st.columns(per_row)

    for index, picked in enumerate(selections):
        with columns[index % per_row]:
            clicked = st.button(
                f"{picked.text} ({picked.labels[0]})",
                key=f"entity_{index}",
                help=f"Start: {picked.start}, End: {picked.end}"
            )
            if clicked:
                st.session_state.selected_entity = picked
|
23 |
+
|
24 |
+
|
25 |
+
def create_graph(entities, relations):
    """Render the entity/relation graph with ``agraph`` and return its result.

    Entities sharing the same text collapse into a single node (the first
    occurrence decides label and colour). An edge is drawn only when both
    endpoints have a node.
    """
    # Keyed by entity text so repeated mentions become a single node.
    node_by_text = {}
    for item in entities:
        if item.text in node_by_text:
            continue
        kind = item.labels[0]
        node_by_text[item.text] = Node(
            id=item.text,
            label=f"{item.text}\n({kind})",
            size=25,
            color=get_label_color(kind)
        )
    graph_nodes = list(node_by_text.values())

    # Edges reference nodes by text; every edge uses the same grey.
    graph_edges = [
        Edge(
            source=link.source.text,
            target=link.target.text,
            label=link.label,
            color="#666666"
        )
        for link in relations
        if link.source.text in node_by_text and link.target.text in node_by_text
    ]

    layout = Config(
        width=750,
        height=500,
        directed=True,
        physics=True,
        hierarchical=False,
        nodeHighlightBehavior=True,
        highlightColor="#F7A7A6",
    )

    return agraph(nodes=graph_nodes, edges=graph_edges, config=layout)
|
63 |
+
|
64 |
+
|
65 |
+
def setup_report_selection():
    """Render the two report pickers and return the report chosen for review.

    The left column lists reports still to review, the right column lists
    completed ones. Reads ``st.session_state.reports_json``.

    Returns:
        The id selected in the "Select Report" box, as returned by
        ``st.selectbox``.
    """
    reports = st.session_state.reports_json

    # Classify every report with one predicate. The old code tested
    # "'reviewed' not in content" on one side and a truthy
    # content.get('reviewed') on the other, so a report whose 'reviewed'
    # value was present but falsy showed up in neither list.
    reviewed_reports = []
    unreviewed_reports = []
    for report_id, content in reports.items():
        if content.get('reviewed'):
            reviewed_reports.append(report_id)
        else:
            unreviewed_reports.append(report_id)

    col1, col2 = st.columns(2)

    with col1:
        st.subheader("Reports to Review")
        selected_report = st.selectbox(
            "Select Report",
            unreviewed_reports,
            key="unreviewed"
        )

    with col2:
        st.subheader("Reviewed Reports")
        st.selectbox(
            "Completed Reports",
            # Placeholder entry shown while nothing has been reviewed.
            reviewed_reports if reviewed_reports else ['None'],
            key="reviewed"
        )

    return selected_report
|
94 |
+
|
95 |
+
|
96 |
+
def display_report_content(report_data):
    """Show the report's ``text`` field under a "Report Content:" heading.

    The text is passed to ``st.markdown``.
    """
    st.subheader("Report Content:")
    body = report_data['text']
    st.markdown(body)
|
100 |
+
|
101 |
+
|
102 |
+
def display_entities(report_text, entities):
    """Render the annotation widget and the entity buttons.

    Args:
        report_text: the report body shown in the annotation widget.
        entities: selections used to pre-populate the widget; each must
            expose ``.labels``.

    Returns:
        Whatever ``label_select`` returns for the current state of the
        widget.
    """
    st.subheader("Entity Annotation:")

    # sorted() instead of list(set(...)): set iteration order for strings
    # changes between interpreter runs, which reshuffled the label choices
    # each time the app restarted.
    available_labels = sorted({e.labels[0] for e in entities})

    selections = label_select(
        body=report_text,
        labels=available_labels,
        selections=entities,
    )

    # Show the current selections as buttons.
    display_entity_selections(selections)

    return selections
|
115 |
+
|
116 |
+
|
117 |
+
def display_relationship_graph(entities: list[Selection], entities_data: dict):
    """Draw the relationship graph for *entities* under its own heading.

    Relations are rebuilt from *entities_data* with
    ``find_relations_with_entities`` and handed to ``create_graph``.
    """
    st.subheader("Entity Relationship Graph:")
    create_graph(
        entities,
        find_relations_with_entities(entities, entities_data),
    )
|
122 |
+
|
123 |
+
|
124 |
+
def handle_review_submission(selected_report, selections, entities_data):
    """Render "Mark as Reviewed" and persist the review when it is clicked.

    On click the current selections are converted with
    ``selection2entities``, stored under the report's ``reviewed`` key in
    ``st.session_state.reports_json``, written out with ``save_data``, and
    the script is rerun.

    Args:
        selected_report: id of the report being reviewed.
        selections: current selections from the annotation widget.
        entities_data: the report's original entities mapping.
    """
    if not st.button("Mark as Reviewed"):
        return

    updated_entities = selection2entities(selections)

    # NOTE(review): relations are carried over by id, and updated ids are
    # just 1-based positions in *selections*. If the reviewer added, removed
    # or reordered selections this may attach relations to a different
    # entity. Confirm whether positional matching is intended.
    for entity_id, entity in updated_entities.items():
        if entity_id in entities_data:
            # .get() matches find_relations_with_entities, which also
            # tolerates entities without a 'relations' key. The copy keeps
            # the reviewed record from aliasing the original list.
            entity['relations'] = list(
                entities_data[entity_id].get('relations', []))

    st.session_state.reports_json[selected_report]['reviewed'] = {
        'entities': updated_entities
    }

    save_data(st.session_state.reports_json)
    st.success("Review status saved!")
    st.rerun()
|