Spaces:

hanbinChen
/

medKGC

Sleeping

App Files Files Community

hanbinChen committed on Nov 22, 2024

Commit

9985fd7

1 Parent(s): a952df1

nen ui

Browse files

Files changed (4) hide show

README.md +30 -35
app.py +39 -268
app_logic.py +100 -0
app_ui.py +139 -0

README.md CHANGED Viewed

@@ -14,6 +14,25 @@ pinned: false
 ## Overview
 medKGC is a medical text knowledge graph construction and review system. It supports entity recognition, relation extraction, and visualization of medical reports, providing a convenient review interface.
 ## Core Features
 ### 1. Data Processing
@@ -80,36 +99,16 @@ def find_relations_with_entities(entities, entities_data):
     """Rebuild relations based on entity text matching"""
 ```
-## Deployment
-### Requirements
-- Python 3.7+
-- Streamlit 1.39.0+
-- streamlit_text_label
-- streamlit_agraph
-### Installation
-1. Clone repository
-```bash
-git clone https://github.com/your-repo/medKGC.git
-```
-2. Install dependencies
-```bash
-pip install -r requirements.txt
-```
-3. Run application
-```bash
-streamlit run app.py
-```
-## Future Plans
-1. [ ] Add relation editing functionality
-2. [ ] Support custom entity types
-3. [ ] Add data export functionality
-4. [ ] Integrate machine learning models
-5. [ ] Add annotation functionality
 ## Contributing
 Contributions are welcome through:
@@ -118,8 +117,4 @@ Welcome to contribute through:
 3. Improve documentation and comments
 ## License
-MIT License
----
-[Chinese version above]

 ## Overview
 medKGC is a medical text knowledge graph construction and review system. It supports entity recognition, relation extraction, and visualization of medical reports, providing a convenient review interface.
+## Deployment
+### Installation
+1. Create conda environment
+```bash
+conda create -n medkgc python=3.10
+conda activate medkgc
+```
+2. Install dependencies
+```bash
+pip install -r requirements.txt
+```
+3. Run application
+```bash
+streamlit run app.py
+```
 ## Core Features
 ### 1. Data Processing
     """Rebuild relations based on entity text matching"""
 ```
+## TODO
+1. [ ] Add data export functionality
+2. [ ] Named Entity Recognition
+   1. [ ] Add an input box
+   2. [ ] Call LLMs
+3. [ ] Relation Extraction
+   1. [ ] Add relation editing functionality
+4. [ ] Decide where the data lives
+   1. [ ] Read it from somewhere, e.g. a git repository
+   2. [ ] Save it somewhere; saving is a bit awkward (via a commit?)
 ## Contributing
 Contributions are welcome through:
 3. Improve documentation and comments
 ## License
+MIT License

app.py CHANGED Viewed

@@ -1,265 +1,33 @@
 import streamlit as st
-import json
-from streamlit_text_label import label_select, Selection
-from streamlit_agraph import agraph, Node, Edge, Config
-from dataclasses import dataclass
-@dataclass
-class Relation:
-    source: Selection
-    target: Selection
-    label: str
-def load_data():
-    """Load data from dev.json"""
-    with open('dev.json', 'r') as f:
-        return json.load(f)
-def save_data(data):
-    """Save data to dev.json"""
-    with open('dev.json', 'w') as f:
-        json.dump(data, f, indent=4)
-def get_label_color(label):
-    """Return color based on label type"""
-    color_map = {
-        'OBS-DP': '#FF6B6B',  # Red - Observation definitely present
-        'ANAT-DP': '#4ECDC4',  # Cyan - Anatomy definitely present
-        'OBS-U': '#FFD93D',    # Yellow - Observation uncertain
-        'OBS-DA': '#95A5A6',   # Gray - Observation definitely absent
-    }
-    return color_map.get(label, '#666666')  # Default color
-def create_graph(entities, relations):
-    """Create entity relationship graph, merge nodes with same text"""
-    # Track created nodes using dict, key is entity text
-    nodes_dict = {}
-    nodes = []
-    # First create all unique nodes
-    for entity in entities:
-        if entity.text not in nodes_dict:
-            # Create new node
-            node = Node(
-                id=entity.text,
-                label=f"{entity.text}\n({entity.labels[0]})",
-                size=25,
-                color=get_label_color(entity.labels[0])
-            )
-            nodes.append(node)
-            nodes_dict[entity.text] = node
-    # Create edges using node text as source and target
-    edges = []
-    for relation in relations:
-        # Check if source and target exist
-        if relation.source.text in nodes_dict and relation.target.text in nodes_dict:
-            edge = Edge(
-                source=relation.source.text,
-                target=relation.target.text,
-                label=relation.label,
-                color="#666666"  # Unified edge color
-            )
-            edges.append(edge)
-    config = Config(
-        width=750,
-        height=500,
-        directed=True,
-        physics=True,
-        hierarchical=False,
-        nodeHighlightBehavior=True,
-        highlightColor="#F7A7A6",
-    )
-    return agraph(nodes=nodes, edges=edges, config=config)
-def word_to_char_position(text, word_index):
-    """Convert word position to character position"""
-    words = text.split()
-    char_start = 0
-    # If word_index out of range, return text end
-    if word_index >= len(words):
-        return len(text)
-    # Traverse all words before target word
-    for i in range(word_index):
-        char_start += len(words[i]) + 1  # +1 for space
-    return char_start
-def word_to_char_span(text, start_ix, end_ix):
-    """Convert word start and end positions to character span"""
-    char_start = word_to_char_position(text, start_ix)
-    # If start equals end, it's a single word
-    if start_ix == end_ix:
-        char_end = char_start + len(text.split()[start_ix])
-    else:
-        # If multiple words, calculate to end position
-        char_end = word_to_char_position(
-            text, end_ix) + len(text.split()[end_ix])
-    return char_start, char_end
-def entities2Selection(text, entities_data):
-    """Convert entities data to Selection objects list"""
-    selections = []
-    for entity_id, entity in entities_data.items():
-        # Convert word positions to char positions
-        char_start, char_end = word_to_char_span(
-            text,
-            entity['start_ix'],
-            entity['end_ix']
-        )
-        selection = Selection(
-            start=char_start,
-            end=char_end,
-            text=entity['tokens'],
-            labels=[entity['label']],
-        )
-        selections.append(selection)
-    return selections
-def selection2entities(selections):
-    """Convert Selection objects list to entities data"""
-    entities = {}
-    for i, selection in enumerate(selections, 1):
-        entities[str(i)] = {
-            "tokens": selection.text,
-            "label": selection.labels[0],
-            "start_ix": selection.start,
-            "end_ix": selection.end,
-            "relations": []  # Initialize empty relations list
-        }
-    return entities
-def find_relations_with_entities(entities, entities_data):
-    """Find relations between current entities based on original entities_data"""
-    # Create text to entity mapping
-    text_to_entity = {e.text: e for e in entities}
-    # Create tokens to entity_id mapping
-    tokens_to_id = {entity['tokens']: entity_id
-                    for entity_id, entity in entities_data.items()}
-    # Create id to tokens mapping
-    id_to_tokens = {entity_id: entity['tokens']
-                    for entity_id, entity in entities_data.items()}
-    relations = []
-    # Iterate through each entity in current entities
-    for source_text, source_entity in text_to_entity.items():
-        # Find corresponding entity ID in original data
-        for entity_id, entity in entities_data.items():
-            if entity['tokens'] == source_text:
-                # Iterate through all relations of this entity
-                for relation in entity.get('relations', []):
-                    target_id = relation[1]
-                    # Get target entity text
-                    target_text = id_to_tokens.get(target_id)
-                    # If target entity exists in current entities
-                    if target_text and target_text in text_to_entity:
-                        relations.append(Relation(
-                            source=source_entity,
-                            target=text_to_entity[target_text],
-                            label=relation[0]
-                        ))
-    return relations
-def setup_report_selection():
-    """Setup report selection columns and return selected report"""
-    col1, col2 = st.columns(2)
-    with col1:
-        st.subheader("Reports to Review")
-        unreviewed_reports = [
-            report_id for report_id, content in st.session_state.reports_json.items()
-            if 'reviewed' not in content
-        ]
-        selected_report = st.selectbox(
-            "Select Report",
-            unreviewed_reports,
-            key="unreviewed"
-        )
-    with col2:
-        st.subheader("Reviewed Reports")
-        reviewed_reports = [
-            report_id for report_id, content in st.session_state.reports_json.items()
-            if content.get('reviewed', False)
-        ]
-        st.selectbox(
-            "Completed Reports",
-            reviewed_reports if reviewed_reports else ['None'],
-            key="reviewed"
-        )
-    return selected_report
-def display_report_content(report_data):
-    """Display the report text content"""
-    st.subheader("Report Content:")
-    st.markdown(report_data['text'])
-def display_entities(report_text, entities):
-    """Setup and display entity annotation interface"""
-    st.subheader("Entity Annotation:")
-    selections = label_select(
-        body=report_text,
-        labels=list(set(e.labels[0] for e in entities)),
-        selections=entities,
-    )
-    st.write(selections)
-    return selections, entities
-def display_relationship_graph(entities, entities_data):
-    st.subheader("Entity Relationship Graph:")
-    relations = find_relations_with_entities(entities, entities_data)
-    create_graph(entities, relations)
-def handle_review_submission(selected_report, selections, entities_data):
-    """Handle the review submission process"""
-    if st.button("Mark as Reviewed"):
-        updated_entities = selection2entities(selections)
-        for entity_id, entity in updated_entities.items():
-            if entity_id in entities_data:
-                entity['relations'] = entities_data[entity_id]['relations']
-        st.session_state.reports_json[selected_report]['reviewed'] = {
-            'entities': updated_entities
-        }
-        save_data(st.session_state.reports_json)
-        st.success("Review status saved!")
-        st.rerun()
 def main():
     """Main application"""
     st.title("Medical Report Review System")
-    # Load data
-    if 'reports_json' not in st.session_state:
-        st.session_state.reports_json = load_data()
     # Setup report selection
     selected_report = setup_report_selection()
@@ -267,25 +35,28 @@ def main():
     if selected_report:
         report_data = st.session_state.reports_json[selected_report]
         entities_data = report_data['entities']
         # Display report content
-        display_report_content(report_data)
-        # Setup entity annotation
-        selections = entities2Selection(report_data['text'], entities_data)
         # Display entities
-        display_entities(report_data['text'], selections)
-        # Display NEN
-        # 使用当前选择或原始实体创建关系图
-        current_entities = selections if st.button(
-            "Update Graph") else selections
-        display_relationship_graph(current_entities, entities_data)
         # Handle review submission
         handle_review_submission(selected_report, selections, entities_data)
 if __name__ == "__main__":
     main()

 import streamlit as st
+from app_logic import load_data, entities2Selection
+from app_ui import (
+    setup_report_selection,
+    display_report_content,
+    display_entities,
+    display_relationship_graph,
+    handle_review_submission
+)
+def initialize_session_state():
+    """Seed the Streamlit session with the values the app relies on.
+
+    ``reports_json`` is filled from ``load_data()`` only when the key is not
+    already present, and ``selected_entity`` starts out as ``None``. Keys
+    that already exist are left untouched.
+    """
+    state = st.session_state
+    if 'reports_json' not in state:
+        state.reports_json = load_data()
+    if 'selected_entity' not in state:
+        state.selected_entity = None
 def main():
+    """Configure the page and render the review workflow.
+
+    Draws the title and the report picker. When a report is selected, the
+    left column shows the report text and the relationship graph, the right
+    column shows the entity annotation widget, and the review button is
+    drawn below both.
+    """
+    # Use the wide page layout.
+    st.set_page_config(
+        page_title="Medical Report Review System",
+        layout="wide",
+        initial_sidebar_state="expanded"
+    )
     st.title("Medical Report Review System")
+    # Initialize session state
+    initialize_session_state()
     # Setup report selection
     selected_report = setup_report_selection()
     if selected_report:
         report_data = st.session_state.reports_json[selected_report]
         entities_data = report_data['entities']
+        # Setup entity annotation
+        selections_og = entities2Selection(report_data['text'], entities_data)
+        # Two equal-width columns.
+        col1, col2 = st.columns([2, 2])
+        # Run the annotation widget first so its current selections exist
+        # before the graph is built. The column an element is written to,
+        # not the order of the calls, decides where it appears on the page.
+        with col2:
+            selections = display_entities(report_data['text'], selections_og)
+        with col1:
+            display_report_content(report_data)
+            # The graph shows the stored entities by default and switches to
+            # the annotator's current selections for the run triggered by
+            # the "Update Graph" button. Previously both branches used
+            # selections_og, so the button had no effect.
+            current_entities = selections_og
+            if st.button("Update Graph", key="update_graph"):
+                current_entities = selections
+            display_relationship_graph(current_entities, entities_data)
         # Handle review submission
         handle_review_submission(selected_report, selections, entities_data)
 if __name__ == "__main__":
     main()

app_logic.py ADDED Viewed

	@@ -0,0 +1,100 @@

+import json
+from dataclasses import dataclass
+from streamlit_text_label import Selection
+@dataclass
+class Relation:
+    """A labelled, directed link between two annotated text selections."""
+    # Selection the relation starts from.
+    source: Selection
+    # Selection the relation points to.
+    target: Selection
+    # Relation type; used as the edge label when the graph is drawn.
+    label: str
+def load_data():
+    """Load the report collection from ``dev.json`` in the working directory.
+
+    Returns:
+        The parsed JSON content of the file.
+
+    Raises:
+        FileNotFoundError: if ``dev.json`` does not exist.
+        json.JSONDecodeError: if the file does not contain valid JSON.
+    """
+    # Read as UTF-8 explicitly: without it open() falls back to the
+    # platform's locale encoding, which can fail on non-ASCII report text.
+    with open('dev.json', 'r', encoding='utf-8') as f:
+        return json.load(f)
+def save_data(data):
+    """Write ``data`` to ``dev.json`` in the working directory as indented JSON.
+
+    The JSON is first written to ``dev.json.tmp`` and then moved over
+    ``dev.json``, so a failure while serializing leaves the previous file
+    intact instead of truncated.
+
+    Args:
+        data: JSON-serializable object to store.
+
+    Raises:
+        TypeError: if ``data`` contains values json cannot serialize.
+        OSError: if the file cannot be written or replaced.
+    """
+    import os
+
+    tmp_path = 'dev.json.tmp'
+    try:
+        with open(tmp_path, 'w', encoding='utf-8') as f:
+            json.dump(data, f, indent=4)
+        # os.replace swaps the file in one step on the same filesystem.
+        os.replace(tmp_path, 'dev.json')
+    except Exception:
+        # Do not leave a half-written temp file behind.
+        if os.path.exists(tmp_path):
+            os.remove(tmp_path)
+        raise
+def get_label_color(label):
+    """Map an entity label to a hex color string.
+
+    The labels 'OBS-DP', 'ANAT-DP', 'OBS-U' and 'OBS-DA' each have their own
+    color; every other label gets the gray fallback '#666666'.
+    """
+    palette = dict([
+        ('OBS-DP', '#FF6B6B'),
+        ('ANAT-DP', '#4ECDC4'),
+        ('OBS-U', '#FFD93D'),
+        ('OBS-DA', '#95A5A6'),
+    ])
+    if label in palette:
+        return palette[label]
+    return '#666666'
+def _word_spans(text):
+    """Return the (start, end) character offsets of each whitespace-separated word."""
+    spans = []
+    cursor = 0
+    for word in text.split():
+        # Only whitespace lies between cursor and the next word, so the
+        # first match at or after cursor is that word's real position.
+        start = text.index(word, cursor)
+        cursor = start + len(word)
+        spans.append((start, cursor))
+    return spans
+def _word_edge(text, word_index, edge):
+    """Return a word's start (edge=0) or end (edge=1) offset, clamped to the text."""
+    spans = _word_spans(text)
+    if not spans or word_index >= len(spans):
+        return len(text)
+    return spans[max(word_index, 0)][edge]
+def word_to_char_position(text, word_index):
+    """Convert a word index into the character offset where that word starts.
+
+    Words are the whitespace-separated tokens of ``text``. The offset is
+    taken from the word's actual position, so runs of spaces, newlines and
+    leading whitespace do not shift the result.
+
+    Args:
+        text: The full text.
+        word_index: Zero-based index of the word.
+
+    Returns:
+        The character offset of the word's first character, or ``len(text)``
+        when ``word_index`` is past the last word.
+    """
+    return _word_edge(text, word_index, 0)
+def word_to_char_span(text, start_ix, end_ix):
+    """Convert an inclusive word range into a character span.
+
+    Args:
+        text: The full text.
+        start_ix: Zero-based index of the first word.
+        end_ix: Zero-based index of the last word (inclusive).
+
+    Returns:
+        A ``(char_start, char_end)`` tuple where ``char_end`` is the offset
+        just past the last word. Indices past the last word clamp to
+        ``len(text)`` instead of raising IndexError.
+    """
+    return _word_edge(text, start_ix, 0), _word_edge(text, end_ix, 1)
+def entities2Selection(text, entities_data):
+    """Build one Selection per entry of ``entities_data``.
+
+    Each entity's ``start_ix`` / ``end_ix`` are passed to
+    ``word_to_char_span`` to get character offsets. The Selection's text is
+    the entity's ``tokens`` and its only label is the entity's ``label``.
+    The result follows the iteration order of ``entities_data``.
+    """
+    def to_selection(entity):
+        span_start, span_end = word_to_char_span(
+            text, entity['start_ix'], entity['end_ix'])
+        return Selection(
+            start=span_start,
+            end=span_end,
+            text=entity['tokens'],
+            labels=[entity['label']],
+        )
+
+    # The entity ids (dict keys) are not needed to build the selections.
+    return [to_selection(entity) for entity in entities_data.values()]
+def selection2entities(selections):
+    """Turn a list of selections into an entities dict keyed "1", "2", ...
+
+    Each entry records the selection's text as ``tokens``, its first label
+    as ``label``, its ``start`` / ``end`` as ``start_ix`` / ``end_ix`` and an
+    empty ``relations`` list.
+    """
+    # NOTE(review): entities2Selection fills Selection.start/end from
+    # word_to_char_span, so the values written to start_ix/end_ix here look
+    # like character offsets rather than word indices - confirm that readers
+    # of the saved data expect that.
+    return {
+        str(position): {
+            "tokens": picked.text,
+            "label": picked.labels[0],
+            "start_ix": picked.start,
+            "end_ix": picked.end,
+            "relations": [],
+        }
+        for position, picked in enumerate(selections, start=1)
+    }
+def find_relations_with_entities(entities, entities_data):
+    """Rebuild relations between the current entities by matching on text.
+
+    For every current entity, each stored entity whose ``tokens`` equal the
+    entity's text contributes its relations. A relation is kept only when
+    the tokens of its target id also match the text of a current entity.
+
+    Args:
+        entities: Current selections; each has a ``text`` attribute.
+        entities_data: Stored entities keyed by id. Each value has ``tokens``
+            and optionally ``relations``, a list whose items hold the
+            relation label at index 0 and the target entity id at index 1.
+
+    Returns:
+        A list of ``Relation`` objects. When several current entities share
+        the same text, the last one stands in for that text.
+    """
+    text_to_entity = {e.text: e for e in entities}
+    id_to_tokens = {entity_id: entity['tokens'] for entity_id, entity in entities_data.items()}
+    # Group the stored entities by tokens once, so each current entity is
+    # matched with a lookup instead of a scan over all of entities_data.
+    stored_by_tokens = {}
+    for entity in entities_data.values():
+        stored_by_tokens.setdefault(entity['tokens'], []).append(entity)
+    relations = []
+    for source_text, source_entity in text_to_entity.items():
+        for entity in stored_by_tokens.get(source_text, ()):
+            for relation in entity.get('relations', []):
+                target_text = id_to_tokens.get(relation[1])
+                # Skip relations whose target is unknown or no longer selected.
+                if target_text and target_text in text_to_entity:
+                    relations.append(Relation(
+                        source=source_entity,
+                        target=text_to_entity[target_text],
+                        label=relation[0]
+                    ))
+    return relations

app_ui.py ADDED Viewed

	@@ -0,0 +1,139 @@

+import streamlit as st
+from streamlit_agraph import agraph, Node, Edge, Config
+from streamlit_text_label import label_select
+from app_logic import *
+def display_entity_selections(selections):
+    """Render one button per selection, spread round-robin over four columns.
+
+    Each button shows the selection's text and first label, with its start
+    and end in the tooltip. Clicking a button stores that selection in
+    ``st.session_state.selected_entity``.
+    """
+    st.subheader("Selected Entities:")
+    column_count = 4
+    # Columns place the buttons side by side.
+    columns = st.columns(column_count)
+    for position, entity in enumerate(selections):
+        with columns[position % column_count]:
+            clicked = st.button(
+                f"{entity.text} ({entity.labels[0]})",
+                key=f"entity_{position}",
+                help=f"Start: {entity.start}, End: {entity.end}"
+            )
+            if clicked:
+                st.session_state.selected_entity = entity
+def create_graph(entities, relations):
+    """Draw the entity relationship graph and return agraph's result.
+
+    Entities sharing the same text are merged into one node, and the first
+    one seen supplies the label and color. A relation becomes an edge only
+    when both of its endpoint texts have a node.
+    """
+    node_by_text = {}
+    for entity in entities:
+        if entity.text in node_by_text:
+            continue
+        first_label = entity.labels[0]
+        node_by_text[entity.text] = Node(
+            id=entity.text,
+            label=f"{entity.text}\n({first_label})",
+            size=25,
+            color=get_label_color(first_label)
+        )
+    edges = [
+        Edge(
+            source=rel.source.text,
+            target=rel.target.text,
+            label=rel.label,
+            color="#666666"
+        )
+        for rel in relations
+        if rel.source.text in node_by_text and rel.target.text in node_by_text
+    ]
+    graph_config = Config(
+        width=750,
+        height=500,
+        directed=True,
+        physics=True,
+        hierarchical=False,
+        nodeHighlightBehavior=True,
+        highlightColor="#F7A7A6",
+    )
+    # Dict insertion order keeps the nodes in first-seen order.
+    return agraph(nodes=list(node_by_text.values()), edges=edges, config=graph_config)
+def setup_report_selection():
+    """Render the two report pickers and return the report chosen for review.
+
+    The left picker lists the reports in ``st.session_state.reports_json``
+    that have no ``reviewed`` key. The right picker lists those with a
+    truthy ``reviewed`` value, or a single 'None' entry when there are none.
+    Only the left picker's value is returned.
+    """
+    # NOTE(review): a report whose 'reviewed' key exists but is falsy would
+    # appear in neither list - confirm such data cannot occur.
+    pending_col, done_col = st.columns(2)
+    with pending_col:
+        st.subheader("Reports to Review")
+        pending_ids = []
+        for report_id, content in st.session_state.reports_json.items():
+            if 'reviewed' not in content:
+                pending_ids.append(report_id)
+        selected_report = st.selectbox(
+            "Select Report",
+            pending_ids,
+            key="unreviewed"
+        )
+    with done_col:
+        st.subheader("Reviewed Reports")
+        done_ids = []
+        for report_id, content in st.session_state.reports_json.items():
+            if content.get('reviewed', False):
+                done_ids.append(report_id)
+        st.selectbox(
+            "Completed Reports",
+            done_ids or ['None'],
+            key="reviewed"
+        )
+    return selected_report
+def display_report_content(report_data):
+    """Show the report's ``text`` field as markdown under a "Report Content:" heading."""
+    st.subheader("Report Content:")
+    report_text = report_data['text']
+    st.markdown(report_text)
+def display_entities(report_text, entities):
+    """Render the entity annotation widget and the entity buttons.
+
+    Args:
+        report_text: The report text to annotate.
+        entities: Selections shown as the initial annotations; the set of
+            offered labels is taken from their first labels.
+
+    Returns:
+        Whatever ``label_select`` returns, which is then also passed to
+        ``display_entity_selections``.
+    """
+    st.subheader("Entity Annotation:")
+    # De-duplicate in first-seen order. A set of strings iterates in an
+    # order that changes between interpreter runs, so the labels handed to
+    # the widget could be ordered differently after every restart.
+    labels = list(dict.fromkeys(e.labels[0] for e in entities))
+    selections = label_select(
+        body=report_text,
+        labels=labels,
+        selections=entities,
+    )
+    # Show the entity buttons for the current selections.
+    display_entity_selections(selections)
+    return selections
+def display_relationship_graph(entities: list[Selection], entities_data: dict):
+    """Draw the relationship graph for ``entities`` under its heading.
+
+    Relations are rebuilt from ``entities_data`` with
+    ``find_relations_with_entities`` and handed to ``create_graph``.
+    """
+    st.subheader("Entity Relationship Graph:")
+    create_graph(entities, find_relations_with_entities(entities, entities_data))
+def handle_review_submission(selected_report, selections, entities_data):
+    """Render the "Mark as Reviewed" button and store the review when clicked.
+
+    On click the current selections are converted with
+    ``selection2entities``, relations are carried over from ``entities_data``
+    for matching ids, the result is stored under the report's ``reviewed``
+    key, everything is saved with ``save_data`` and the script is rerun.
+
+    Args:
+        selected_report: Key of the report in ``st.session_state.reports_json``.
+        selections: Current selections from the annotation widget.
+        entities_data: The report's stored entities, keyed by id.
+    """
+    notice_key = '_review_saved_notice'
+    # st.rerun() starts a new script run at once, so a message drawn just
+    # before it is never visible. The confirmation is therefore kept in
+    # session state and shown on the next call of this function.
+    if notice_key in st.session_state:
+        del st.session_state[notice_key]
+        st.success("Review status saved!")
+    if not st.button("Mark as Reviewed"):
+        return
+    updated_entities = selection2entities(selections)
+    # NOTE(review): ids from selection2entities are positions in
+    # `selections`; if entities were added, removed or reordered they may
+    # not line up with the original ids - confirm this carry-over is wanted.
+    for entity_id, entity in updated_entities.items():
+        original = entities_data.get(entity_id)
+        if original is not None:
+            # Copy, so the reviewed entry does not share a list with the
+            # original; tolerate stored entities without a 'relations' key.
+            entity['relations'] = list(original.get('relations', []))
+    st.session_state.reports_json[selected_report]['reviewed'] = {
+        'entities': updated_entities
+    }
+    save_data(st.session_state.reports_json)
+    st.session_state[notice_key] = True
+    st.rerun()