Kolloquium
Browse files- .gitignore +1 -0
- __pycache__/flow.cpython-311.pyc +0 -0
- app.py +6 -4
- flow.py +76 -0
- plot.py +8 -4
.gitignore
CHANGED
@@ -1 +1,2 @@
|
|
1 |
__pycache__/plot.cpython-311.pyc
|
|
|
|
1 |
__pycache__/plot.cpython-311.pyc
|
2 |
+
img/
|
__pycache__/flow.cpython-311.pyc
ADDED
Binary file (4.55 kB). View file
|
|
app.py
CHANGED
@@ -2,17 +2,19 @@ import streamlit as st
|
|
2 |
import torch
|
3 |
from transformers import AutoTokenizer, AutoModelForTokenClassification
|
4 |
from annotated_text import annotated_text
|
|
|
|
|
5 |
import os
|
6 |
-
from plot import Plot # Assuming the class is saved in diagram_generator.py
|
7 |
|
8 |
# Define initial threshold values at the top of the script
|
9 |
-
default_cause_threshold =
|
10 |
default_indicator_threshold = 15
|
11 |
default_cause_threshold_sankey = 20
|
12 |
default_indicator_threshold_sankey = 15
|
13 |
|
14 |
# Initialize Plots
|
15 |
plot = Plot()
|
|
|
16 |
|
17 |
# Load the trained model and tokenizer
|
18 |
model_directory = "norygano/causalBERT"
|
@@ -33,7 +35,7 @@ st.markdown(
|
|
33 |
""",
|
34 |
unsafe_allow_html=True
|
35 |
)
|
36 |
-
st.markdown("[Weights](https://huggingface.co/norygano/causalBERT) | [Data](https://huggingface.co/datasets/norygano/causenv)
|
37 |
st.write("Indicators and causes in explicit attributions of causality.")
|
38 |
|
39 |
# Create tabs
|
@@ -151,4 +153,4 @@ with tab5:
|
|
151 |
)
|
152 |
indicator_threshold_sankey = st.slider(
|
153 |
"Indicator >=", min_value=1, max_value=100, value=default_indicator_threshold_sankey, key="indicator_threshold_sankey"
|
154 |
-
)
|
|
|
2 |
import torch
|
3 |
from transformers import AutoTokenizer, AutoModelForTokenClassification
|
4 |
from annotated_text import annotated_text
|
5 |
+
from plot import Plot
|
6 |
+
from flow import FlowChart
|
7 |
import os
|
|
|
8 |
|
9 |
# Define initial threshold values at the top of the script
|
10 |
+
default_cause_threshold = 25
|
11 |
default_indicator_threshold = 15
|
12 |
default_cause_threshold_sankey = 20
|
13 |
default_indicator_threshold_sankey = 15
|
14 |
|
15 |
# Initialize Plots
|
16 |
plot = Plot()
|
17 |
+
flow_chart = FlowChart()
|
18 |
|
19 |
# Load the trained model and tokenizer
|
20 |
model_directory = "norygano/causalBERT"
|
|
|
35 |
""",
|
36 |
unsafe_allow_html=True
|
37 |
)
|
38 |
+
st.markdown("[Weights](https://huggingface.co/norygano/causalBERT) | [Data](https://huggingface.co/datasets/norygano/causenv)")
|
39 |
st.write("Indicators and causes in explicit attributions of causality.")
|
40 |
|
41 |
# Create tabs
|
|
|
153 |
)
|
154 |
indicator_threshold_sankey = st.slider(
|
155 |
"Indicator >=", min_value=1, max_value=100, value=default_indicator_threshold_sankey, key="indicator_threshold_sankey"
|
156 |
+
)
|
flow.py
ADDED
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from streamlit_flow import streamlit_flow
|
3 |
+
from streamlit_flow.elements import StreamlitFlowNode, StreamlitFlowEdge
|
4 |
+
from streamlit_flow.state import StreamlitFlowState
|
5 |
+
from streamlit_flow.layouts import LayeredLayout
|
6 |
+
import base64
|
7 |
+
|
8 |
+
class FlowChart:
    """Flowchart of the data/annotation pipeline, drawn with ``streamlit_flow``.

    The chart state is stored under ``st.session_state.flowchart_state``; it is
    created on first construction and reused afterwards.
    """

    def __init__(self):
        # Only build the default state when none has been stored yet, so an
        # already-present state in the session is left untouched.
        if "flowchart_state" not in st.session_state:
            st.session_state.flowchart_state = self.create_default_state()

    def create_default_state(self):
        """Build the default nodes and edges.

        No files are read: every node's content is an inline string, so the
        state can be created without any assets on disk.

        Returns:
            StreamlitFlowState: state holding the source nodes, the processing
            nodes and the animated edges connecting them.
        """
        # (node id, displayed label) for the text sources feeding the corpus.
        sources = [
            ("bundestag", "Bundestag"),
            ("faz", "FAZ"),
            ("taz", "taz"),
            ("spiegel", "Spiegel"),
            ("sz", "Süddeutsche Zeitung"),
            ("bild", "Bild"),
            ("zeit", "zeit"),
            ("nzz", "Neue Zürcher Zeitung"),
        ]

        # (node id, displayed label) for the corpus and the tools applied to it.
        stages = [
            ("corpus", "corpus"),
            ("bertopic", "### BERTopic"),
            ("spacy", "### spacy"),
            ("inception", "### INCEpTION"),
            ("causalbert", "### CausalBERT"),
        ]

        # Every node is created at (0, 0); render() passes a LayeredLayout,
        # which presumably computes the final positions -- TODO confirm.
        nodes = [
            StreamlitFlowNode(node_id, (0, 0), {"content": label}, "input", "right")
            for node_id, label in sources
        ]
        nodes += [
            StreamlitFlowNode(
                node_id, (0, 0), {"content": label}, "default", "right", "left"
            )
            for node_id, label in stages
        ]

        # (edge id, source node id, target node id)
        links = [
            ("Bundestag-corpus", "bundestag", "corpus"),
            ("FAZ-corpus", "faz", "corpus"),
            ("taz-corpus", "taz", "corpus"),
            ("spiegel-corpus", "spiegel", "corpus"),
            ("sz-corpus", "sz", "corpus"),
            ("bild-corpus", "bild", "corpus"),
            ("zeit-corpus", "zeit", "corpus"),
            ("nzz-corpus", "nzz", "corpus"),
            ("corpus-BERTopic", "corpus", "bertopic"),
            ("corpus-spacy", "corpus", "spacy"),
            ("spacy-inception", "spacy", "inception"),
            ("inception-causalbert", "inception", "causalbert"),
        ]
        edges = [
            StreamlitFlowEdge(edge_id, source, target, animated=True)
            for edge_id, source, target in links
        ]

        return StreamlitFlowState(nodes, edges)

    def render(self):
        """Render the flowchart with a LayeredLayout and restricted interactions.

        The state returned by ``streamlit_flow`` is written back to
        ``st.session_state.flowchart_state``. New edges and the node, edge and
        pane menus are disabled; the controls stay visible.
        """
        st.session_state.flowchart_state = streamlit_flow(
            "markdown_node_flow",
            st.session_state.flowchart_state,
            layout=LayeredLayout(direction='right'),
            fit_view=True,
            show_minimap=False,
            show_controls=True,
            hide_watermark=True,
            allow_new_edges=False,
            enable_node_menu=False,
            enable_edge_menu=False,
            enable_pane_menu=False,
        )
|
plot.py
CHANGED
@@ -46,6 +46,7 @@ class Plot:
|
|
46 |
)
|
47 |
fig.update_traces(
|
48 |
textposition='inside',
|
|
|
49 |
texttemplate='%{text}',
|
50 |
textfont=dict(color='rgb(255, 255, 255)')
|
51 |
)
|
@@ -79,6 +80,7 @@ class Plot:
|
|
79 |
fig.update_traces(
|
80 |
texttemplate='%{y}',
|
81 |
textposition='inside',
|
|
|
82 |
textfont=dict(color='rgb(255, 255, 255)')
|
83 |
)
|
84 |
|
@@ -99,6 +101,7 @@ class Plot:
|
|
99 |
fig.update_traces(
|
100 |
textposition='inside',
|
101 |
texttemplate='%{text}',
|
|
|
102 |
textfont=dict(color='rgb(255, 255, 255)')
|
103 |
)
|
104 |
|
@@ -129,6 +132,7 @@ class Plot:
|
|
129 |
fig.update_traces(
|
130 |
texttemplate='%{y}',
|
131 |
textposition='inside',
|
|
|
132 |
textfont=dict(color='rgb(255, 255, 255)')
|
133 |
)
|
134 |
return fig
|
@@ -166,18 +170,18 @@ class Plot:
|
|
166 |
# Perform UMAP dimensionality reduction
|
167 |
reducer = umap.UMAP(n_components=2, random_state=42, n_neighbors=50, n_jobs=1, metric='cosine')
|
168 |
reduced_features = reducer.fit_transform(features_clean)
|
169 |
-
df_reduced = pd.DataFrame(reduced_features, columns=['
|
170 |
df_reduced = pd.concat([df_reduced, metadata.reset_index(drop=True)], axis=1)
|
171 |
|
172 |
# Plotting the scatter plot
|
173 |
-
hover_data = {'cause': True, '
|
174 |
if include_modality:
|
175 |
hover_data['Modality'] = True
|
176 |
|
177 |
fig = px.scatter(
|
178 |
df_reduced,
|
179 |
-
x='
|
180 |
-
y='
|
181 |
color='subfolder',
|
182 |
symbol='indicator',
|
183 |
labels={'subfolder': 'Effect'},
|
|
|
46 |
)
|
47 |
fig.update_traces(
|
48 |
textposition='inside',
|
49 |
+
insidetextanchor='middle',
|
50 |
texttemplate='%{text}',
|
51 |
textfont=dict(color='rgb(255, 255, 255)')
|
52 |
)
|
|
|
80 |
fig.update_traces(
|
81 |
texttemplate='%{y}',
|
82 |
textposition='inside',
|
83 |
+
insidetextanchor='middle',
|
84 |
textfont=dict(color='rgb(255, 255, 255)')
|
85 |
)
|
86 |
|
|
|
101 |
fig.update_traces(
|
102 |
textposition='inside',
|
103 |
texttemplate='%{text}',
|
104 |
+
insidetextanchor='middle',
|
105 |
textfont=dict(color='rgb(255, 255, 255)')
|
106 |
)
|
107 |
|
|
|
132 |
fig.update_traces(
|
133 |
texttemplate='%{y}',
|
134 |
textposition='inside',
|
135 |
+
insidetextanchor='middle',
|
136 |
textfont=dict(color='rgb(255, 255, 255)')
|
137 |
)
|
138 |
return fig
|
|
|
170 |
# Perform UMAP dimensionality reduction
|
171 |
reducer = umap.UMAP(n_components=2, random_state=42, n_neighbors=50, n_jobs=1, metric='cosine')
|
172 |
reduced_features = reducer.fit_transform(features_clean)
|
173 |
+
df_reduced = pd.DataFrame(reduced_features, columns=['UMAP x', 'UMAP y'])
|
174 |
df_reduced = pd.concat([df_reduced, metadata.reset_index(drop=True)], axis=1)
|
175 |
|
176 |
# Plotting the scatter plot
|
177 |
+
hover_data = {'cause': True, 'UMAP x': False, 'UMAP y': False}
|
178 |
if include_modality:
|
179 |
hover_data['Modality'] = True
|
180 |
|
181 |
fig = px.scatter(
|
182 |
df_reduced,
|
183 |
+
x='UMAP x',
|
184 |
+
y='UMAP y',
|
185 |
color='subfolder',
|
186 |
symbol='indicator',
|
187 |
labels={'subfolder': 'Effect'},
|