norygano committed on
Commit
adb4a34
·
1 Parent(s): 2e68043

Kolloquium

Browse files
Files changed (5) hide show
  1. .gitignore +1 -0
  2. __pycache__/flow.cpython-311.pyc +0 -0
  3. app.py +6 -4
  4. flow.py +76 -0
  5. plot.py +8 -4
.gitignore CHANGED
@@ -1 +1,2 @@
1
  __pycache__/plot.cpython-311.pyc
 
 
1
  __pycache__/plot.cpython-311.pyc
2
+ img/
__pycache__/flow.cpython-311.pyc ADDED
Binary file (4.55 kB). View file
 
app.py CHANGED
@@ -2,17 +2,19 @@ import streamlit as st
2
  import torch
3
  from transformers import AutoTokenizer, AutoModelForTokenClassification
4
  from annotated_text import annotated_text
 
 
5
  import os
6
- from plot import Plot # Assuming the class is saved in diagram_generator.py
7
 
8
  # Define initial threshold values at the top of the script
9
- default_cause_threshold = 20
10
  default_indicator_threshold = 15
11
  default_cause_threshold_sankey = 20
12
  default_indicator_threshold_sankey = 15
13
 
14
  # Initialize Plots
15
  plot = Plot()
 
16
 
17
  # Load the trained model and tokenizer
18
  model_directory = "norygano/causalBERT"
@@ -33,7 +35,7 @@ st.markdown(
33
  """,
34
  unsafe_allow_html=True
35
  )
36
- st.markdown("[Weights](https://huggingface.co/norygano/causalBERT) | [Data](https://huggingface.co/datasets/norygano/causenv) | [Project](https://www.uni-trier.de/universitaet/fachbereiche-faecher/fachbereich-ii/faecher/germanistik/professurenfachteile/germanistische-linguistik/professoren/prof-dr-martin-wengeler/kontroverse-diskurse/individium-gesellschaft)")
37
  st.write("Indicators and causes in explicit attributions of causality.")
38
 
39
  # Create tabs
@@ -151,4 +153,4 @@ with tab5:
151
  )
152
  indicator_threshold_sankey = st.slider(
153
  "Indicator >=", min_value=1, max_value=100, value=default_indicator_threshold_sankey, key="indicator_threshold_sankey"
154
- )
 
2
  import torch
3
  from transformers import AutoTokenizer, AutoModelForTokenClassification
4
  from annotated_text import annotated_text
5
+ from plot import Plot
6
+ from flow import FlowChart
7
  import os
 
8
 
9
  # Define initial threshold values at the top of the script
10
+ default_cause_threshold = 25
11
  default_indicator_threshold = 15
12
  default_cause_threshold_sankey = 20
13
  default_indicator_threshold_sankey = 15
14
 
15
  # Initialize Plots
16
  plot = Plot()
17
+ flow_chart = FlowChart()
18
 
19
  # Load the trained model and tokenizer
20
  model_directory = "norygano/causalBERT"
 
35
  """,
36
  unsafe_allow_html=True
37
  )
38
+ st.markdown("[Weights](https://huggingface.co/norygano/causalBERT) | [Data](https://huggingface.co/datasets/norygano/causenv)")
39
  st.write("Indicators and causes in explicit attributions of causality.")
40
 
41
  # Create tabs
 
153
  )
154
  indicator_threshold_sankey = st.slider(
155
  "Indicator >=", min_value=1, max_value=100, value=default_indicator_threshold_sankey, key="indicator_threshold_sankey"
156
+ )
flow.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from streamlit_flow import streamlit_flow
3
+ from streamlit_flow.elements import StreamlitFlowNode, StreamlitFlowEdge
4
+ from streamlit_flow.state import StreamlitFlowState
5
+ from streamlit_flow.layouts import LayeredLayout
6
+ import base64
7
+
8
class FlowChart:
    """Pipeline flowchart rendered with ``streamlit_flow``.

    The flow state (nodes and edges) is kept in
    ``st.session_state.flowchart_state`` so it survives Streamlit reruns.
    """

    def __init__(self):
        # Initialize the flowchart state in session state if it doesn't exist
        if "flowchart_state" not in st.session_state:
            st.session_state.flowchart_state = self.create_default_state()

    def create_default_state(self):
        """Build the default nodes and edges.

        Returns:
            A ``StreamlitFlowState`` holding the source nodes, the processing
            nodes and the edges connecting them.
        """
        # Every node is created at (0, 0); presumably the layout passed to
        # streamlit_flow() in render() computes the actual positions.
        nodes = [
            # Sources
            StreamlitFlowNode("bundestag", (0, 0), {"content": "Bundestag"}, "input", "right"),
            StreamlitFlowNode("faz", (0, 0), {"content": "FAZ"}, "input", "right"),
            StreamlitFlowNode("taz", (0, 0), {"content": "taz"}, "input", "right"),
            StreamlitFlowNode("spiegel", (0, 0), {"content": "Spiegel"}, "input", "right"),
            StreamlitFlowNode("sz", (0, 0), {"content": "Süddeutsche Zeitung"}, "input", "right"),
            StreamlitFlowNode("bild", (0, 0), {"content": "Bild"}, "input", "right"),
            StreamlitFlowNode("zeit", (0, 0), {"content": "zeit"}, "input", "right"),
            StreamlitFlowNode("nzz", (0, 0), {"content": "Neue Zürcher Zeitung"}, "input", "right"),

            # Corpus
            StreamlitFlowNode("corpus", (0, 0), {"content": "corpus"}, "default", "right", "left"),
            # BERTopic
            StreamlitFlowNode("bertopic", (0, 0), {"content": "### BERTopic"}, "default", "right", "left"),
            # spaCy
            StreamlitFlowNode("spacy", (0, 0), {"content": "### spacy"}, "default", "right", "left"),
            # INCEpTION
            StreamlitFlowNode("inception", (0, 0), {"content": "### INCEpTION"}, "default", "right", "left"),
            # CausalBERT
            StreamlitFlowNode("causalbert", (0, 0), {"content": "### CausalBERT"}, "default", "right", "left"),
        ]

        edges = [
            # Every source feeds the corpus
            StreamlitFlowEdge("Bundestag-corpus", "bundestag", "corpus", animated=True),
            StreamlitFlowEdge("FAZ-corpus", "faz", "corpus", animated=True),
            StreamlitFlowEdge("taz-corpus", "taz", "corpus", animated=True),
            StreamlitFlowEdge("spiegel-corpus", "spiegel", "corpus", animated=True),
            StreamlitFlowEdge("sz-corpus", "sz", "corpus", animated=True),
            StreamlitFlowEdge("bild-corpus", "bild", "corpus", animated=True),
            StreamlitFlowEdge("zeit-corpus", "zeit", "corpus", animated=True),
            StreamlitFlowEdge("nzz-corpus", "nzz", "corpus", animated=True),
            # The corpus branches into BERTopic and the spacy chain
            StreamlitFlowEdge("corpus-BERTopic", "corpus", "bertopic", animated=True),
            StreamlitFlowEdge("corpus-spacy", "corpus", "spacy", animated=True),
            StreamlitFlowEdge("spacy-inception", "spacy", "inception", animated=True),
            StreamlitFlowEdge("inception-causalbert", "inception", "causalbert", animated=True),
        ]
        return StreamlitFlowState(nodes, edges)

    def render(self):
        """Render the flowchart with a left-to-right LayeredLayout.

        Minimap, new-edge creation and the node/edge/pane menus are disabled.
        The state returned by ``streamlit_flow`` is written back to
        ``st.session_state.flowchart_state``.
        """
        st.session_state.flowchart_state = streamlit_flow(
            "markdown_node_flow",
            st.session_state.flowchart_state,
            layout=LayeredLayout(direction='right'),
            fit_view=True,
            show_minimap=False,
            show_controls=True,
            hide_watermark=True,
            allow_new_edges=False,
            enable_node_menu=False,
            enable_edge_menu=False,
            enable_pane_menu=False,
        )
plot.py CHANGED
@@ -46,6 +46,7 @@ class Plot:
46
  )
47
  fig.update_traces(
48
  textposition='inside',
 
49
  texttemplate='%{text}',
50
  textfont=dict(color='rgb(255, 255, 255)')
51
  )
@@ -79,6 +80,7 @@ class Plot:
79
  fig.update_traces(
80
  texttemplate='%{y}',
81
  textposition='inside',
 
82
  textfont=dict(color='rgb(255, 255, 255)')
83
  )
84
 
@@ -99,6 +101,7 @@ class Plot:
99
  fig.update_traces(
100
  textposition='inside',
101
  texttemplate='%{text}',
 
102
  textfont=dict(color='rgb(255, 255, 255)')
103
  )
104
 
@@ -129,6 +132,7 @@ class Plot:
129
  fig.update_traces(
130
  texttemplate='%{y}',
131
  textposition='inside',
 
132
  textfont=dict(color='rgb(255, 255, 255)')
133
  )
134
  return fig
@@ -166,18 +170,18 @@ class Plot:
166
  # Perform UMAP dimensionality reduction
167
  reducer = umap.UMAP(n_components=2, random_state=42, n_neighbors=50, n_jobs=1, metric='cosine')
168
  reduced_features = reducer.fit_transform(features_clean)
169
- df_reduced = pd.DataFrame(reduced_features, columns=['Component 1', 'Component 2'])
170
  df_reduced = pd.concat([df_reduced, metadata.reset_index(drop=True)], axis=1)
171
 
172
  # Plotting the scatter plot
173
- hover_data = {'cause': True, 'Component 1': False, 'Component 2': False}
174
  if include_modality:
175
  hover_data['Modality'] = True
176
 
177
  fig = px.scatter(
178
  df_reduced,
179
- x='Component 1',
180
- y='Component 2',
181
  color='subfolder',
182
  symbol='indicator',
183
  labels={'subfolder': 'Effect'},
 
46
  )
47
  fig.update_traces(
48
  textposition='inside',
49
+ insidetextanchor='middle',
50
  texttemplate='%{text}',
51
  textfont=dict(color='rgb(255, 255, 255)')
52
  )
 
80
  fig.update_traces(
81
  texttemplate='%{y}',
82
  textposition='inside',
83
+ insidetextanchor='middle',
84
  textfont=dict(color='rgb(255, 255, 255)')
85
  )
86
 
 
101
  fig.update_traces(
102
  textposition='inside',
103
  texttemplate='%{text}',
104
+ insidetextanchor='middle',
105
  textfont=dict(color='rgb(255, 255, 255)')
106
  )
107
 
 
132
  fig.update_traces(
133
  texttemplate='%{y}',
134
  textposition='inside',
135
+ insidetextanchor='middle',
136
  textfont=dict(color='rgb(255, 255, 255)')
137
  )
138
  return fig
 
170
  # Perform UMAP dimensionality reduction
171
  reducer = umap.UMAP(n_components=2, random_state=42, n_neighbors=50, n_jobs=1, metric='cosine')
172
  reduced_features = reducer.fit_transform(features_clean)
173
+ df_reduced = pd.DataFrame(reduced_features, columns=['UMAP x', 'UMAP y'])
174
  df_reduced = pd.concat([df_reduced, metadata.reset_index(drop=True)], axis=1)
175
 
176
  # Plotting the scatter plot
177
+ hover_data = {'cause': True, 'UMAP x': False, 'UMAP y': False}
178
  if include_modality:
179
  hover_data['Modality'] = True
180
 
181
  fig = px.scatter(
182
  df_reduced,
183
+ x='UMAP x',
184
+ y='UMAP y',
185
  color='subfolder',
186
  symbol='indicator',
187
  labels={'subfolder': 'Effect'},