norygano committed on
Commit
3a965ad
·
1 Parent(s): 4be4c1c

Presentation

Browse files
Files changed (1) hide show
  1. app.py +42 -16
app.py CHANGED
@@ -15,25 +15,33 @@ model.eval()
15
  label_map = {0: "O", 1: "B-INDICATOR", 2: "I-INDICATOR", 3: "B-CAUSE", 4: "I-CAUSE"}
16
 
17
  # Streamlit App
18
- st.title("Attribution of Causality")
19
- st.write("Tags indicators and causes. GER only (atm)")
 
 
 
 
 
 
 
20
 
21
- # Text input for sentences
22
- sentences_input = st.text_area("Sentences (one per line)", "\n".join([
 
 
 
23
  "Laub könnte verantwortlich für den Klimawandel sein.",
24
- #"Backenzähne verursachen Artensterben.",
25
  "Fußball führt zu Waldschäden.",
26
- #"Das hängt mit vielen Faktoren zusammen.",
27
  "Haustüren tragen zum Betonsterben bei.",
28
- #"Autos stehen im verdacht, Bienensterben auszulösen.",
29
- #"Lösen Straßen Waldsterben aus?"
30
- ]))
31
 
32
  # Split the input text into individual sentences
33
  sentences = [sentence.strip() for sentence in sentences_input.splitlines() if sentence.strip()]
34
 
35
  # Button to run the model
36
- if st.button("Analyze Sentences"):
37
  for sentence in sentences:
38
  # Tokenize the sentence
39
  inputs = tokenizer(sentence, return_tensors="pt", truncation=True, padding=True)
@@ -54,17 +62,35 @@ if st.button("Analyze Sentences"):
54
 
55
  # Reconstruct words from subwords and prepare for annotated_text
56
  annotations = []
 
 
 
57
  for token, label in zip(tokens, predicted_labels):
58
  if token in ['[CLS]', '[SEP]']: # Exclude special tokens
59
  continue
 
60
  if token.startswith("##"):
61
- annotations[-1] = (annotations[-1][0] + token[2:], annotations[-1][1]) # Append subword
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  else:
63
- # Append token with label, or None if it's "O" (outside)
64
- if label != "O":
65
- annotations.append((token, label))
66
- else:
67
- annotations.append(token)
68
 
69
  # Display annotated text
70
  st.write(f"**Sentence:** {sentence}")
 
15
  label_map = {0: "O", 1: "B-INDICATOR", 2: "I-INDICATOR", 3: "B-CAUSE", 4: "I-CAUSE"}
16
 
17
  # Streamlit App
18
+ st.markdown(
19
+ """
20
+ <div style="display: flex; align-items: center; justify-content: left; font-size: 60px; font-weight: bold;">
21
+ <span>CAUSEN</span>
22
+ <span style="transform: rotate(270deg); display: inline-block; margin-left: 5px;">V</span>
23
+ </div>
24
+ """,
25
+ unsafe_allow_html=True
26
+ )
27
 
28
+ # Add a short description of what the app does (the model link is rendered separately, below the text area)
29
+ st.write("Tags indicators and causes in explicit attribution of causality. GER only (atm)")
30
+
31
+ # Text input for sentences: italic label, pre-filled with example sentences, plus a placeholder string
32
+ sentences_input = st.text_area("*Sentences (one per line)*", "\n".join([
33
  "Laub könnte verantwortlich für den Klimawandel sein.",
 
34
  "Fußball führt zu Waldschäden.",
 
35
  "Haustüren tragen zum Betonsterben bei.",
36
+ ])
37
+ , placeholder="Your Sentences here.")
38
+ st.markdown("[Model](https://huggingface.co/norygano/causalBERT)")
39
 
40
  # Split the input text into individual sentences
41
  sentences = [sentence.strip() for sentence in sentences_input.splitlines() if sentence.strip()]
42
 
43
  # Button to run the model
44
+ if st.button("Analyze"):
45
  for sentence in sentences:
46
  # Tokenize the sentence
47
  inputs = tokenizer(sentence, return_tensors="pt", truncation=True, padding=True)
 
62
 
63
  # Reconstruct words from subwords and prepare for annotated_text
64
  annotations = []
65
+ current_word = ""
66
+ current_label = "O"
67
+
68
  for token, label in zip(tokens, predicted_labels):
69
  if token in ['[CLS]', '[SEP]']: # Exclude special tokens
70
  continue
71
+
72
  if token.startswith("##"):
73
+ # Append subword without "##" prefix to the current word
74
+ current_word += token[2:]
75
+ else:
76
+ # A new word starts here: flush the previously accumulated word (if any) to annotations, followed by a space
77
+ if current_word:
78
+ if current_label != "O":
79
+ annotations.append((current_word, current_label))
80
+ else:
81
+ annotations.append(current_word)
82
+ annotations.append(" ") # Add a space between words
83
+
84
+ # Start a new word
85
+ current_word = token
86
+ current_label = label
87
+
88
+ # Add the last accumulated word
89
+ if current_word:
90
+ if current_label != "O":
91
+ annotations.append((current_word, current_label))
92
  else:
93
+ annotations.append(current_word)
 
 
 
 
94
 
95
  # Display annotated text
96
  st.write(f"**Sentence:** {sentence}")