Spaces:

pdjohn
/

causev

Running

App Files Files Community

pdjohn committed on Oct 27, 2024

Commit

3a965ad

1 Parent(s): 4be4c1c

Presentation

Browse files

Files changed (1) hide show

app.py +42 -16

app.py CHANGED Viewed

@@ -15,25 +15,33 @@ model.eval()
 label_map = {0: "O", 1: "B-INDICATOR", 2: "I-INDICATOR", 3: "B-CAUSE", 4: "I-CAUSE"}
 # Streamlit App
-st.title("Attribution of Causality")
-st.write("Tags indicators and causes. GER only (atm)")
-# Text input for sentences
-sentences_input = st.text_area("Sentences (one per line)", "\n".join([
     "Laub könnte verantwortlich für den Klimawandel sein.",
-    #"Backenzähne verursachen Artensterben.",
     "Fußball führt zu Waldschäden.",
-    #"Das hängt mit vielen Faktoren zusammen.",
     "Haustüren tragen zum Betonsterben bei.",
-    #"Autos stehen im verdacht, Bienensterben auszulösen.",
-    #"Lösen Straßen Waldsterben aus?"
-]))
 # Split the input text into individual sentences
 sentences = [sentence.strip() for sentence in sentences_input.splitlines() if sentence.strip()]
 # Button to run the model
-if st.button("Analyze Sentences"):
     for sentence in sentences:
         # Tokenize the sentence
         inputs = tokenizer(sentence, return_tensors="pt", truncation=True, padding=True)
@@ -54,17 +62,35 @@ if st.button("Analyze Sentences"):
         # Reconstruct words from subwords and prepare for annotated_text
         annotations = []
         for token, label in zip(tokens, predicted_labels):
             if token in ['[CLS]', '[SEP]']:  # Exclude special tokens
                 continue
             if token.startswith("##"):
-                annotations[-1] = (annotations[-1][0] + token[2:], annotations[-1][1])  # Append subword
             else:
-                # Append token with label, or None if it's "O" (outside)
-                if label != "O":
-                    annotations.append((token, label))
-                else:
-                    annotations.append(token)
         # Display annotated text
         st.write(f"**Sentence:** {sentence}")

 label_map = {0: "O", 1: "B-INDICATOR", 2: "I-INDICATOR", 3: "B-CAUSE", 4: "I-CAUSE"}
 # Streamlit App
+st.markdown(
+    """
+    <div style="display: flex; align-items: center; justify-content: left; font-size: 60px; font-weight: bold;">
+        <span>CAUSEN</span>
+        <span style="transform: rotate(270deg); display: inline-block; margin-left: 5px;">V</span>
+    </div>
+    """,
+    unsafe_allow_html=True
+)
+# Add a short description (the link to the model is rendered separately below the text area)
+st.write("Tags indicators and causes in explicit attribution of causality. GER only (atm)")
+# Text input for sentences with an italic label and plain placeholder text
+sentences_input = st.text_area("*Sentences (one per line)*", "\n".join([
     "Laub könnte verantwortlich für den Klimawandel sein.",
     "Fußball führt zu Waldschäden.",
     "Haustüren tragen zum Betonsterben bei.",
+])
+, placeholder="Your Sentences here.")
+st.markdown("[Model](https://huggingface.co/norygano/causalBERT)")
 # Split the input text into individual sentences
 sentences = [sentence.strip() for sentence in sentences_input.splitlines() if sentence.strip()]
 # Button to run the model
+if st.button("Analyze"):
     for sentence in sentences:
         # Tokenize the sentence
         inputs = tokenizer(sentence, return_tensors="pt", truncation=True, padding=True)
         # Reconstruct words from subwords and prepare for annotated_text
         annotations = []
+        current_word = ""
+        current_label = "O"
         for token, label in zip(tokens, predicted_labels):
             if token in ['[CLS]', '[SEP]']:  # Exclude special tokens
                 continue
             if token.startswith("##"):
+                # Append subword without "##" prefix to the current word
+                current_word += token[2:]
+            else:
+                # If we have accumulated a word, add it to annotations with a space
+                if current_word:
+                    if current_label != "O":
+                        annotations.append((current_word, current_label))
+                    else:
+                        annotations.append(current_word)
+                    annotations.append(" ")  # Add a space between words
+                # Start a new word
+                current_word = token
+                current_label = label
+        # Add the last accumulated word
+        if current_word:
+            if current_label != "O":
+                annotations.append((current_word, current_label))
             else:
+                annotations.append(current_word)
         # Display annotated text
         st.write(f"**Sentence:** {sentence}")