Spaces:

pdjohn
/

causev

Running

App Files Files Community

pdjohn committed on Oct 27, 2024

Commit

4be4c1c

1 Parent(s): a8d29d4

Annotated Text

Browse files

Files changed (2) hide show

app.py +11 -20
requirements.txt +2 -1

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import streamlit as st
 import torch
 from transformers import AutoTokenizer, AutoModelForTokenClassification
 # Load the trained model and tokenizer
 model_directory = "norygano/causalBERT"
@@ -51,31 +52,21 @@ if st.button("Analyze Sentences"):
         # Map label IDs to human-readable labels
         predicted_labels = [label_map[label_id.item()] for label_id in predicted_label_ids[0]]
-        # Reconstruct words from subwords
-        reconstructed_tokens = []
-        reconstructed_labels = []
         for token, label in zip(tokens, predicted_labels):
             if token in ['[CLS]', '[SEP]']:  # Exclude special tokens
                 continue
             if token.startswith("##"):
-                reconstructed_tokens[-1] += token[2:]  # Append subword
             else:
-                reconstructed_tokens.append(token)
-                reconstructed_labels.append(label)
-        # Format output with square brackets
-        formatted_output = []
-        for token, label in zip(reconstructed_tokens, reconstructed_labels):
-            if label != "O":
-                # Use square brackets around label names
-                formatted_output.append(f"[{label}] <b>{token}</b> [/{label}]")
-            else:
-                formatted_output.append(token)
-        # Join tokens for display
-        output_sentence = " ".join(formatted_output)
-        # Display formatted sentence with Streamlit
         st.write(f"**Sentence:** {sentence}")
-        st.markdown(f"**Labeled:** {output_sentence}", unsafe_allow_html=True)
         st.write("---")

 import streamlit as st
 import torch
 from transformers import AutoTokenizer, AutoModelForTokenClassification
+from annotated_text import annotated_text
 # Load the trained model and tokenizer
 model_directory = "norygano/causalBERT"
         # Map label IDs to human-readable labels
         predicted_labels = [label_map[label_id.item()] for label_id in predicted_label_ids[0]]
+        # Reconstruct words from subwords and prepare for annotated_text
+        annotations = []
         for token, label in zip(tokens, predicted_labels):
             if token in ['[CLS]', '[SEP]']:  # Exclude special tokens
                 continue
             if token.startswith("##"):
+                annotations[-1] = (annotations[-1][0] + token[2:], annotations[-1][1])  # Append subword
             else:
+                # Append a (token, label) tuple for labeled tokens, or the bare token string if it's "O" (outside)
+                if label != "O":
+                    annotations.append((token, label))
+                else:
+                    annotations.append(token)
+        # Display annotated text
         st.write(f"**Sentence:** {sentence}")
+        annotated_text(*annotations)
         st.write("---")

requirements.txt CHANGED Viewed

@@ -1,2 +1,3 @@
 torch
-transformers

 torch
+transformers
+st-annotated-text