Annotated Text
Browse files- app.py +11 -20
- requirements.txt +2 -1
app.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
import streamlit as st
|
2 |
import torch
|
3 |
from transformers import AutoTokenizer, AutoModelForTokenClassification
|
|
|
4 |
|
5 |
# Load the trained model and tokenizer
|
6 |
model_directory = "norygano/causalBERT"
|
@@ -51,31 +52,21 @@ if st.button("Analyze Sentences"):
|
|
51 |
# Map label IDs to human-readable labels
|
52 |
predicted_labels = [label_map[label_id.item()] for label_id in predicted_label_ids[0]]
|
53 |
|
54 |
-
# Reconstruct words from subwords
|
55 |
-
|
56 |
-
reconstructed_labels = []
|
57 |
for token, label in zip(tokens, predicted_labels):
|
58 |
if token in ['[CLS]', '[SEP]']: # Exclude special tokens
|
59 |
continue
|
60 |
if token.startswith("##"):
|
61 |
-
|
62 |
else:
|
63 |
-
|
64 |
-
|
|
|
|
|
|
|
65 |
|
66 |
-
#
|
67 |
-
formatted_output = []
|
68 |
-
for token, label in zip(reconstructed_tokens, reconstructed_labels):
|
69 |
-
if label != "O":
|
70 |
-
# Use square brackets around label names
|
71 |
-
formatted_output.append(f"[{label}] <b>{token}</b> [/{label}]")
|
72 |
-
else:
|
73 |
-
formatted_output.append(token)
|
74 |
-
|
75 |
-
# Join tokens for display
|
76 |
-
output_sentence = " ".join(formatted_output)
|
77 |
-
|
78 |
-
# Display formatted sentence with Streamlit
|
79 |
st.write(f"**Sentence:** {sentence}")
|
80 |
-
|
81 |
st.write("---")
|
|
|
1 |
import streamlit as st
|
2 |
import torch
|
3 |
from transformers import AutoTokenizer, AutoModelForTokenClassification
|
4 |
+
from annotated_text import annotated_text
|
5 |
|
6 |
# Load the trained model and tokenizer
|
7 |
model_directory = "norygano/causalBERT"
|
|
|
52 |
# Map label IDs to human-readable labels
|
53 |
predicted_labels = [label_map[label_id.item()] for label_id in predicted_label_ids[0]]
|
54 |
|
55 |
+
# Reconstruct words from subwords and prepare for annotated_text
|
56 |
+
# Arguments for annotated_text(): a plain str for every word labelled "O",
# and a (word, label) tuple for every word carrying a real label.
annotations = []
for token, label in zip(tokens, predicted_labels):
    if token in ('[CLS]', '[SEP]'):  # Exclude special tokens
        continue

    is_continuation = token.startswith("##")
    # Drop the "##" marker so it never leaks into the displayed text.
    piece = token[2:] if is_continuation else token

    if is_continuation and annotations:
        previous = annotations[-1]
        if isinstance(previous, tuple):
            # Labelled word: extend its text, keep the label of its first piece.
            annotations[-1] = (previous[0] + piece, previous[1])
        else:
            # Unlabelled word is stored as a bare str; indexing it like a tuple
            # would pick out single characters instead of (text, label).
            annotations[-1] = previous + piece
        continue

    # Start of a new word (or an orphan continuation piece with nothing before
    # it to attach to): labelled words become tuples, "O" words stay plain.
    if label != "O":
        annotations.append((piece, label))
    else:
        annotations.append(piece)
|
68 |
|
69 |
+
# Display annotated text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
70 |
st.write(f"**Sentence:** {sentence}")
|
71 |
+
annotated_text(*annotations)
|
72 |
st.write("---")
|
requirements.txt
CHANGED
@@ -1,2 +1,3 @@
|
|
1 |
torch
|
2 |
-
transformers
|
|
|
|
1 |
torch
|
2 |
+
transformers
|
3 |
+
st-annotated-text
|