norygano committed on
Commit
4be4c1c
·
1 Parent(s): a8d29d4

Annotated Text

Browse files
Files changed (2) hide show
  1. app.py +11 -20
  2. requirements.txt +2 -1
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import streamlit as st
2
  import torch
3
  from transformers import AutoTokenizer, AutoModelForTokenClassification
 
4
 
5
  # Load the trained model and tokenizer
6
  model_directory = "norygano/causalBERT"
@@ -51,31 +52,21 @@ if st.button("Analyze Sentences"):
51
  # Map label IDs to human-readable labels
52
  predicted_labels = [label_map[label_id.item()] for label_id in predicted_label_ids[0]]
53
 
54
- # Reconstruct words from subwords
55
- reconstructed_tokens = []
56
- reconstructed_labels = []
57
  for token, label in zip(tokens, predicted_labels):
58
  if token in ['[CLS]', '[SEP]']: # Exclude special tokens
59
  continue
60
  if token.startswith("##"):
61
- reconstructed_tokens[-1] += token[2:] # Append subword
62
  else:
63
- reconstructed_tokens.append(token)
64
- reconstructed_labels.append(label)
 
 
 
65
 
66
- # Format output with square brackets
67
- formatted_output = []
68
- for token, label in zip(reconstructed_tokens, reconstructed_labels):
69
- if label != "O":
70
- # Use square brackets around label names
71
- formatted_output.append(f"[{label}] <b>{token}</b> [/{label}]")
72
- else:
73
- formatted_output.append(token)
74
-
75
- # Join tokens for display
76
- output_sentence = " ".join(formatted_output)
77
-
78
- # Display formatted sentence with Streamlit
79
  st.write(f"**Sentence:** {sentence}")
80
- st.markdown(f"**Labeled:** {output_sentence}", unsafe_allow_html=True)
81
  st.write("---")
 
1
  import streamlit as st
2
  import torch
3
  from transformers import AutoTokenizer, AutoModelForTokenClassification
4
+ from annotated_text import annotated_text
5
 
6
  # Load the trained model and tokenizer
7
  model_directory = "norygano/causalBERT"
 
52
  # Map label IDs to human-readable labels
53
  predicted_labels = [label_map[label_id.item()] for label_id in predicted_label_ids[0]]
54
 
55
+ # Reconstruct words from subwords and prepare for annotated_text
56
+ annotations = []
 
57
  for token, label in zip(tokens, predicted_labels):
58
  if token in ['[CLS]', '[SEP]']: # Exclude special tokens
59
  continue
60
  if token.startswith("##"):
61
+ annotations[-1] = (annotations[-1][0] + token[2:], annotations[-1][1]) # Append subword
62
  else:
63
+ # Append (token, label) for labeled tokens, or the bare token string if it's "O" (outside)
64
+ if label != "O":
65
+ annotations.append((token, label))
66
+ else:
67
+ annotations.append(token)
68
 
69
+ # Display annotated text
 
 
 
 
 
 
 
 
 
 
 
 
70
  st.write(f"**Sentence:** {sentence}")
71
+ annotated_text(*annotations)
72
  st.write("---")
requirements.txt CHANGED
@@ -1,2 +1,3 @@
1
  torch
2
- transformers
 
 
1
  torch
2
+ transformers
3
+ st-annotated-text