Pranava Kailash committed on
Commit
6cc3bb2
·
1 Parent(s): e09363d

CyNER2.0 Runtime Memory Optimized

Browse files
Files changed (1) hide show
  1. app.py +13 -10
app.py CHANGED
@@ -1,12 +1,15 @@
1
  import streamlit as st
2
  from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
 
3
 
4
  # Load model and tokenizer
5
  path_to_checkpoint = 'PranavaKailash/CyNER-2.0-DeBERTa-v3-base'
6
- tokenizer = AutoTokenizer.from_pretrained(path_to_checkpoint, use_fast=True, max_length=768)
7
  model = AutoModelForTokenClassification.from_pretrained(path_to_checkpoint)
8
  ner_pipeline = pipeline("ner", model=model, tokenizer=tokenizer)
9
 
 
 
10
  def tag_sentence(sentence, entities_dict):
11
  """
12
  Add HTML tags to entities for visualization.
@@ -50,12 +53,10 @@ def perform_ner(text):
50
  Run NER pipeline and prepare results for display.
51
  """
52
  entities = ner_pipeline(text)
53
- entities_dict = {}
 
54
  for entity in entities:
55
- entity_type = entity['entity']
56
- if entity_type not in entities_dict:
57
- entities_dict[entity_type] = []
58
- entities_dict[entity_type].append({
59
  "entity": entity['entity'],
60
  "score": entity['score'],
61
  "index": entity['index'],
@@ -63,9 +64,9 @@ def perform_ner(text):
63
  "start": entity['start'],
64
  "end": entity['end']
65
  })
66
-
67
  tagged_sentence = tag_sentence(text, entities_dict)
68
- return entities_dict, tagged_sentence
69
 
70
  # Streamlit UI
71
  st.title("CyNER 2.0 - Named Entity Recognition")
@@ -74,7 +75,9 @@ st.write("Enter text to get named entity recognition results.")
74
  input_text = st.text_area("Input Text", "Type your text here...")
75
 
76
  if st.button("Analyze"):
77
- if input_text.strip():
 
 
78
  entities_dict, tagged_sentence = perform_ner(input_text)
79
 
80
  # Display results
@@ -84,4 +87,4 @@ if st.button("Analyze"):
84
  st.subheader("Entities and Details")
85
  st.json(entities_dict)
86
  else:
87
- st.warning("Please enter some text for analysis.")
 
1
  import streamlit as st
2
  from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
3
+ from collections import defaultdict
4
 
5
  # Load model and tokenizer
6
  path_to_checkpoint = 'PranavaKailash/CyNER-2.0-DeBERTa-v3-base'
7
+ tokenizer = AutoTokenizer.from_pretrained(path_to_checkpoint, use_fast=True)
8
  model = AutoModelForTokenClassification.from_pretrained(path_to_checkpoint)
9
  ner_pipeline = pipeline("ner", model=model, tokenizer=tokenizer)
10
 
11
+ MAX_INPUT_LENGTH = 500 # Set an appropriate length limit
12
+
13
  def tag_sentence(sentence, entities_dict):
14
  """
15
  Add HTML tags to entities for visualization.
 
53
  Run NER pipeline and prepare results for display.
54
  """
55
  entities = ner_pipeline(text)
56
+
57
+ entities_dict = defaultdict(list)
58
  for entity in entities:
59
+ entities_dict[entity['entity']].append({
 
 
 
60
  "entity": entity['entity'],
61
  "score": entity['score'],
62
  "index": entity['index'],
 
64
  "start": entity['start'],
65
  "end": entity['end']
66
  })
67
+
68
  tagged_sentence = tag_sentence(text, entities_dict)
69
+ return dict(entities_dict), tagged_sentence
70
 
71
  # Streamlit UI
72
  st.title("CyNER 2.0 - Named Entity Recognition")
 
75
  input_text = st.text_area("Input Text", "Type your text here...")
76
 
77
  if st.button("Analyze"):
78
+ if len(input_text) > MAX_INPUT_LENGTH:
79
+ st.warning(f"Text is too long! Please enter less than {MAX_INPUT_LENGTH} characters.")
80
+ elif input_text.strip():
81
  entities_dict, tagged_sentence = perform_ner(input_text)
82
 
83
  # Display results
 
87
  st.subheader("Entities and Details")
88
  st.json(entities_dict)
89
  else:
90
+ st.warning("Please enter some text for analysis.")