Pranava Kailash
committed on
Commit
·
6cc3bb2
1
Parent(s):
e09363d
CyNER2.0 Runtime Memory Optimized
Browse files
app.py
CHANGED
@@ -1,12 +1,15 @@
|
|
1 |
import streamlit as st
|
2 |
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
|
|
|
3 |
|
4 |
# Load model and tokenizer
|
5 |
path_to_checkpoint = 'PranavaKailash/CyNER-2.0-DeBERTa-v3-base'
|
6 |
-
tokenizer = AutoTokenizer.from_pretrained(path_to_checkpoint, use_fast=True
|
7 |
model = AutoModelForTokenClassification.from_pretrained(path_to_checkpoint)
|
8 |
ner_pipeline = pipeline("ner", model=model, tokenizer=tokenizer)
|
9 |
|
|
|
|
|
10 |
def tag_sentence(sentence, entities_dict):
|
11 |
"""
|
12 |
Add HTML tags to entities for visualization.
|
@@ -50,12 +53,10 @@ def perform_ner(text):
|
|
50 |
Run NER pipeline and prepare results for display.
|
51 |
"""
|
52 |
entities = ner_pipeline(text)
|
53 |
-
|
|
|
54 |
for entity in entities:
|
55 |
-
|
56 |
-
if entity_type not in entities_dict:
|
57 |
-
entities_dict[entity_type] = []
|
58 |
-
entities_dict[entity_type].append({
|
59 |
"entity": entity['entity'],
|
60 |
"score": entity['score'],
|
61 |
"index": entity['index'],
|
@@ -63,9 +64,9 @@ def perform_ner(text):
|
|
63 |
"start": entity['start'],
|
64 |
"end": entity['end']
|
65 |
})
|
66 |
-
|
67 |
tagged_sentence = tag_sentence(text, entities_dict)
|
68 |
-
return entities_dict, tagged_sentence
|
69 |
|
70 |
# Streamlit UI
|
71 |
st.title("CyNER 2.0 - Named Entity Recognition")
|
@@ -74,7 +75,9 @@ st.write("Enter text to get named entity recognition results.")
|
|
74 |
input_text = st.text_area("Input Text", "Type your text here...")
|
75 |
|
76 |
if st.button("Analyze"):
|
77 |
-
if input_text
|
|
|
|
|
78 |
entities_dict, tagged_sentence = perform_ner(input_text)
|
79 |
|
80 |
# Display results
|
@@ -84,4 +87,4 @@ if st.button("Analyze"):
|
|
84 |
st.subheader("Entities and Details")
|
85 |
st.json(entities_dict)
|
86 |
else:
|
87 |
-
st.warning("Please enter some text for analysis.")
|
|
|
1 |
import streamlit as st
|
2 |
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
|
3 |
+
from collections import defaultdict
|
4 |
|
5 |
# Load model and tokenizer
|
6 |
path_to_checkpoint = 'PranavaKailash/CyNER-2.0-DeBERTa-v3-base'
|
7 |
+
tokenizer = AutoTokenizer.from_pretrained(path_to_checkpoint, use_fast=True)
|
8 |
model = AutoModelForTokenClassification.from_pretrained(path_to_checkpoint)
|
9 |
ner_pipeline = pipeline("ner", model=model, tokenizer=tokenizer)
|
10 |
|
11 |
+
MAX_INPUT_LENGTH = 500 # Set an appropriate length limit
|
12 |
+
|
13 |
def tag_sentence(sentence, entities_dict):
|
14 |
"""
|
15 |
Add HTML tags to entities for visualization.
|
|
|
53 |
Run NER pipeline and prepare results for display.
|
54 |
"""
|
55 |
entities = ner_pipeline(text)
|
56 |
+
|
57 |
+
entities_dict = defaultdict(list)
|
58 |
for entity in entities:
|
59 |
+
entities_dict[entity['entity']].append({
|
|
|
|
|
|
|
60 |
"entity": entity['entity'],
|
61 |
"score": entity['score'],
|
62 |
"index": entity['index'],
|
|
|
64 |
"start": entity['start'],
|
65 |
"end": entity['end']
|
66 |
})
|
67 |
+
|
68 |
tagged_sentence = tag_sentence(text, entities_dict)
|
69 |
+
return dict(entities_dict), tagged_sentence
|
70 |
|
71 |
# Streamlit UI
|
72 |
st.title("CyNER 2.0 - Named Entity Recognition")
|
|
|
75 |
input_text = st.text_area("Input Text", "Type your text here...")
|
76 |
|
77 |
if st.button("Analyze"):
|
78 |
+
if len(input_text) > MAX_INPUT_LENGTH:
|
79 |
+
st.warning(f"Text is too long! Please enter less than {MAX_INPUT_LENGTH} characters.")
|
80 |
+
elif input_text.strip():
|
81 |
entities_dict, tagged_sentence = perform_ner(input_text)
|
82 |
|
83 |
# Display results
|
|
|
87 |
st.subheader("Entities and Details")
|
88 |
st.json(entities_dict)
|
89 |
else:
|
90 |
+
st.warning("Please enter some text for analysis.")
|