import numpy as np
import pandas as pd
import streamlit as st
from streamlit_text_rating.st_text_rater import st_text_rater
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import AutoModelForMaskedLM
import onnxruntime as ort
import os
import time
import plotly.express as px
import plotly.graph_objects as go
_plotly_config = {'displayModeBar': False}  # hide the plotly toolbar on charts
from sentiment_clf_helper import (classify_sentiment,
                                  create_onnx_model_sentiment,
                                  classify_sentiment_onnx)
from zeroshot_clf_helper import (zero_shot_classification,
                                 create_onnx_model_zs_nli,
                                 create_onnx_model_zs_mlm,
                                 zero_shot_classification_nli_onnx,
                                 zero_shot_classification_fillmask_onnx)
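# These helpers live in this Space's local sentiment_clf_helper.py and
# zeroshot_clf_helper.py modules; judging by their names, they wrap
# tokenization, ONNX export, and ONNX Runtime inference for each task.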
import multiprocessing
total_threads=multiprocessing.cpu_count()  # for ORT inference
import yaml
def read_yaml(file_path):
    with open(file_path, "r") as f:
        return yaml.safe_load(f)
config = read_yaml('config.yaml')
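# For reference, config.yaml is expected to provide the sections and keys read
# below. The layout is sketched here with illustrative placeholder values
# (the Space's actual checkpoint names live in the config file itself):
#
# SENTIMENT_CLF:
#   sent_chkpt: <hub checkpoint id>
#   sent_mdl_dir: <local model/tokenizer dir>
#   sent_onnx_mdl_dir: <dir holding the exported .onnx file>
#   sent_onnx_mdl_name: <model.onnx>
#   sent_onnx_quant_mdl_name: <model_quantized.onnx>
# ZEROSHOT_CLF:
#   zs_chkpt / zs_mdl_dir / zs_onnx_mdl_dir / zs_onnx_mdl_name / zs_onnx_quant_mdl_name
# ZEROSHOT_MLM:
#   zs_mlm_chkpt / zs_mlm_mdl_dir / zs_mlm_onnx_mdl_dir / zs_mlm_onnx_mdl_name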
sent_chkpt=config['SENTIMENT_CLF']['sent_chkpt']
sent_mdl_dir=config['SENTIMENT_CLF']['sent_mdl_dir']
sent_onnx_mdl_dir=config['SENTIMENT_CLF']['sent_onnx_mdl_dir']
sent_onnx_mdl_name=config['SENTIMENT_CLF']['sent_onnx_mdl_name']
sent_onnx_quant_mdl_name=config['SENTIMENT_CLF']['sent_onnx_quant_mdl_name']
zs_chkpt=config['ZEROSHOT_CLF']['zs_chkpt']
zs_mdl_dir=config['ZEROSHOT_CLF']['zs_mdl_dir']
zs_onnx_mdl_dir=config['ZEROSHOT_CLF']['zs_onnx_mdl_dir']
zs_onnx_mdl_name=config['ZEROSHOT_CLF']['zs_onnx_mdl_name']
zs_onnx_quant_mdl_name=config['ZEROSHOT_CLF']['zs_onnx_quant_mdl_name']
zs_mlm_chkpt=config['ZEROSHOT_MLM']['zs_mlm_chkpt']
zs_mlm_mdl_dir=config['ZEROSHOT_MLM']['zs_mlm_mdl_dir']
zs_mlm_onnx_mdl_dir=config['ZEROSHOT_MLM']['zs_mlm_onnx_mdl_dir']
zs_mlm_onnx_mdl_name=config['ZEROSHOT_MLM']['zs_mlm_onnx_mdl_name']
st.set_page_config(  # Alternate names: setup_page, page, layout
    layout="wide",  # Can be "centered" or "wide". In the future also "dashboard", etc.
    initial_sidebar_state="auto",  # Can be "auto", "expanded", "collapsed"
    page_title=None,  # String or None. Strings get appended with "• Streamlit".
)
padding_top = 0
st.markdown(f"""
    <style>
        .reportview-container .main .block-container{{
            padding-top: {padding_top}rem;
        }}
    </style>""",
    unsafe_allow_html=True,
)
def set_page_title(title):
    st.sidebar.markdown(unsafe_allow_html=True, body=f"""
        <iframe height=0 srcdoc="<script>
            const title = window.parent.document.querySelector('title')
            const oldObserver = window.parent.titleObserver
            if (oldObserver) {{
                oldObserver.disconnect()
            }}
            const newObserver = new MutationObserver(function(mutations) {{
                const target = mutations[0].target
                if (target.text !== '{title}') {{
                    target.text = '{title}'
                }}
            }})
            newObserver.observe(title, {{ childList: true }})
            window.parent.titleObserver = newObserver
            title.text = '{title}'
        </script>" />
    """)
set_page_title('NLP use cases')
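# set_page_title works around set_page_config being callable only once: the
# zero-height iframe injects a script into the parent page that rewrites the
# document <title> and installs a MutationObserver so Streamlit reruns can't
# reset it (a common workaround from the Streamlit forums).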
# Hide Menu Option
hide_streamlit_style = """
<style>
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
</style>
"""
st.markdown(hide_streamlit_style, unsafe_allow_html=True)
# ONNX Runtime threading settings -- session options must be passed when the inference session is created
# os.environ["OMP_NUM_THREADS"] = "1" #use this before changing session options of onnx runtime
session_options_ort = ort.SessionOptions()
session_options_ort.intra_op_num_threads=1
session_options_ort.inter_op_num_threads=1
# session_options_ort.execution_mode = session_options_ort.ExecutionMode.ORT_SEQUENTIAL
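# Rationale (an assumption, based on the README note further down): each Space
# user effectively gets a single CPU core, so pinning ORT to one intra-op and
# one inter-op thread avoids oversubscription instead of letting ORT spawn a
# thread per core.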
@st.cache(allow_output_mutation=True, suppress_st_warning=True, max_entries=None, ttl=None)
def create_model_dir(chkpt, model_dir, task_type):
    if not os.path.exists(model_dir):
        try:
            os.mkdir(path=model_dir)
        except OSError:
            pass
        if task_type == 'classification':
            _model = AutoModelForSequenceClassification.from_pretrained(chkpt)
            _tokenizer = AutoTokenizer.from_pretrained(chkpt)
            _model.save_pretrained(model_dir)
            _tokenizer.save_pretrained(model_dir)
        elif task_type == 'mlm':
            _model = AutoModelForMaskedLM.from_pretrained(chkpt)
            _tokenizer = AutoTokenizer.from_pretrained(chkpt)
            _model.save_pretrained(model_dir)
            _tokenizer.save_pretrained(model_dir)
        else:
            pass
    else:
        pass
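# create_model_dir is a one-time bootstrap: on first run it downloads the
# checkpoint from the Hugging Face Hub and saves model + tokenizer into
# model_dir, so subsequent loads read from local disk; @st.cache additionally
# skips the call entirely within a running session.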
#title using markdown
st.markdown("<h1 style='text-align: center; color: #3366ff;'>NLP Basic Use Cases</h1>", unsafe_allow_html=True)
st.markdown("---")
with st.sidebar:
    # title using markdown
    st.markdown("<h1 style='text-align: left;'>NLP Tasks</h1>", unsafe_allow_html=True)
    select_task = st.selectbox(label="Select task from drop down menu",
                               options=['README',
                                        'Detect Sentiment', 'Zero Shot Classification'])
############### Pre-Download & instantiate objects for sentiment analysis *********************** START **********************
# create model/tokenizer dir for sentiment classification for faster inference
create_model_dir(chkpt=sent_chkpt, model_dir=sent_mdl_dir, task_type='classification')
@st.cache(allow_output_mutation=True, suppress_st_warning=True, max_entries=None, ttl=None)
def sentiment_task_selected(task,
                            sent_chkpt=sent_chkpt,
                            sent_mdl_dir=sent_mdl_dir,
                            sent_onnx_mdl_dir=sent_onnx_mdl_dir,
                            sent_onnx_mdl_name=sent_onnx_mdl_name,
                            sent_onnx_quant_mdl_name=sent_onnx_quant_mdl_name):
    ## model & tokenizer initialization for normal sentiment classification
    # model_sentiment=AutoModelForSequenceClassification.from_pretrained(sent_chkpt)
    # tokenizer_sentiment=AutoTokenizer.from_pretrained(sent_chkpt)
    tokenizer_sentiment = AutoTokenizer.from_pretrained(sent_mdl_dir)

    ## create onnx model for sentiment classification, but once created in your local app comment this out
    # create_onnx_model_sentiment(_model=model_sentiment, _tokenizer=tokenizer_sentiment)

    # create inference session
    sentiment_session = ort.InferenceSession(f"{sent_onnx_mdl_dir}/{sent_onnx_mdl_name}", sess_options=session_options_ort)
    # sentiment_session_quant = ort.InferenceSession(f"{sent_onnx_mdl_dir}/{sent_onnx_quant_mdl_name}")

    return tokenizer_sentiment, sentiment_session
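# Thanks to @st.cache, the (tokenizer, ONNX session) pair is created once per
# process and reused on reruns, which is why the "Total time to load model"
# shown in the UI drops sharply after the first request.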
############## Pre-Download & instantiate objects for sentiment analysis ********************* END **********************************
############### Pre-Download & instantiate objects for Zero shot clf NLI *********************** START **********************
# create model/token dir for zeroshot clf (a no-op if the directory already exists)
create_model_dir(chkpt=zs_chkpt, model_dir=zs_mdl_dir, task_type='classification')
@st.cache(allow_output_mutation=True, suppress_st_warning=True, max_entries=None, ttl=None)
def zs_nli_task_selected(task,
                         zs_chkpt,
                         zs_mdl_dir,
                         zs_onnx_mdl_dir,
                         zs_onnx_mdl_name):
    ## model & tokenizer initialization for normal ZS classification
    # model_zs=AutoModelForSequenceClassification.from_pretrained(zs_chkpt)
    ## we just need the tokenizer for inference, not the model, since the onnx model is already saved
    # tokenizer_zs=AutoTokenizer.from_pretrained(zs_chkpt)
    tokenizer_zs = AutoTokenizer.from_pretrained(zs_mdl_dir)

    ## create onnx model for zeroshot, but once created locally comment it out
    # create_onnx_model_zs_nli()

    # create inference session from onnx model
    zs_session = ort.InferenceSession(f"{zs_onnx_mdl_dir}/{zs_onnx_mdl_name}", sess_options=session_options_ort)

    return tokenizer_zs, zs_session
############## Pre-Download & instantiate objects for Zero shot NLI analysis ********************* END **********************************
############### Pre-Download & instantiate objects for Zero shot clf NLI *********************** START **********************
## create model/token dir for zeroshot clf -- already created so not required
# create_model_dir(chkpt=zs_mlm_chkpt, model_dir=zs_mlm_mdl_dir, task_type='mlm')
@st.cache(allow_output_mutation=True, suppress_st_warning=True, max_entries=None, ttl=None)
def zs_mlm_task_selected(task,
                         zs_mlm_chkpt=zs_mlm_chkpt,
                         zs_mlm_mdl_dir=zs_mlm_mdl_dir,
                         zs_mlm_onnx_mdl_dir=zs_mlm_onnx_mdl_dir,
                         zs_mlm_onnx_mdl_name=zs_mlm_onnx_mdl_name):
    ## model & tokenizer initialization for fill-mask ZS classification
    model_zs_mlm = AutoModelForMaskedLM.from_pretrained(zs_mlm_mdl_dir)
    ## we just need the tokenizer for inference, not the model, once the onnx model is saved
    # tokenizer_zs_mlm=AutoTokenizer.from_pretrained(zs_mlm_chkpt)
    tokenizer_zs_mlm = AutoTokenizer.from_pretrained(zs_mlm_mdl_dir)

    ## create onnx model for zeroshot, but once created locally comment it out
    create_onnx_model_zs_mlm(_model=model_zs_mlm,
                             _tokenizer=tokenizer_zs_mlm,
                             zs_mlm_onnx_mdl_dir=zs_mlm_onnx_mdl_dir)

    # create inference session from onnx model
    zs_session_mlm = ort.InferenceSession(f"{zs_mlm_onnx_mdl_dir}/{zs_mlm_onnx_mdl_name}", sess_options=session_options_ort)

    return tokenizer_zs_mlm, zs_session_mlm
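# Note the asymmetry with the NLI path above: there the ONNX export is assumed
# to be pre-built (create_onnx_model_zs_nli() is commented out), while this
# MLM path still exports the ONNX model on first run before opening a session.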
############## Pre-Download & instantiate objects for Zero shot MLM analysis ********************* END **********************************
if select_task == 'README':
    st.header("NLP Summary")
    st.write(f"The current app deployed on Hugging Face Spaces has {total_threads} CPU cores, but only 1 is available per user, "
             f"so inference times will be on the higher side.")
if select_task == 'Detect Sentiment':
    t1 = time.time()
    tokenizer_sentiment, sentiment_session = sentiment_task_selected(task=select_task)
    ## the below 2 steps are slower, as caching is not enabled
    # tokenizer_sentiment = AutoTokenizer.from_pretrained(sent_mdl_dir)
    # sentiment_session = ort.InferenceSession(f"{sent_onnx_mdl_dir}/{sent_onnx_mdl_name}")
    t2 = time.time()
    st.write(f"Total time to load model is {(t2 - t1) * 1000:.1f} ms")

    st.subheader("You are now performing Sentiment Analysis")
    input_texts = st.text_input(label="Input texts separated by commas")
    c1, c2, _, _ = st.columns(4)

    with c1:
        response1 = st.button("Compute (ONNX runtime)")

    if response1:
        start = time.time()
        sentiments = classify_sentiment_onnx(input_texts,
                                             _session=sentiment_session,
                                             _tokenizer=tokenizer_sentiment)
        end = time.time()
        st.write(f"Time taken for computation {(end - start) * 1000:.1f} ms")
        for i, t in enumerate(input_texts.split(',')):
            if sentiments[i] == 'Positive':
                response = st_text_rater(t + f" --> This statement is {sentiments[i]}",
                                         color_background='rgb(154,205,50)', key=t)
            else:
                response = st_text_rater(t + f" --> This statement is {sentiments[i]}",
                                         color_background='rgb(233,116,81)', key=t)
    else:
        pass
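# For orientation, a minimal sketch of what an ONNX sentiment call along the
# lines of classify_sentiment_onnx might look like. This is an assumed shape
# for illustration only -- the real helper lives in sentiment_clf_helper.py
# and may differ in input names and label mapping:
#
# def classify_sentiment_onnx_sketch(texts, _session, _tokenizer):
#     enc = _tokenizer([t.strip() for t in texts.split(',')],
#                      padding=True, truncation=True, return_tensors='np')
#     # ONNX Runtime takes a dict of named inputs matching the exported graph
#     logits = _session.run(None, {k: v for k, v in enc.items()})[0]
#     preds = np.argmax(logits, axis=1)
#     return ['Positive' if p == 1 else 'Negative' for p in preds]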
if select_task == 'Zero Shot Classification':
    t1 = time.time()
    tokenizer_zs, session_zs = zs_nli_task_selected(task=select_task,
                                                    zs_chkpt=zs_chkpt,
                                                    zs_mdl_dir=zs_mdl_dir,
                                                    zs_onnx_mdl_dir=zs_onnx_mdl_dir,
                                                    zs_onnx_mdl_name=zs_onnx_mdl_name)
    t2 = time.time()
    st.write(f"Total time to load NLI model is {(t2 - t1) * 1000:.1f} ms")

    t1 = time.time()
    tokenizer_zs_mlm, session_zs_mlm = zs_mlm_task_selected(task=select_task,
                                                            zs_mlm_chkpt=zs_mlm_chkpt,
                                                            zs_mlm_mdl_dir=zs_mlm_mdl_dir,
                                                            zs_mlm_onnx_mdl_dir=zs_mlm_onnx_mdl_dir,
                                                            zs_mlm_onnx_mdl_name=zs_mlm_onnx_mdl_name)
    t2 = time.time()
    st.write(f"Total time to load MLM model is {(t2 - t1) * 1000:.1f} ms")

    st.subheader("Zero Shot Classification using NLI")
    input_texts = st.text_input(label="Input text to classify into topics")
    input_labels = st.text_input(label="Enter labels separated by commas")
    input_hypothesis = st.text_input(label="Enter your hypothesis", value="This is an example of")

    c1, c2, _ = st.columns(3)
    with c1:
        response1 = st.button("Compute using NLI approach (ONNX runtime)")
    with c2:
        response2 = st.button("Compute using Fill-Mask approach (ONNX runtime)")

    if response1:
        start = time.time()
        df_output = zero_shot_classification_nli_onnx(premise=input_texts,
                                                      labels=input_labels,
                                                      hypothesis=input_hypothesis,
                                                      _session=session_zs,
                                                      _tokenizer=tokenizer_zs)
        end = time.time()
        st.write(f"Time taken for computation {(end - start) * 1000:.1f} ms")
        fig = px.bar(x='Probability',
                     y='labels',
                     text='Probability',
                     data_frame=df_output,
                     title='Zero Shot NLI Normalized Probabilities')
        st.plotly_chart(fig, config=_plotly_config)
    elif response2:
        start = time.time()
        df_output = zero_shot_classification_fillmask_onnx(premise=input_texts,
                                                           labels=input_labels,
                                                           hypothesis=input_hypothesis,
                                                           _session=session_zs_mlm,
                                                           _tokenizer=tokenizer_zs_mlm)
        end = time.time()
        st.write(f"Time taken for computation {(end - start) * 1000:.1f} ms")
        fig = px.bar(x='Probability',
                     y='Labels',
                     text='Probability',
                     data_frame=df_output,
                     title='Zero Shot MLM Normalized Probabilities')
        st.plotly_chart(fig, config=_plotly_config)
    else:
        pass
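# Background on the two zero-shot strategies above (standard techniques, not
# specific to this app): the NLI approach treats the input text as a premise
# and scores each label via the entailment probability of a hypothesis built
# from the template (e.g. "This is an example of <label>"); the fill-mask
# approach instead asks the MLM to score each candidate label at a masked
# position. Both bar charts show probabilities normalized across the labels.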