from transformers import pipeline, AutoTokenizer
import streamlit as st
def context_text(text):
    return f"### Context\n{text}\n\n### Answer"
@st.cache_resource
def load_pipe():
    model_name = "MSey/pbt_CaBERT_7_c10731"
    return pipeline("token-classification", model=model_name), AutoTokenizer.from_pretrained(model_name)
pipe, tokenizer = load_pipe()
st.header("Test Environment for pbt_CaBERT_7_c10731")
user_input = st.text_input("Enter your Prompt here:", "")
contexted_input = context_text(user_input)
if user_input:
    with st.spinner('Generating response...'):
        response = pipe(contexted_input)
        st.write("Response:")
        st.write(response)

        # Tokenize the same templated prompt that was fed to the pipeline so the
        # entity token indices align with the character offsets.
        tokens = tokenizer(contexted_input, return_offsets_mapping=True, return_tensors='pt')
        offset_mapping = tokens['offset_mapping'][0].numpy().tolist()

        # Offsets refer to the templated prompt; shift them back into user_input.
        prefix_len = len("### Context\n")  # must match the template in context_text

        # Initialize the highlighted text
        highlighted_text = ""
        last_position = 0

        # Process each entity and highlight the labeled words
        for entity in response:
            start, end = offset_mapping[entity['index']]
            start = max(start - prefix_len, 0)
            end = min(end - prefix_len, len(user_input))
            if end <= start:
                continue  # token belongs to the template, not to the user text
            label = entity['entity']

            # Add text before the entity
            highlighted_text += user_input[last_position:start]
            # Add the highlighted entity with its label as a superscript
            highlighted_text += (
                f'<mark style="background-color: #FFFF00;">{user_input[start:end]}</mark>'
                f'<sup>{label}</sup>'
            )
            # Update the last position
            last_position = end

        # Add remaining text after the last entity
        highlighted_text += user_input[last_position:]

        # Display the highlighted text using st.markdown
        st.markdown(highlighted_text, unsafe_allow_html=True)
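
# For reference: a sketch of the raw token-classification pipeline output that the
# loop above consumes. The keys ('entity', 'score', 'index', 'word', 'start', 'end')
# are those of transformers' non-aggregated token-classification pipeline; the
# concrete label, score, and word below are made-up placeholders, not real output
# of this model.
#
#   [
#       {'entity': 'LABEL_1', 'score': 0.98, 'index': 4,
#        'word': 'Beispiel', 'start': 12, 'end': 20},
#       ...
#   ]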