# sam / app.py
import re

import nltk
import pandas as pd
import skops.hub_utils as hub_utils
import streamlit as st
from nltk.tokenize import word_tokenize

# word_tokenize needs NLTK's "punkt" tokenizer models; fetch them once at startup.
# (Newer NLTK releases may additionally require the "punkt_tab" resource.)
nltk.download("punkt", quiet=True)
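# The feature extractor below calls prvwords_1..prvwords_4 and nextwords_1..nextwords_4,
# which are not defined in this file. A minimal sketch of the assumed behaviour follows:
# each helper returns the word at a fixed offset from the current position, or a boundary
# placeholder when the offset falls outside the sentence. Both the helper `_context_word`
# and the placeholder token "<s>" are assumptions; the original training code may differ.
def _context_word(sentence, index, offset):
    """Word at `index + offset`, or a boundary placeholder when out of range."""
    pos = index + offset
    return sentence[pos] if 0 <= pos < len(sentence) else "<s>"

def prvwords_1(sentence, index):
    return _context_word(sentence, index, -1)

def prvwords_2(sentence, index):
    return _context_word(sentence, index, -2)

def prvwords_3(sentence, index):
    return _context_word(sentence, index, -3)

def prvwords_4(sentence, index):
    return _context_word(sentence, index, -4)

def nextwords_1(sentence, index):
    return _context_word(sentence, index, 1)

def nextwords_2(sentence, index):
    return _context_word(sentence, index, 2)

def nextwords_3(sentence, index):
    return _context_word(sentence, index, 3)

def nextwords_4(sentence, index):
    return _context_word(sentence, index, 4)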
# Feature extraction: build a per-token feature dict for the segmentation model.
def features(sentence, index):
    """Return the feature dictionary for the token at `index` in `sentence` (a list of tokens)."""
    return {
        'word': sentence[index],
        'is_first': index == 0,
        'is_last': index == len(sentence) - 1,
        'lword': len(sentence[index]),
        # Character prefixes of length 1-5 (short words simply yield the whole word).
        'prefix-1': sentence[index][:1],
        'prefix-2': sentence[index][:2],
        'prefix-3': sentence[index][:3],
        'prefix-4': sentence[index][:4],
        'prefix-5': sentence[index][:5],
        # Character suffixes of length 1-5.
        'suffix-1': sentence[index][-1],
        'suffix-2': sentence[index][-2:],
        'suffix-3': sentence[index][-3:],
        'suffix-4': sentence[index][-4:],
        'suffix-5': sentence[index][-5:],
        # Context words up to four positions to the left and right.
        'prev_word_4': prvwords_4(sentence, index),
        'prev_word_3': prvwords_3(sentence, index),
        'prev_word_2': prvwords_2(sentence, index),
        'prev_word_1': prvwords_1(sentence, index),
        'next_word_1': nextwords_1(sentence, index),
        'next_word_2': nextwords_2(sentence, index),
        'next_word_3': nextwords_3(sentence, index),
        'next_word_4': nextwords_4(sentence, index),
        'is_numeric': sentence[index].isdigit(),
    }
# Text preparation: put spaces around punctuation/symbols so they become separate tokens.
def prepare_text(text):
    """Separate non-alphanumeric symbols from adjacent characters and normalise whitespace."""
    symbol_pattern = r'([^\w\s\d])'
    prepared_text = re.sub(symbol_pattern, r' \1 ', text)
    prepared_text = re.sub(r'\s+', ' ', prepared_text)
    return prepared_text.strip()
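# Illustrative example (not from the original file): prepare_text("hello,world!")
# returns "hello , world !", so word_tokenize later sees punctuation as separate tokens.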
# Streamlit UI: read text, featurise it, and query the hosted model.
text_input = st.text_input("Enter some text:")

if text_input:
    # Prepare and tokenize the input text.
    prepared_text = prepare_text(text_input)
    tokenized_text = word_tokenize(prepared_text)

    # Extract one feature dict per token and assemble them into a DataFrame.
    features_list = [features(tokenized_text, i) for i in range(len(tokenized_text))]
    data = pd.DataFrame(features_list)

    # Get predictions from the segmentation model hosted on the Hugging Face Hub.
    model_id = "Alshargi/arabic-msa-dialects-segmentation"
    res = hub_utils.get_model_output(model_id, data)

    # Display the model output.
    st.write("Model Output:", res)
else:
    st.write("Please enter some text.")