Update app.py
app.py CHANGED
@@ -1,27 +1,17 @@
-#import streamlit as st
-#import skops.hub_utils as hub_utils
-#import pandas as pd
-import re
-from nltk.tokenize import word_tokenize
-import nltk
-
-
 
-
-import
+import streamlit as st
+import skops.hub_utils as hub_utils
 import pandas as pd
-from nltk.tokenize import word_tokenize
 from transformers import AutoModelForSequenceClassification
-import
+import re
+from nltk.tokenize import word_tokenize
+import nltk
 
 
 nltk.download('punkt')
 
 
 
-
-
-
 def nextwords_1(ww, inx):
     try:
         return '' if inx == len(ww) - 1 else ww[inx + 1]
@@ -123,18 +113,17 @@ def features(sentence, index):
     }
 
 
-
 # Define the function for processing user input
 def process_text(text_input):
     if text_input:
         # Prepare text (define this function)
-        prepared_text = prepare_text(text_input)
+        prepared_text = prepare_text(text_input)  # Assuming prepare_text function is defined elsewhere
 
         # Tokenize text
-        tokenized_text = word_tokenize(prepared_text)
+        tokenized_text = word_tokenize(prepared_text)  # Assuming word_tokenize function is imported
 
         # Extract features (define this function)
-        features_list = [features(tokenized_text, i) for i in range(len(tokenized_text))]
+        features_list = [features(tokenized_text, i) for i in range(len(tokenized_text))]  # Assuming features function is defined elsewhere
 
         # Create a DataFrame with the features
         data = pd.DataFrame(features_list)
@@ -143,7 +132,7 @@ def process_text(text_input):
         model_id = "Alshargi/arabic-msa-dialects-segmentation"
         model = AutoModelForSequenceClassification.from_pretrained(model_id)
 
-        # Get model output
+        # Get model output using hub_utils
         res = hub_utils.get_model_output(model, data)
 
         # Return the model output
@@ -151,9 +140,17 @@ def process_text(text_input):
     else:
         return "Please enter some text."
 
-
-
-
-#
-
-
+def main():
+    st.title("Model Output with Streamlit")
+
+    # Text input
+    input_text = st.text_input("Enter your text:")
+
+    # Process the text when a button is clicked
+    if st.button("Process"):
+        output = process_text(input_text)
+        st.write("Model Output:")
+        st.write(output)
+
+if __name__ == "__main__":
+    main()
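For context on this change: process_text() calls prepare_text() and features(), which the inline comments say are defined elsewhere in app.py (only the features(sentence, index) signature is visible in the hunk header above). As a rough illustration only, a prepare_text helper for an Arabic segmentation app might normalize the input before tokenization along these lines; this is a minimal sketch assuming simple whitespace cleanup and diacritic stripping, not the repository's actual implementation:

# Hypothetical sketch -- the real prepare_text in app.py is not shown in this diff
def prepare_text(text):
    # Collapse runs of whitespace and trim the ends
    text = re.sub(r'\s+', ' ', text).strip()
    # Strip Arabic diacritics (tashkeel, U+064B-U+0652) -- an assumed normalization step
    text = re.sub(r'[\u064B-\u0652]', '', text)
    return text

With the main() entry point added at the bottom of the file, the app can be launched locally with: streamlit run app.py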