Update app.py
Browse files
app.py
CHANGED
@@ -1,11 +1,20 @@
|
|
1 |
-
import streamlit as st
|
2 |
-
import skops.hub_utils as hub_utils
|
3 |
-
import pandas as pd
|
4 |
import re
|
5 |
from nltk.tokenize import word_tokenize
|
6 |
import nltk
|
7 |
|
8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
nltk.download('punkt')
|
10 |
|
11 |
|
@@ -114,18 +123,17 @@ def features(sentence, index):
|
|
114 |
}
|
115 |
|
116 |
|
117 |
-
import gradio as gr
|
118 |
|
119 |
# Define the function for processing user input
|
120 |
def process_text(text_input):
|
121 |
if text_input:
|
122 |
-
# Prepare text
|
123 |
prepared_text = prepare_text(text_input)
|
124 |
|
125 |
# Tokenize text
|
126 |
tokenized_text = word_tokenize(prepared_text)
|
127 |
|
128 |
-
# Extract features
|
129 |
features_list = [features(tokenized_text, i) for i in range(len(tokenized_text))]
|
130 |
|
131 |
# Create a DataFrame with the features
|
@@ -133,7 +141,10 @@ def process_text(text_input):
|
|
133 |
|
134 |
# Load the model from the Hub
|
135 |
model_id = "Alshargi/arabic-msa-dialects-segmentation"
|
136 |
-
|
|
|
|
|
|
|
137 |
|
138 |
# Return the model output
|
139 |
return res
|
@@ -145,3 +156,4 @@ iface = gr.Interface(fn=process_text, inputs="text", outputs="text", title="Arab
|
|
145 |
|
146 |
# Launch the Gradio interface
|
147 |
iface.launch(share=True)
|
|
|
|
1 |
+
#import streamlit as st
|
2 |
+
#import skops.hub_utils as hub_utils
|
3 |
+
#import pandas as pd
|
4 |
import re
|
5 |
from nltk.tokenize import word_tokenize
|
6 |
import nltk
|
7 |
|
8 |
|
9 |
+
|
10 |
+
|
11 |
+
import gradio as gr
|
12 |
+
import pandas as pd
|
13 |
+
from nltk.tokenize import word_tokenize
|
14 |
+
from transformers import AutoModelForSequenceClassification
|
15 |
+
import hub_utils # Assuming you have a custom module for interacting with the Hugging Face model hub
|
16 |
+
|
17 |
+
|
18 |
nltk.download('punkt')
|
19 |
|
20 |
|
|
|
123 |
}
|
124 |
|
125 |
|
|
|
126 |
|
127 |
# Define the function for processing user input
|
128 |
def process_text(text_input):
|
129 |
if text_input:
|
130 |
+
# Prepare text (define this function)
|
131 |
prepared_text = prepare_text(text_input)
|
132 |
|
133 |
# Tokenize text
|
134 |
tokenized_text = word_tokenize(prepared_text)
|
135 |
|
136 |
+
# Extract features (define this function)
|
137 |
features_list = [features(tokenized_text, i) for i in range(len(tokenized_text))]
|
138 |
|
139 |
# Create a DataFrame with the features
|
|
|
141 |
|
142 |
# Load the model from the Hub
|
143 |
model_id = "Alshargi/arabic-msa-dialects-segmentation"
|
144 |
+
model = AutoModelForSequenceClassification.from_pretrained(model_id)
|
145 |
+
|
146 |
+
# Get model output (define or import the get_model_output function)
|
147 |
+
res = hub_utils.get_model_output(model, data)
|
148 |
|
149 |
# Return the model output
|
150 |
return res
|
|
|
156 |
|
157 |
# Launch the Gradio interface
|
158 |
iface.launch(share=True)
|
159 |
+
|