Alshargi committed
Commit 59515e7 · verified · 1 Parent(s): bfb8606

Update app.py

Files changed (1)
  1. app.py +28 -66
app.py CHANGED
@@ -1,105 +1,67 @@
 import streamlit as st
-
 import joblib
-from nltk import word_tokenize
+import re
 from transformers import pipeline

+# Load the scikit-learn model
+sklearn_model = joblib.load("sklearn_model.pkl")
+
+# Wrap the scikit-learn model inside a Hugging Face pipeline
+pipeline_model = pipeline(task="feature-extraction", model=sklearn_model)

-
-#import string, re
+# Define feature functions
 def features(sentence, index):
     return {
         'word': sentence[index],
         'is_first': index == 0,
         'is_last': index == len(sentence) - 1,
         'lword': len(sentence[index]),
-
         'prefix-1': sentence[index][:1],
         'prefix-2': sentence[index][:2],
         'prefix-3': sentence[index][:3],
         'prefix-4': sentence[index][:4],
         'prefix-5': sentence[index][:5],
-
         'suffix-1': sentence[index][-1],
         'suffix-2': sentence[index][-2:],
         'suffix-3': sentence[index][-3:],
         'suffix-4': sentence[index][-4:],
         'suffix-5': sentence[index][-5:],
-
         'prev_word_4': prvwords_4(sentence, index),
         'prev_word_3': prvwords_3(sentence, index),
         'prev_word_2': prvwords_2(sentence, index),
         'prev_word_1': prvwords_1(sentence, index),
-
-
         'next_word_1': nextwords_1(sentence, index),
         'next_word_2': nextwords_2(sentence, index),
         'next_word_3': nextwords_3(sentence, index),
         'next_word_4': nextwords_4(sentence, index),
-
         'is_numeric': sentence[index].isdigit(),
-    }
-
-
-
-
-def rebuildxx(ww, xres):
-    numprfx = xres.count('p')
-    numsufx = xres.count('f')
-    resfinal = ''
-    if numprfx != 0 and numsufx != 0 :
-        resfinal = "{}+{}+{}".format(ww[:numprfx] , ww[numprfx:-numsufx] , ww[-numsufx:] )
-    if numprfx == 0 and numsufx == 0 :
-        #resfinal = "{}+{}+{}".format("", ww , "" )
-        resfinal = "{}".format(ww )
-
-    if numprfx == 0 and numsufx != 0 :
-        #resfinal = "{}+{}+{}".format("" , ww[:-numsufx], ww[-numsufx:] )
-        resfinal = "{}+{}".format(ww[:-numsufx], ww[-numsufx:] )
-
-    if numprfx != 0 and numsufx == 0 :
-        #resfinal = "{}+{}+{}".format(ww[:numprfx] , ww[numprfx:], "")
-        resfinal = "{}+{}".format(ww[:numprfx] , ww[numprfx:])
-
-    return resfinal
-
-
-
-import re
+    }

+# Function to prepare text
 def prepare_text(text):
-    # Define regular expression pattern to match symbols and punctuation from any language
-    symbol_pattern = r'([^\w\s\d])' # Capture non-word, non-space, non-digit characters
+    symbol_pattern = r'([^\w\s\d])'
     prepared_text = re.sub(symbol_pattern, r' \1 ', text)
     prepared_text = re.sub(r'\s+', ' ', prepared_text)
+    return prepared_text.strip()

-    return prepared_text.strip() # Remove leading and trailing spaces
-
-
-
-
-
-import streamlit as st
-from transformers import pipeline
-
-# Load the model using the Hugging Face model hub
-model = pipeline("text-classification", model="Alshargi/arabic-msa-dialects-segmentation")
-
-# Slider to select a value
-x = st.text_input('Enter a text')
-
-# Check if text input is not empty
-if x:
-    # Make prediction using the loaded model
-    result = model(x)
+# Text input field for user input
+text_input = st.text_input("Enter some text:")

+# Check if the user input is not empty
+if text_input:
+    # Prepare text
+    prepared_text = prepare_text(text_input)
+
+    # Tokenize text
+    tokenized_text = word_tokenize(prepared_text)
+
+    # Extract features
+    features_list = [features(tokenized_text, i) for i in range(len(tokenized_text))]
+
+    # Use the Hugging Face pipeline to make predictions
+    prediction = pipeline_model(features_list)
+
     # Display the prediction
-    st.write("Prediction:", result)
+    st.write("Prediction:", prediction)
 else:
     st.write("Please enter some text.")
-
-
-
-
-
-