Alshargi committed
Commit 59515e7 · verified · 1 Parent(s): bfb8606

Update app.py

Files changed (1)
  1. app.py +28 -66
app.py CHANGED
@@ -1,105 +1,67 @@
 import streamlit as st
-
 import joblib
-from nltk import word_tokenize
+import re
 from transformers import pipeline

+# Load the scikit-learn model
+sklearn_model = joblib.load("sklearn_model.pkl")
+
+# Wrap the scikit-learn model inside a Hugging Face pipeline
+pipeline_model = pipeline(task="feature-extraction", model=sklearn_model)

-
-#import string, re
+# Define feature functions
 def features(sentence, index):
     return {
         'word': sentence[index],
         'is_first': index == 0,
         'is_last': index == len(sentence) - 1,
         'lword': len(sentence[index]),
-
         'prefix-1': sentence[index][:1],
         'prefix-2': sentence[index][:2],
         'prefix-3': sentence[index][:3],
         'prefix-4': sentence[index][:4],
         'prefix-5': sentence[index][:5],
-
         'suffix-1': sentence[index][-1],
         'suffix-2': sentence[index][-2:],
         'suffix-3': sentence[index][-3:],
         'suffix-4': sentence[index][-4:],
         'suffix-5': sentence[index][-5:],
-
         'prev_word_4': prvwords_4(sentence, index),
         'prev_word_3': prvwords_3(sentence, index),
         'prev_word_2': prvwords_2(sentence, index),
         'prev_word_1': prvwords_1(sentence, index),
-
-
         'next_word_1': nextwords_1(sentence, index),
         'next_word_2': nextwords_2(sentence, index),
         'next_word_3': nextwords_3(sentence, index),
         'next_word_4': nextwords_4(sentence, index),
-
         'is_numeric': sentence[index].isdigit(),
-    }
-
-
-
-
-def rebuildxx(ww, xres):
-    numprfx = xres.count('p')
-    numsufx = xres.count('f')
-    resfinal = ''
-    if numprfx != 0 and numsufx != 0 :
-        resfinal = "{}+{}+{}".format(ww[:numprfx] , ww[numprfx:-numsufx] , ww[-numsufx:] )
-    if numprfx == 0 and numsufx == 0 :
-        #resfinal = "{}+{}+{}".format("", ww , "" )
-        resfinal = "{}".format(ww )
-
-    if numprfx == 0 and numsufx != 0 :
-        #resfinal = "{}+{}+{}".format("" , ww[:-numsufx], ww[-numsufx:] )
-        resfinal = "{}+{}".format(ww[:-numsufx], ww[-numsufx:] )
-
-    if numprfx != 0 and numsufx == 0 :
-        #resfinal = "{}+{}+{}".format(ww[:numprfx] , ww[numprfx:], "")
-        resfinal = "{}+{}".format(ww[:numprfx] , ww[numprfx:])
-
-    return resfinal
-
-
-
-import re
+    }

+# Function to prepare text
 def prepare_text(text):
-    # Define regular expression pattern to match symbols and punctuation from any language
-    symbol_pattern = r'([^\w\s\d])' # Capture non-word, non-space, non-digit characters
+    symbol_pattern = r'([^\w\s\d])'
     prepared_text = re.sub(symbol_pattern, r' \1 ', text)
     prepared_text = re.sub(r'\s+', ' ', prepared_text)
+    return prepared_text.strip()

-    return prepared_text.strip() # Remove leading and trailing spaces
-
-
-
-
-
-import streamlit as st
-from transformers import pipeline
-
-# Load the model using the Hugging Face model hub
-model = pipeline("text-classification", model="Alshargi/arabic-msa-dialects-segmentation")
-
-# Slider to select a value
-x = st.text_input('Enter a text')
-
-# Check if text input is not empty
-if x:
-    # Make prediction using the loaded model
-    result = model(x)
+# Text input field for user input
+text_input = st.text_input("Enter some text:")

+# Check if the user input is not empty
+if text_input:
+    # Prepare text
+    prepared_text = prepare_text(text_input)
+
+    # Tokenize text
+    tokenized_text = word_tokenize(prepared_text)
+
+    # Extract features
+    features_list = [features(tokenized_text, i) for i in range(len(tokenized_text))]
+
+    # Use the Hugging Face pipeline to make predictions
+    prediction = pipeline_model(features_list)
+
     # Display the prediction
-    st.write("Prediction:", result)
+    st.write("Prediction:", prediction)
 else:
     st.write("Please enter some text.")
-
-
-
-
-
-