DHRUV SHEKHAWAT committed on
Commit 0b15668 · 1 Parent(s): 11deb71

Update app.py

Files changed (1)
  1. app.py +72 -26
app.py CHANGED
@@ -39,7 +39,29 @@ class TransformerChatbot(Model):
    def create_padding_mask(self, seq):
        mask = tf.cast(tf.math.equal(seq, 0), tf.float32)
        return mask[:, tf.newaxis, tf.newaxis, :]
-def textcompletion_model(vocab_size, max_len, d_model, n_head, ff_dim, dropout_rate,weights, datafile , dict, len , text2):
+
+st.title("UniGLM TEXT completion Model")
+st.subheader("Next Word Prediction AI Model by Webraft-AI")
+#Picking what NLP task you want to do
+option = st.selectbox('Model',('13M','26M')) #option is stored in this variable
+#Textbox for text user is entering
+st.subheader("Enter a word from which a sentence / word would be predicted")
+len2 = st.text_input('Enter sequence length: ')
+text2 = st.text_input('Enter word: ') #text is stored in this variable
+
+
+if option == '13M':
+    vocab_size = 100000
+    max_len = 1
+    d_model = 64 # 64 , 1024
+    n_head = 4 # 8 , 16
+    ff_dim = 256 # 256 , 2048
+    dropout_rate = 0.1 # 0.5 , 0.2
+    weights = "predict3"
+    datafile = "data2.txt"
+    dict = "dict_predict3.bin.npz"
+    len = len2
+    text2 = text2
    with open(datafile,"r") as f:
        text = f.read()
    text = text.lower()
@@ -88,30 +110,6 @@ def textcompletion_model(vocab_size, max_len, d_model, n_head, ff_dim, dropout_r
            input_sequence1 = pad_sequences([given_X1], maxlen=max_len, padding='post')

        out2 = output_sentence
-        return out2
-st.title("UniGLM TEXT completion Model")
-st.subheader("Next Word Prediction AI Model by Webraft-AI")
-#Picking what NLP task you want to do
-option = st.selectbox('Model',('13M','26M')) #option is stored in this variable
-#Textbox for text user is entering
-st.subheader("Enter a word from which a sentence / word would be predicted")
-len2 = st.text_input('Enter sequence length: ')
-text2 = st.text_input('Enter word: ') #text is stored in this variable
-
-
-if option == '13M':
-    vocab_size = 100000
-    max_len = 1
-    d_model = 64 # 64 , 1024
-    n_head = 4 # 8 , 16
-    ff_dim = 256 # 256 , 2048
-    dropout_rate = 0.1 # 0.5 , 0.2
-    weights = "predict3"
-    datafile = "data2.txt"
-    dict = "dict_predict3.bin.npz"
-    len = len2
-    text2 = text2
-    out2 = textcompletion_model(vocab_size, max_len, d_model, n_head, ff_dim, dropout_rate,weights, datafile , dict, len , text2)


elif option=="26M":
@@ -126,7 +124,55 @@ elif option=="26M":
    dict = "dict_predict3.bin.npz"
    len = len2
    text2 = text2
-    out2 = textcompletion_model(vocab_size, max_len, d_model, n_head, ff_dim, dropout_rate,weights, datafile , dict, len , text2)
+    with open(datafile,"r") as f:
+        text = f.read()
+    text = text.lower()
+    words = text.split()
+    loaded_dict = np.load(dict, allow_pickle=True)
+    word_to_num = loaded_dict["word_to_num"].item()
+    num_to_word = loaded_dict["num_to_word"].item()
+    X = []
+    Y = []
+    for i in range(len(words)-1):
+        word = words[i]
+        next_word = words[i+1]
+        X.append(word_to_num[word])
+        Y.append(word_to_num[next_word])
+    Y.append(0)
+
+    X.append(word_to_num[words[-1]])
+    X_train = pad_sequences([X])
+    y_train = pad_sequences([Y])

+
+
+    chatbot = TransformerChatbot(vocab_size, max_len, d_model, n_head, ff_dim, dropout_rate)
+    chatbot.load_weights(weights)
+    chatbot.build(input_shape=(None, max_len)) # Build the model
+    chatbot.compile(optimizer="adam", loss="sparse_categorical_crossentropy")
+
+    for i in range(1):
+        other_text1 = text2
+        other_text1 = other_text1.lower()
+        other_words1 = other_text1.split()
+        other_num1 = [word_to_num[word] for word in other_words1]
+        given_X1 = other_num1
+        input_sequence1 = pad_sequences([given_X1], maxlen=max_len, padding='post')
+        output_sentence = ""
+        for _ in range(len):
+            predicted_token = np.argmax(chatbot.predict(input_sequence1), axis=-1)
+            predicted_token = predicted_token.item()
+            out = num_to_word[predicted_token]
+
+
+            output_sentence = out
+
+            given_X1 = given_X1[1:]
+            given_X1.append(predicted_token)
+            input_sequence1 = pad_sequences([given_X1], maxlen=max_len, padding='post')
+
+        out2 = output_sentence
+else:
+    out2 = "Error: Wrong Model Selected"
st.write("Predicted Text: ")
st.write(out2)
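
The decoding loop added in the 26M branch above rebuilds TransformerChatbot from saved weights and then generates text greedily: the user's word is encoded with word_to_num, padded to max_len, the argmax of the model's prediction is mapped back through num_to_word, and the input window is slid forward by one token. A minimal, self-contained sketch of that pattern follows; the random-logits predict_stub, the toy vocabulary, the pad_post helper, and the fixed seq_len are stand-ins for the trained model, dict_predict3.bin.npz, keras pad_sequences, and the user-supplied length, and are not part of this commit.

# Sketch of the greedy next-word loop in app.py (stand-ins noted in comments).
import numpy as np

rng = np.random.default_rng(0)

# Toy vocabulary; in the app, word_to_num / num_to_word come from dict_predict3.bin.npz.
words = ["<pad>", "the", "cat", "sat", "on", "mat"]
word_to_num = {w: i for i, w in enumerate(words)}
num_to_word = {i: w for i, w in enumerate(words)}

max_len = 1   # window length, matching the configs in this commit
seq_len = 5   # hypothetical number of words to generate (len2 in the app)

def predict_stub(input_sequence):
    # Stand-in for chatbot.predict(...): random logits of shape (batch, max_len, vocab).
    return rng.random((1, max_len, len(words)))

def pad_post(seq, maxlen):
    # Simplified post-padding, standing in for keras pad_sequences(..., padding='post').
    out = np.zeros((1, maxlen), dtype=np.int64)
    out[0, :min(len(seq), maxlen)] = seq[:maxlen]
    return out

given_X1 = [word_to_num["the"]]                # encoded user input (text2 in the app)
input_sequence1 = pad_post(given_X1, max_len)

generated = []
for _ in range(seq_len):
    # Greedy decoding: pick the most likely token at the last window position.
    predicted_token = int(np.argmax(predict_stub(input_sequence1), axis=-1)[0, -1])
    generated.append(num_to_word[predicted_token])
    # Slide the window: drop the oldest token, append the new prediction.
    given_X1 = given_X1[1:]
    given_X1.append(predicted_token)
    input_sequence1 = pad_post(given_X1, max_len)

print(" ".join(generated))

In app.py the window is max_len = 1, so each step is conditioned only on the previously predicted token; the sketch keeps the same given_X1[1:] + append update, but collects every predicted word rather than only the last one.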