DHRUV SHEKHAWAT committed on
Commit
11deb71
·
1 Parent(s): 5c677e0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -68
app.py CHANGED
@@ -39,20 +39,12 @@ class TransformerChatbot(Model):
39
  def create_padding_mask(self, seq):
40
  mask = tf.cast(tf.math.equal(seq, 0), tf.float32)
41
  return mask[:, tf.newaxis, tf.newaxis, :]
42
- st.title("UniGLM TEXT completion Model")
43
- st.subheader("Next Word Prediction AI Model by Webraft-AI")
44
- #Picking what NLP task you want to do
45
- option = st.selectbox('Model',('1','2')) #option is stored in this variable
46
- #Textbox for text user is entering
47
- st.subheader("Enter a word from which a sentence / word would be predicted")
48
- text2 = st.text_input('Enter word: ') #text is stored in this variable
49
-
50
- if option == '1':
51
- with open("data2.txt","r") as f:
52
  text = f.read()
53
  text = text.lower()
54
  words = text.split()
55
- loaded_dict = np.load("dict_predict3.bin.npz", allow_pickle=True)
56
  word_to_num = loaded_dict["word_to_num"].item()
57
  num_to_word = loaded_dict["num_to_word"].item()
58
  X = []
@@ -67,16 +59,11 @@ if option == '1':
67
  X.append(word_to_num[words[-1]])
68
  X_train = pad_sequences([X])
69
  y_train = pad_sequences([Y])
70
- vocab_size = 100000
71
- max_len = 1
72
- d_model = 64 # 64 , 1024
73
- n_head = 4 # 8 , 16
74
- ff_dim = 256 # 256 , 2048
75
- dropout_rate = 0.1 # 0.5 , 0.2
76
 
77
 
78
  chatbot = TransformerChatbot(vocab_size, max_len, d_model, n_head, ff_dim, dropout_rate)
79
- chatbot.load_weights("predict3")
80
  chatbot.build(input_shape=(None, max_len)) # Build the model
81
  chatbot.compile(optimizer="adam", loss="sparse_categorical_crossentropy")
82
 
@@ -88,7 +75,7 @@ if option == '1':
88
  given_X1 = other_num1
89
  input_sequence1 = pad_sequences([given_X1], maxlen=max_len, padding='post')
90
  output_sentence = ""
91
- for _ in range(1):
92
  predicted_token = np.argmax(chatbot.predict(input_sequence1), axis=-1)
93
  predicted_token = predicted_token.item()
94
  out = num_to_word[predicted_token]
@@ -101,63 +88,45 @@ if option == '1':
101
  input_sequence1 = pad_sequences([given_X1], maxlen=max_len, padding='post')
102
 
103
  out2 = output_sentence
104
-
105
-
106
- else:
107
- with open("data2.txt","r") as f:
108
- text = f.read()
109
- text = text.lower()
110
- words = text.split()
111
- loaded_dict = np.load("dict_predict3.bin.npz", allow_pickle=True)
112
- word_to_num = loaded_dict["word_to_num"].item()
113
- num_to_word = loaded_dict["num_to_word"].item()
114
- X = []
115
- Y = []
116
- for i in range(len(words)-1):
117
- word = words[i]
118
- next_word = words[i+1]
119
- X.append(word_to_num[word])
120
- Y.append(word_to_num[next_word])
121
- Y.append(0)
122
 
123
- X.append(word_to_num[words[-1]])
124
- X_train = pad_sequences([X])
125
- y_train = pad_sequences([Y])
126
  vocab_size = 100000
127
  max_len = 1
128
  d_model = 64 # 64 , 1024
129
  n_head = 4 # 8 , 16
130
  ff_dim = 256 # 256 , 2048
131
  dropout_rate = 0.1 # 0.5 , 0.2
132
-
133
-
134
- chatbot = TransformerChatbot(vocab_size, max_len, d_model, n_head, ff_dim, dropout_rate)
135
- chatbot.load_weights("predict3")
136
- chatbot.build(input_shape=(None, max_len)) # Build the model
137
- chatbot.compile(optimizer="adam", loss="sparse_categorical_crossentropy")
138
-
139
- for i in range(1):
140
- other_text1 = text2
141
- other_text1 = other_text1.lower()
142
- other_words1 = other_text1.split()
143
- other_num1 = [word_to_num[word] for word in other_words1]
144
- given_X1 = other_num1
145
- input_sequence1 = pad_sequences([given_X1], maxlen=max_len, padding='post')
146
- output_sentence = other_text1+""
147
- for _ in range(10):
148
- predicted_token = np.argmax(chatbot.predict(input_sequence1), axis=-1)
149
- predicted_token = predicted_token.item()
150
- out = num_to_word[predicted_token]
151
-
152
 
153
- output_sentence += " " + out
154
- if out == ".":
155
- break
156
- given_X1 = given_X1[1:]
157
- given_X1.append(predicted_token)
158
- input_sequence1 = pad_sequences([given_X1], maxlen=max_len, padding='post')
159
-
160
- out2 = output_sentence
 
 
 
 
 
161
 
162
  st.write("Predicted Text: ")
163
  st.write(out2)
 
39
  def create_padding_mask(self, seq):
40
  mask = tf.cast(tf.math.equal(seq, 0), tf.float32)
41
  return mask[:, tf.newaxis, tf.newaxis, :]
42
+ def textcompletion_model(vocab_size, max_len, d_model, n_head, ff_dim, dropout_rate,weights, datafile , dict, len , text2):
43
+ with open(datafile,"r") as f:
 
 
 
 
 
 
 
 
44
  text = f.read()
45
  text = text.lower()
46
  words = text.split()
47
+ loaded_dict = np.load(dict, allow_pickle=True)
48
  word_to_num = loaded_dict["word_to_num"].item()
49
  num_to_word = loaded_dict["num_to_word"].item()
50
  X = []
 
59
  X.append(word_to_num[words[-1]])
60
  X_train = pad_sequences([X])
61
  y_train = pad_sequences([Y])
62
+
 
 
 
 
 
63
 
64
 
65
  chatbot = TransformerChatbot(vocab_size, max_len, d_model, n_head, ff_dim, dropout_rate)
66
+ chatbot.load_weights(weights)
67
  chatbot.build(input_shape=(None, max_len)) # Build the model
68
  chatbot.compile(optimizer="adam", loss="sparse_categorical_crossentropy")
69
 
 
75
  given_X1 = other_num1
76
  input_sequence1 = pad_sequences([given_X1], maxlen=max_len, padding='post')
77
  output_sentence = ""
78
+ for _ in range(len):
79
  predicted_token = np.argmax(chatbot.predict(input_sequence1), axis=-1)
80
  predicted_token = predicted_token.item()
81
  out = num_to_word[predicted_token]
 
88
  input_sequence1 = pad_sequences([given_X1], maxlen=max_len, padding='post')
89
 
90
  out2 = output_sentence
91
+ return out2
92
+ st.title("UniGLM TEXT completion Model")
93
+ st.subheader("Next Word Prediction AI Model by Webraft-AI")
94
+ #Picking what NLP task you want to do
95
+ option = st.selectbox('Model',('13M','26M')) #option is stored in this variable
96
+ #Textbox for text user is entering
97
+ st.subheader("Enter a word from which a sentence / word would be predicted")
98
+ len2 = st.text_input('Enter sequence length: ')
99
+ text2 = st.text_input('Enter word: ') #text is stored in this variable
 
 
 
 
 
 
 
 
 
100
 
101
+
102
+ if option == '13M':
 
103
  vocab_size = 100000
104
  max_len = 1
105
  d_model = 64 # 64 , 1024
106
  n_head = 4 # 8 , 16
107
  ff_dim = 256 # 256 , 2048
108
  dropout_rate = 0.1 # 0.5 , 0.2
109
+ weights = "predict3"
110
+ datafile = "data2.txt"
111
+ dict = "dict_predict3.bin.npz"
112
+ len = len2
113
+ text2 = text2
114
+ out2 = textcompletion_model(vocab_size, max_len, d_model, n_head, ff_dim, dropout_rate,weights, datafile , dict, len , text2)
115
+
 
 
 
 
 
 
 
 
 
 
 
 
 
116
 
117
+ elif option=="26M":
118
+ vocab_size = 100000
119
+ max_len = 1
120
+ d_model = 128 # 64 , 1024
121
+ n_head = 8 # 8 , 16
122
+ ff_dim = 256 # 256 , 2048
123
+ dropout_rate = 0.1 # 0.5 , 0.2
124
+ weights = "predict5"
125
+ datafile = "data2.txt"
126
+ dict = "dict_predict3.bin.npz"
127
+ len = len2
128
+ text2 = text2
129
+ out2 = textcompletion_model(vocab_size, max_len, d_model, n_head, ff_dim, dropout_rate,weights, datafile , dict, len , text2)
130
 
131
  st.write("Predicted Text: ")
132
  st.write(out2)