Spaces:
Paused
Paused
DHRUV SHEKHAWAT
committed on
Commit
·
e5c8275
1
Parent(s):
009513e
Update app.py
Browse files
app.py
CHANGED
@@ -44,33 +44,54 @@ class TransformerChatbot(Model):
|
|
44 |
st.title("UniGLM TEXT completion Model")
|
45 |
st.subheader("Next Word Prediction AI Model by Webraft-AI")
|
46 |
#Picking what NLP task you want to do
|
47 |
-
option = st.selectbox('Model',('
|
48 |
#Textbox for text user is entering
|
49 |
st.subheader("Enter a word from which a sentence / word would be predicted")
|
50 |
-
|
51 |
text2 = st.text_input('Enter word: ') #text is stored in this variable
|
52 |
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
|
|
|
|
|
|
|
|
|
|
60 |
vocab_size = 100000
|
61 |
max_len = 1
|
62 |
d_model = 64 # 64 , 1024
|
63 |
n_head = 4 # 8 , 16
|
64 |
ff_dim = 256 # 256 , 2048
|
65 |
dropout_rate = 0.1 # 0.5 , 0.2
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
70 |
text = f.read()
|
71 |
text = text.lower()
|
72 |
words = text.split()
|
73 |
-
loaded_dict = np.load(
|
74 |
word_to_num = loaded_dict["word_to_num"].item()
|
75 |
num_to_word = loaded_dict["num_to_word"].item()
|
76 |
X = []
|
@@ -85,48 +106,46 @@ if option == '13M_OLD':
|
|
85 |
X.append(word_to_num[words[-1]])
|
86 |
X_train = pad_sequences([X])
|
87 |
y_train = pad_sequences([Y])
|
88 |
-
|
|
|
|
|
|
|
|
|
|
|
89 |
|
90 |
|
91 |
chatbot = TransformerChatbot(vocab_size, max_len, d_model, n_head, ff_dim, dropout_rate)
|
92 |
-
chatbot.load_weights(
|
93 |
chatbot.build(input_shape=(None, max_len)) # Build the model
|
94 |
chatbot.compile(optimizer="adam", loss="sparse_categorical_crossentropy")
|
95 |
-
|
96 |
for i in range(1):
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
output_sentence =
|
104 |
-
for _ in range(
|
105 |
-
predicted_token = np.argmax(chatbot.predict(
|
106 |
predicted_token = predicted_token.item()
|
107 |
out = num_to_word[predicted_token]
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
output_sentence += " " + out
|
112 |
-
|
113 |
-
|
114 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
115 |
|
116 |
-
out2 = output_sentence
|
117 |
-
|
118 |
-
st.write("Predicted Text: ")
|
119 |
-
st.write(out2)
|
120 |
-
|
121 |
-
|
122 |
-
elif option=="26M_OLD":
|
123 |
-
option2 = st.selectbox('Type',('word','sentence'))
|
124 |
-
if option2 == 'word':
|
125 |
-
len2 = 1
|
126 |
-
else:
|
127 |
-
len2 = 13
|
128 |
-
|
129 |
-
else:
|
130 |
-
out2 = "Error: Wrong Model Selected"
|
131 |
|
132 |
-
|
|
|
|
44 |
st.title("UniGLM TEXT completion Model")
|
45 |
st.subheader("Next Word Prediction AI Model by Webraft-AI")
|
46 |
#Picking what NLP task you want to do
|
47 |
+
option = st.selectbox('Model',('1','2')) #option is stored in this variable
|
48 |
#Textbox for text user is entering
|
49 |
st.subheader("Enter a word from which a sentence / word would be predicted")
|
|
|
50 |
text2 = st.text_input('Enter word: ') #text is stored in this variable
|
51 |
|
52 |
+
if option == '1':
|
53 |
+
with open("data2.txt","r") as f:
|
54 |
+
text = f.read()
|
55 |
+
text = text.lower()
|
56 |
+
words = text.split()
|
57 |
+
loaded_dict = np.load("dict_predict3.bin.npz", allow_pickle=True)
|
58 |
+
word_to_num = loaded_dict["word_to_num"].item()
|
59 |
+
num_to_word = loaded_dict["num_to_word"].item()
|
60 |
+
X = []
|
61 |
+
X.append(word_to_num[words[-1]])
|
62 |
+
X_train = pad_sequences([X])
|
63 |
+
y_train = pad_sequences([Y])
|
64 |
vocab_size = 100000
|
65 |
max_len = 1
|
66 |
d_model = 64 # 64 , 1024
|
67 |
n_head = 4 # 8 , 16
|
68 |
ff_dim = 256 # 256 , 2048
|
69 |
dropout_rate = 0.1 # 0.5 , 0.2
|
70 |
+
|
71 |
+
|
72 |
+
chatbot = TransformerChatbot(vocab_size, max_len, d_model, n_head, ff_dim, dropout_rate)
|
73 |
+
chatbot.load_weights("predict3")
|
74 |
+
chatbot.build(input_shape=(None, max_len)) # Build the model
|
75 |
+
chatbot.compile(optimizer="adam", loss="sparse_categorical_crossentropy")
|
76 |
+
|
77 |
+
given_X1 = other_num1
|
78 |
+
input_sequence1 = pad_sequences([given_X1], maxlen=max_len, padding='post')
|
79 |
+
output_sentence = ""
|
80 |
+
for _ in range(1):
|
81 |
+
predicted_token = np.argmax(chatbot.predict(input_sequence1), axis=-1)
|
82 |
+
predicted_token = predicted_token.item()
|
83 |
+
out = num_to_word[predicted_token]
|
84 |
+
input_sequence1 = pad_sequences([given_X1], maxlen=max_len, padding='post')
|
85 |
+
|
86 |
+
out2 = output_sentence
|
87 |
+
|
88 |
+
|
89 |
+
else:
|
90 |
+
with open("data2.txt","r") as f:
|
91 |
text = f.read()
|
92 |
text = text.lower()
|
93 |
words = text.split()
|
94 |
+
loaded_dict = np.load("dict_predict3.bin.npz", allow_pickle=True)
|
95 |
word_to_num = loaded_dict["word_to_num"].item()
|
96 |
num_to_word = loaded_dict["num_to_word"].item()
|
97 |
X = []
|
|
|
106 |
X.append(word_to_num[words[-1]])
|
107 |
X_train = pad_sequences([X])
|
108 |
y_train = pad_sequences([Y])
|
109 |
+
vocab_size = 100000
|
110 |
+
max_len = 1
|
111 |
+
d_model = 64 # 64 , 1024
|
112 |
+
n_head = 4 # 8 , 16
|
113 |
+
ff_dim = 256 # 256 , 2048
|
114 |
+
dropout_rate = 0.1 # 0.5 , 0.2
|
115 |
|
116 |
|
117 |
chatbot = TransformerChatbot(vocab_size, max_len, d_model, n_head, ff_dim, dropout_rate)
|
118 |
+
chatbot.load_weights("predict3")
|
119 |
chatbot.build(input_shape=(None, max_len)) # Build the model
|
120 |
chatbot.compile(optimizer="adam", loss="sparse_categorical_crossentropy")
|
121 |
+
|
122 |
for i in range(1):
|
123 |
+
other_text1 = text2
|
124 |
+
other_text1 = other_text1.lower()
|
125 |
+
other_words1 = other_text1.split()
|
126 |
+
other_num1 = [word_to_num[word] for word in other_words1]
|
127 |
+
given_X1 = other_num1
|
128 |
+
input_sequence1 = pad_sequences([given_X1], maxlen=max_len, padding='post')
|
129 |
+
output_sentence = other_text1+""
|
130 |
+
for _ in range(10):
|
131 |
+
predicted_token = np.argmax(chatbot.predict(input_sequence1), axis=-1)
|
132 |
predicted_token = predicted_token.item()
|
133 |
out = num_to_word[predicted_token]
|
134 |
+
|
135 |
+
|
|
|
136 |
output_sentence += " " + out
|
137 |
+
if out == ".":
|
138 |
+
break
|
139 |
+
given_X1 = given_X1[1:]
|
140 |
+
given_X1.append(predicted_token)
|
141 |
+
input_sequence1 = pad_sequences([given_X1], maxlen=max_len, padding='post')
|
142 |
+
|
143 |
+
out2 = output_sentence
|
144 |
+
|
145 |
+
|
146 |
+
|
147 |
+
|
148 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
149 |
|
150 |
+
st.write("Predicted Text: ")
|
151 |
+
st.write(out2)
|