shrusti333 committed
Commit bca2eec · 1 Parent(s): c28b5a3

Upload main_interface.ipynb

Files changed (1)
  1. main_interface.ipynb +522 -0
main_interface.ipynb ADDED
@@ -0,0 +1,522 @@
+ {
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "5a68a7b7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from tkinter import *\n",
+ "import pickle\n",
+ "import numpy as np\n",
+ "from sklearn.feature_extraction.text import CountVectorizer\n",
+ "from tensorflow.keras.models import Model\n",
+ "from tensorflow.keras import models\n",
+ "from tensorflow.keras.layers import Input,LSTM,Dense\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0bce948d",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "04a85883",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "id": "7aa10e9a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import tensorflowjs as tfjs"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 45,
+ "id": "b3c21b9e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from tensorflow.keras.models import load_model\n",
+ "model = load_model(\"model_translation\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 47,
+ "id": "29483b74",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tfjs.converters.save_keras_model(model,'C:\\\\Users\\\\Shrusti\\\\Desktop\\\\project')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "4734036f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "cv_translation = CountVectorizer(binary=True, tokenizer=lambda txt: txt.split(), stop_words=None, analyzer='char')\n",
+ "\n",
+ "cv_transliteration = CountVectorizer(binary=True, tokenizer=lambda txt: txt.split(), stop_words=None, analyzer='char')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3d773f1d",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "15b11fd1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "datafile_translation = pickle.load(open(\"training_data_translation.pkl\",\"rb\"))\n",
+ "input_characters_translation = datafile_translation['input_characters']\n",
+ "target_characters_translation = datafile_translation['target_characters']\n",
+ "max_input_length_translation = datafile_translation['max_input_length']\n",
+ "max_target_length_translation = datafile_translation['max_target_length']\n",
+ "num_en_chars_translation = datafile_translation['num_en_chars']\n",
+ "num_dec_chars_translation = datafile_translation['num_dec_chars']\n",
+ "input_texts_translation = datafile_translation['input_texts']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7ed45252",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "id": "c35fca3a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "datafile_transliteration = pickle.load(open(\"training_data_transliteration.pkl\",\"rb\"))\n",
+ "input_characters_transliteration = datafile_transliteration['input_characters']\n",
+ "target_characters_transliteration = datafile_transliteration['target_characters']\n",
+ "max_input_length_transliteration = datafile_transliteration['max_input_length']\n",
+ "max_target_length_transliteration = datafile_transliteration['max_target_length']\n",
+ "num_en_chars_transliteration = datafile_transliteration['num_en_chars']\n",
+ "num_dec_chars_transliteration = datafile_transliteration['num_dec_chars']"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "324ec66f",
+ "metadata": {},
+ "source": [
+ "Transliteration"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "id": "c16d85b7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#Inference model\n",
+ "#load the trained model\n",
+ "model_transliteration = models.load_model(\"s2s_transliteration\")\n",
+ "#construct the encoder model from the output of the second layer;\n",
+ "#discard the encoder output and keep only the states\n",
+ "enc_outputs_transliteration, state_h_enc_transliteration, state_c_enc_transliteration = model_transliteration.layers[2].output\n",
+ "#build a model from the encoder input to the encoder states\n",
+ "en_model_transliteration = Model(model_transliteration.input[0], [state_h_enc_transliteration, state_c_enc_transliteration])\n",
+ "#create Input objects for the decoder's hidden and cell states,\n",
+ "#shaped to the hidden (latent) dimension\n",
+ "dec_state_input_h_transliteration = Input(shape=(256,), name=\"input_6\")\n",
+ "dec_state_input_c_transliteration = Input(shape=(256,), name=\"input_7\")\n",
+ "dec_states_inputs_transliteration = [dec_state_input_h_transliteration, dec_state_input_c_transliteration]\n",
+ "#run the decoder LSTM on its input, initialized with the state inputs\n",
+ "dec_lstm_transliteration = model_transliteration.layers[3]\n",
+ "dec_outputs_transliteration, state_h_dec_transliteration, state_c_dec_transliteration = dec_lstm_transliteration(\n",
+ "    model_transliteration.input[1], initial_state=dec_states_inputs_transliteration\n",
+ ")\n",
+ "dec_states_transliteration = [state_h_dec_transliteration, state_c_dec_transliteration]\n",
+ "dec_dense_transliteration = model_transliteration.layers[4]\n",
+ "dec_outputs_transliteration = dec_dense_transliteration(dec_outputs_transliteration)\n",
+ "#create a Model that maps the decoder input plus decoder state inputs\n",
+ "#to the decoder output plus the updated decoder states\n",
+ "dec_model_transliteration = Model(\n",
+ "    [model_transliteration.input[1]] + dec_states_inputs_transliteration, [dec_outputs_transliteration] + dec_states_transliteration\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "id": "419f55d5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def decode_sequence_transliteration(input_seq):\n",
+ "    #map each index back to its target character\n",
+ "    reverse_target_char_index_transliteration = dict(enumerate(target_characters_transliteration))\n",
+ "    #get the encoder states for the user input sequence\n",
+ "    states_value_transliteration = en_model_transliteration.predict(input_seq)\n",
+ "\n",
+ "    #fit the vectorizer on the target characters and generate a\n",
+ "    #target sequence of length 1 holding the start character '\\t'\n",
+ "    co = cv_transliteration.fit(target_characters_transliteration)\n",
+ "    target_seq_transliteration = np.array([co.transform(list(\"\\t\")).toarray().tolist()], dtype=\"float32\")\n",
+ "\n",
+ "    #stop the loop once the end of the text is reached\n",
+ "    stop_condition = False\n",
+ "    #append every predicted character to the decoded sentence\n",
+ "    decoded_sentence = \"\"\n",
+ "\n",
+ "    while not stop_condition:\n",
+ "        #predict the next character distribution together with the\n",
+ "        #new hidden and cell states\n",
+ "        output_chars, h, c = dec_model_transliteration.predict([target_seq_transliteration] + states_value_transliteration)\n",
+ "\n",
+ "        #take the most likely index and look up its character\n",
+ "        char_index = np.argmax(output_chars[0, -1, :])\n",
+ "        text_char = reverse_target_char_index_transliteration[char_index]\n",
+ "        decoded_sentence += text_char\n",
+ "        #exit condition: either hit max length or find the stop character\n",
+ "        if text_char == \"\\n\" or len(decoded_sentence) > max_target_length_transliteration:\n",
+ "            stop_condition = True\n",
+ "        #update the target sequence to the current character index\n",
+ "        target_seq_transliteration = np.zeros((1, 1, num_dec_chars_transliteration))\n",
+ "        target_seq_transliteration[0, 0, char_index] = 1.0\n",
+ "        states_value_transliteration = [h, c]\n",
+ "    #return the decoded sentence\n",
+ "    return decoded_sentence\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "id": "cd306c27",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def bagofcharacter_transliteration(input_t):\n",
+ "    cv_transliteration = CountVectorizer(binary=True, tokenizer=lambda txt: txt.split(), stop_words=None, analyzer='char')\n",
+ "    en_in_data = []\n",
+ "    pad_en = [1] + [0]*(len(input_characters_transliteration)-1)\n",
+ "\n",
+ "    cv_inp = cv_transliteration.fit(input_characters_transliteration)\n",
+ "    en_in_data.append(cv_inp.transform(list(input_t)).toarray().tolist())\n",
+ "\n",
+ "    if len(input_t) < max_input_length_transliteration:\n",
+ "        for _ in range(max_input_length_transliteration - len(input_t)):\n",
+ "            en_in_data[0].append(pad_en)\n",
+ "\n",
+ "    return np.array(en_in_data, dtype=\"float32\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "264e62af",
+ "metadata": {},
+ "source": [
+ "Translation"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "id": "5b799dff",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#Inference model\n",
+ "#load the trained model\n",
+ "model_translation = models.load_model(\"model_translation\")\n",
+ "#construct the encoder model from the output of the second layer;\n",
+ "#discard the encoder output and keep only the states\n",
+ "enc_outputs_translation, state_h_enc_translation, state_c_enc_translation = model_translation.layers[2].output\n",
+ "#build a model from the encoder input to the encoder states\n",
+ "en_model_translation = Model(model_translation.input[0], [state_h_enc_translation, state_c_enc_translation])\n",
+ "#create Input objects for the decoder's hidden and cell states,\n",
+ "#shaped to the hidden (latent) dimension\n",
+ "dec_state_input_h_translation = Input(shape=(256,))\n",
+ "dec_state_input_c_translation = Input(shape=(256,))\n",
+ "dec_states_inputs_translation = [dec_state_input_h_translation, dec_state_input_c_translation]\n",
+ "#run the decoder LSTM on its input, initialized with the state inputs\n",
+ "dec_lstm_translation = model_translation.layers[3]\n",
+ "dec_outputs_translation, state_h_dec_translation, state_c_dec_translation = dec_lstm_translation(\n",
+ "    model_translation.input[1], initial_state=dec_states_inputs_translation\n",
+ ")\n",
+ "dec_states_translation = [state_h_dec_translation, state_c_dec_translation]\n",
+ "dec_dense_translation = model_translation.layers[4]\n",
+ "dec_outputs_translation = dec_dense_translation(dec_outputs_translation)\n",
+ "#create a Model that maps the decoder input plus decoder state inputs\n",
+ "#to the decoder output plus the updated decoder states\n",
+ "dec_model_translation = Model(\n",
+ "    [model_translation.input[1]] + dec_states_inputs_translation, [dec_outputs_translation] + dec_states_translation\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "id": "7fb2775a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def decode_sequence_translation(input_seq):\n",
+ "    #map each index back to its target character\n",
+ "    reverse_target_char_index_translation = dict(enumerate(target_characters_translation))\n",
+ "    #get the encoder states for the user input sequence\n",
+ "    states_value_translation = en_model_translation.predict(input_seq)\n",
+ "\n",
+ "    #fit the vectorizer on the target characters and generate a\n",
+ "    #target sequence of length 1 holding the start character '\\t'\n",
+ "    co_translation = cv_translation.fit(target_characters_translation)\n",
+ "    target_seq_translation = np.array([co_translation.transform(list(\"\\t\")).toarray().tolist()], dtype=\"float32\")\n",
+ "\n",
+ "    #stop the loop once the end of the text is reached\n",
+ "    stop_condition = False\n",
+ "    #append every predicted character to the decoded sentence\n",
+ "    decoded_sentence_translation = \"\"\n",
+ "\n",
+ "    while not stop_condition:\n",
+ "        #predict the next character distribution together with the\n",
+ "        #new hidden and cell states\n",
+ "        output_chars_translation, h_translation, c_translation = dec_model_translation.predict([target_seq_translation] + states_value_translation)\n",
+ "\n",
+ "        #take the most likely index and look up its character\n",
+ "        char_index_translation = np.argmax(output_chars_translation[0, -1, :])\n",
+ "        text_char_translation = reverse_target_char_index_translation[char_index_translation]\n",
+ "        decoded_sentence_translation += text_char_translation\n",
+ "        #exit condition: either hit max length or find the stop character\n",
+ "        if text_char_translation == \"\\n\" or len(decoded_sentence_translation) > max_target_length_translation:\n",
+ "            stop_condition = True\n",
+ "        #update the target sequence to the current character index\n",
+ "        target_seq_translation = np.zeros((1, 1, num_dec_chars_translation))\n",
+ "        target_seq_translation[0, 0, char_index_translation] = 1.0\n",
+ "        states_value_translation = [h_translation, c_translation]\n",
+ "    #return the decoded sentence\n",
+ "    return decoded_sentence_translation\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "id": "8a662484",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def bagofcharacter_translation(input_t):\n",
+ "    cv_translation = CountVectorizer(binary=True, tokenizer=lambda txt: txt.split(), stop_words=None, analyzer='char')\n",
+ "    en_in_data = []\n",
+ "    pad_en = [1] + [0]*(len(input_characters_translation)-1)\n",
+ "\n",
+ "    cv_inp_translation = cv_translation.fit(input_characters_translation)\n",
+ "    en_in_data.append(cv_inp_translation.transform(list(input_t)).toarray().tolist())\n",
+ "\n",
+ "    if len(input_t) < max_input_length_translation:\n",
+ "        for _ in range(max_input_length_translation - len(input_t)):\n",
+ "            en_in_data[0].append(pad_en)\n",
+ "\n",
+ "    return np.array(en_in_data, dtype=\"float32\")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "dad973d9",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "17f284a1",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "id": "80758957",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def translate_to_Konkani(sent):\n",
+ "\n",
+ "    input_text = sent.split(',')\n",
+ "    output_texts = \"\"\n",
+ "    for x in input_text:\n",
+ "        term = x + \".\"\n",
+ "        if term in input_texts_translation:\n",
+ "            en_in_data = bagofcharacter_translation(x.lower() + \".\")\n",
+ "            x = decode_sequence_translation(en_in_data)\n",
+ "        else:\n",
+ "            en_in_data = bagofcharacter_transliteration(x.lower() + \".\")\n",
+ "            x = decode_sequence_transliteration(en_in_data)\n",
+ "        output_texts += \" \" + x\n",
+ "    print(output_texts)\n"
+ ]
+ },
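+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "usage-demo",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#minimal usage sketch: the input sentence here is a hypothetical\n",
+ "#example; the models and pickle files loaded above must be present\n",
+ "translate_to_Konkani(\"hello, how are you\")"
+ ]
+ }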
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+ }