shrusti333 committed on
Commit
51aa909
·
1 Parent(s): a306ee4

Upload 6 files

Browse files
.gitattributes CHANGED
@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ model_translation/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
model_translation/keras_metadata.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d0fd2d25bbc9339e576f1bf794e45909029e4b917e2fdcda60d3644fbe2a7ed
3
+ size 15344
model_translation/saved_model.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aae0193972052061a91222b654a5e6a8db7b241d2cfd7e7e6564643d5f6c3d40
3
+ size 1667158
model_translation/variables/variables.data-00000-of-00001 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6522bca3da353baba81a9eaee8d2b0490317d2d8a0e8b50163a7463c0b9e9935
3
+ size 4832938
model_translation/variables/variables.index ADDED
Binary file (1.83 kB). View file
 
training_data_translation.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e983d73156e86347c95b242407b2d909f1a98e87468f6411d74ce913ae8bad4
3
+ size 1429
translation_interface.ipynb ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 17,
6
+ "id": "d5e3e67f",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "from tkinter import *\n",
11
+ "import pickle\n",
12
+ "import numpy as np\n",
13
+ "from sklearn.feature_extraction.text import CountVectorizer\n",
14
+ "from tensorflow.keras.models import Model\n",
15
+ "from tensorflow.keras import models\n",
16
+ "from tensorflow.keras.layers import Input,LSTM,Dense\n",
17
+ "\n",
18
+ "cv=CountVectorizer(binary=True,tokenizer=lambda txt: txt.split(),stop_words=None,analyzer='char') \n",
19
+ "\n"
20
+ ]
21
+ },
22
+ {
23
+ "cell_type": "code",
24
+ "execution_count": null,
25
+ "id": "40c50a8d",
26
+ "metadata": {},
27
+ "outputs": [],
28
+ "source": []
29
+ },
30
+ {
31
+ "cell_type": "code",
32
+ "execution_count": 18,
33
+ "id": "7e54fc77",
34
+ "metadata": {},
35
+ "outputs": [],
36
+ "source": [
37
+ "datafile = pickle.load(open(\"training_data_translation.pkl\",\"rb\"))\n",
38
+ "input_characters = datafile['input_characters']\n",
39
+ "target_characters = datafile['target_characters']\n",
40
+ "max_input_length = datafile['max_input_length']\n",
41
+ "max_target_length = datafile['max_target_length']\n",
42
+ "num_en_chars = datafile['num_en_chars']\n",
43
+ "num_dec_chars = datafile['num_dec_chars']\n"
44
+ ]
45
+ },
46
+ {
47
+ "cell_type": "code",
48
+ "execution_count": 19,
49
+ "id": "ec54e3fc",
50
+ "metadata": {},
51
+ "outputs": [],
52
+ "source": [
53
+ "#Inference model\n",
54
+ "#load the model\n",
55
+ "model = models.load_model(\"model_translation\")\n",
56
+ "#construct encoder model from the output of the layer at index 2 (model.layers[2])\n",
57
+ "#discard the encoder output and store only states.\n",
58
+ "enc_outputs, state_h_enc, state_c_enc = model.layers[2].output \n",
59
+ "#add input object and state from the layer.\n",
60
+ "en_model = Model(model.input[0], [state_h_enc, state_c_enc])\n",
61
+ "#create Input object for hidden and cell state for decoder\n",
62
+ "#shape of layer with hidden or latent dimension\n",
63
+ "dec_state_input_h = Input(shape=(256,))\n",
64
+ "dec_state_input_c = Input(shape=(256,))\n",
65
+ "dec_states_inputs = [dec_state_input_h, dec_state_input_c]\n",
66
+ "#add input from the encoder output and initialize with states.\n",
67
+ "dec_lstm = model.layers[3]\n",
68
+ "dec_outputs, state_h_dec, state_c_dec = dec_lstm(\n",
69
+ " model.input[1], initial_state=dec_states_inputs\n",
70
+ ")\n",
71
+ "dec_states = [state_h_dec, state_c_dec]\n",
72
+ "dec_dense = model.layers[4]\n",
73
+ "dec_outputs = dec_dense(dec_outputs)\n",
74
+ "#create Model that takes the decoder input plus the decoder state inputs\n",
75
+ "#and returns the decoder output together with the updated decoder states.\n",
76
+ "dec_model = Model(\n",
77
+ " [model.input[1]] + dec_states_inputs, [dec_outputs] + dec_states\n",
78
+ ")"
79
+ ]
80
+ },
81
+ {
82
+ "cell_type": "code",
83
+ "execution_count": 20,
84
+ "id": "835bebec",
85
+ "metadata": {},
86
+ "outputs": [],
87
+ "source": [
88
+ "def decode_sequence_translation(input_seq):\n",
89
+ " #create a dictionary with a key as index and value as characters.\n",
90
+ " reverse_target_char_index = dict(enumerate(target_characters))\n",
91
+ " #get the states from the user input sequence\n",
92
+ " states_value = en_model.predict(input_seq)\n",
93
+ "\n",
94
+ " #fit target characters and \n",
95
+ " #initialize every first character to be 1 which is '\\t'.\n",
96
+ " #Generate empty target sequence of length 1.\n",
97
+ " co=cv.fit(target_characters) \n",
98
+ " target_seq=np.array([co.transform(list(\"\\t\")).toarray().tolist()],dtype=\"float32\")\n",
99
+ "\n",
100
+ "    #set to True once the end of the text is reached, which stops the loop\n",
101
+ " stop_condition = False\n",
102
+ " #append every predicted character in decoded sentence\n",
103
+ " decoded_sentence = \"\"\n",
104
+ "\n",
105
+ " while not stop_condition:\n",
106
+ "        #get the predicted output plus the new hidden and cell states (reused below).\n",
107
+ " output_chars, h, c = dec_model.predict([target_seq] + states_value)\n",
108
+ "\n",
109
+ " #get the index and from the dictionary get the character.\n",
110
+ " char_index = np.argmax(output_chars[0, -1, :])\n",
111
+ " text_char = reverse_target_char_index[char_index]\n",
112
+ " decoded_sentence += text_char\n",
113
+ " # Exit condition: either hit max length\n",
114
+ " # or find a stop character.\n",
115
+ " if text_char == \"\\n\" or len(decoded_sentence) > max_target_length:\n",
116
+ " stop_condition = True\n",
117
+ " #update target sequence to the current character index.\n",
118
+ " target_seq = np.zeros((1, 1, num_dec_chars))\n",
119
+ " target_seq[0, 0, char_index] = 1.0\n",
120
+ " states_value = [h, c]\n",
121
+ " #return the decoded sentence\n",
122
+ " return decoded_sentence\n",
123
+ "\n",
124
+ " "
125
+ ]
126
+ },
127
+ {
128
+ "cell_type": "code",
129
+ "execution_count": 21,
130
+ "id": "911511bb",
131
+ "metadata": {},
132
+ "outputs": [],
133
+ "source": [
134
+ "\n",
135
+ "def bagofcharacter_translation(input_t):\n",
136
+ " cv=CountVectorizer(binary=True,tokenizer=lambda txt:\n",
137
+ " txt.split(),stop_words=None,analyzer='char') \n",
138
+ " en_in_data=[] ; pad_en=[1]+[0]*(len(input_characters)-1)\n",
139
+ " \n",
140
+ " cv_inp= cv.fit(input_characters)\n",
141
+ " en_in_data.append(cv_inp.transform(list(input_t)).toarray().tolist())\n",
142
+ " \n",
143
+ " if len(input_t)< max_input_length:\n",
144
+ " for _ in range(max_input_length-len(input_t)):\n",
145
+ " en_in_data[0].append(pad_en)\n",
146
+ " \n",
147
+ " return np.array(en_in_data,dtype=\"float32\")\n",
148
+ " \n",
149
+ " \n",
150
+ "\n",
151
+ "\n"
152
+ ]
153
+ },
154
+ {
155
+ "cell_type": "code",
156
+ "execution_count": null,
157
+ "id": "2732c86d",
158
+ "metadata": {},
159
+ "outputs": [],
160
+ "source": [
161
+ "output_texts=[]\n",
162
+ "sent= input( ) \n",
163
+ "input_text = sent.split(' ') \n",
164
+ "output_texts=\"\"\n",
165
+ "\n",
166
+ "en_in_data = bagofcharacter_translation( x.lower()+\".\") \n",
167
+ "x=decode_sequence_translation(en_in_data)\n",
168
+ "output_texts+=\" \"+ x \n",
169
+ "print(output_texts)"
170
+ ]
171
+ },
172
+ {
173
+ "cell_type": "code",
174
+ "execution_count": null,
175
+ "id": "7bc57d99",
176
+ "metadata": {},
177
+ "outputs": [],
178
+ "source": []
179
+ }
180
+ ],
181
+ "metadata": {
182
+ "kernelspec": {
183
+ "display_name": "Python 3 (ipykernel)",
184
+ "language": "python",
185
+ "name": "python3"
186
+ },
187
+ "language_info": {
188
+ "codemirror_mode": {
189
+ "name": "ipython",
190
+ "version": 3
191
+ },
192
+ "file_extension": ".py",
193
+ "mimetype": "text/x-python",
194
+ "name": "python",
195
+ "nbconvert_exporter": "python",
196
+ "pygments_lexer": "ipython3",
197
+ "version": "3.9.13"
198
+ }
199
+ },
200
+ "nbformat": 4,
201
+ "nbformat_minor": 5
202
+ }