shrusti333 committed
Commit bca2eec · 1 Parent(s): c28b5a3

Upload main_interface.ipynb

Files changed (1)
  1. main_interface.ipynb +522 -0
main_interface.ipynb ADDED
@@ -0,0 +1,522 @@
+ {
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "5a68a7b7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from tkinter import *\n",
+ "import pickle\n",
+ "import numpy as np\n",
+ "from sklearn.feature_extraction.text import CountVectorizer\n",
+ "from tensorflow.keras.models import Model\n",
+ "from tensorflow.keras import models\n",
+ "from tensorflow.keras.layers import Input,LSTM,Dense\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0bce948d",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "04a85883",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "id": "7aa10e9a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import tensorflowjs as tfjs"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 45,
+ "id": "b3c21b9e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from tensorflow.keras.models import load_model\n",
+ "model = load_model(\"model_translation\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 47,
+ "id": "29483b74",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tfjs.converters.save_keras_model(model,'C:\\\\Users\\\\Shrusti\\\\Desktop\\\\project')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "4734036f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "cv_translation = CountVectorizer(binary=True, tokenizer=lambda txt: txt.split(), stop_words=None, analyzer='char')\n",
+ "\n",
+ "cv_transliteration = CountVectorizer(binary=True, tokenizer=lambda txt: txt.split(), stop_words=None, analyzer='char')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3d773f1d",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "15b11fd1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "datafile_translation = pickle.load(open(\"training_data_translation.pkl\",\"rb\"))\n",
+ "input_characters_translation = datafile_translation['input_characters']\n",
+ "target_characters_translation = datafile_translation['target_characters']\n",
+ "max_input_length_translation = datafile_translation['max_input_length']\n",
+ "max_target_length_translation = datafile_translation['max_target_length']\n",
+ "num_en_chars_translation = datafile_translation['num_en_chars']\n",
+ "num_dec_chars_translation = datafile_translation['num_dec_chars']\n",
+ "input_texts_translation = datafile_translation['input_texts']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7ed45252",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "id": "c35fca3a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "datafile_transliteration = pickle.load(open(\"training_data_transliteration.pkl\",\"rb\"))\n",
+ "input_characters_transliteration = datafile_transliteration['input_characters']\n",
+ "target_characters_transliteration = datafile_transliteration['target_characters']\n",
+ "max_input_length_transliteration = datafile_transliteration['max_input_length']\n",
+ "max_target_length_transliteration = datafile_transliteration['max_target_length']\n",
+ "num_en_chars_transliteration = datafile_transliteration['num_en_chars']\n",
+ "num_dec_chars_transliteration = datafile_transliteration['num_dec_chars']"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "324ec66f",
+ "metadata": {},
+ "source": [
+ "Transliteration"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "id": "c16d85b7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#Inference model\n",
+ "#load the trained model\n",
+ "model_transliteration = models.load_model(\"s2s_transliteration\")\n",
+ "#construct the encoder model from the output of the second layer;\n",
+ "#discard the encoder output and keep only the states\n",
+ "enc_outputs_transliteration, state_h_enc_transliteration, state_c_enc_transliteration = model_transliteration.layers[2].output\n",
+ "#build a model from the encoder input to the encoder states\n",
+ "en_model_transliteration = Model(model_transliteration.input[0], [state_h_enc_transliteration, state_c_enc_transliteration])\n",
+ "#create Input objects for the decoder's hidden and cell states,\n",
+ "#shaped to the hidden (latent) dimension\n",
+ "dec_state_input_h_transliteration = Input(shape=(256,), name=\"input_6\")\n",
+ "dec_state_input_c_transliteration = Input(shape=(256,), name=\"input_7\")\n",
+ "dec_states_inputs_transliteration = [dec_state_input_h_transliteration, dec_state_input_c_transliteration]\n",
+ "#run the decoder LSTM on its input, initialized with the state inputs\n",
+ "dec_lstm_transliteration = model_transliteration.layers[3]\n",
+ "dec_outputs_transliteration, state_h_dec_transliteration, state_c_dec_transliteration = dec_lstm_transliteration(\n",
+ "    model_transliteration.input[1], initial_state=dec_states_inputs_transliteration\n",
+ ")\n",
+ "dec_states_transliteration = [state_h_dec_transliteration, state_c_dec_transliteration]\n",
+ "dec_dense_transliteration = model_transliteration.layers[4]\n",
+ "dec_outputs_transliteration = dec_dense_transliteration(dec_outputs_transliteration)\n",
+ "#create a Model that maps the decoder input plus decoder state inputs\n",
+ "#to the decoder output plus the updated decoder states\n",
+ "dec_model_transliteration = Model(\n",
+ "    [model_transliteration.input[1]] + dec_states_inputs_transliteration, [dec_outputs_transliteration] + dec_states_transliteration\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "id": "419f55d5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def decode_sequence_transliteration(input_seq):\n",
+ "    #map each index back to its target character\n",
+ "    reverse_target_char_index_transliteration = dict(enumerate(target_characters_transliteration))\n",
+ "    #get the encoder states for the user input sequence\n",
+ "    states_value_transliteration = en_model_transliteration.predict(input_seq)\n",
+ "\n",
+ "    #fit the vectorizer on the target characters and generate a\n",
+ "    #target sequence of length 1 holding the start character '\\t'\n",
+ "    co = cv_transliteration.fit(target_characters_transliteration)\n",
+ "    target_seq_transliteration = np.array([co.transform(list(\"\\t\")).toarray().tolist()], dtype=\"float32\")\n",
+ "\n",
+ "    #stop the loop once the end of the text is reached\n",
+ "    stop_condition = False\n",
+ "    #append every predicted character to the decoded sentence\n",
+ "    decoded_sentence = \"\"\n",
+ "\n",
+ "    while not stop_condition:\n",
+ "        #predict the next character distribution together with the\n",
+ "        #new hidden and cell states\n",
+ "        output_chars, h, c = dec_model_transliteration.predict([target_seq_transliteration] + states_value_transliteration)\n",
+ "\n",
+ "        #take the most likely index and look up its character\n",
+ "        char_index = np.argmax(output_chars[0, -1, :])\n",
+ "        text_char = reverse_target_char_index_transliteration[char_index]\n",
+ "        decoded_sentence += text_char\n",
+ "        #exit condition: either hit max length or find the stop character\n",
+ "        if text_char == \"\\n\" or len(decoded_sentence) > max_target_length_transliteration:\n",
+ "            stop_condition = True\n",
+ "        #update the target sequence to the current character index\n",
+ "        target_seq_transliteration = np.zeros((1, 1, num_dec_chars_transliteration))\n",
+ "        target_seq_transliteration[0, 0, char_index] = 1.0\n",
+ "        states_value_transliteration = [h, c]\n",
+ "    #return the decoded sentence\n",
+ "    return decoded_sentence\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "id": "cd306c27",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def bagofcharacter_transliteration(input_t):\n",
+ "    cv_transliteration = CountVectorizer(binary=True, tokenizer=lambda txt: txt.split(), stop_words=None, analyzer='char')\n",
+ "    en_in_data = []\n",
+ "    pad_en = [1] + [0]*(len(input_characters_transliteration)-1)\n",
+ "\n",
+ "    cv_inp = cv_transliteration.fit(input_characters_transliteration)\n",
+ "    en_in_data.append(cv_inp.transform(list(input_t)).toarray().tolist())\n",
+ "\n",
+ "    if len(input_t) < max_input_length_transliteration:\n",
+ "        for _ in range(max_input_length_transliteration - len(input_t)):\n",
+ "            en_in_data[0].append(pad_en)\n",
+ "\n",
+ "    return np.array(en_in_data, dtype=\"float32\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "264e62af",
+ "metadata": {},
+ "source": [
+ "Translation"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "id": "5b799dff",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#Inference model\n",
+ "#load the trained model\n",
+ "model_translation = models.load_model(\"model_translation\")\n",
+ "#construct the encoder model from the output of the second layer;\n",
+ "#discard the encoder output and keep only the states\n",
+ "enc_outputs_translation, state_h_enc_translation, state_c_enc_translation = model_translation.layers[2].output\n",
+ "#build a model from the encoder input to the encoder states\n",
+ "en_model_translation = Model(model_translation.input[0], [state_h_enc_translation, state_c_enc_translation])\n",
+ "#create Input objects for the decoder's hidden and cell states,\n",
+ "#shaped to the hidden (latent) dimension\n",
+ "dec_state_input_h_translation = Input(shape=(256,))\n",
+ "dec_state_input_c_translation = Input(shape=(256,))\n",
+ "dec_states_inputs_translation = [dec_state_input_h_translation, dec_state_input_c_translation]\n",
+ "#run the decoder LSTM on its input, initialized with the state inputs\n",
+ "dec_lstm_translation = model_translation.layers[3]\n",
+ "dec_outputs_translation, state_h_dec_translation, state_c_dec_translation = dec_lstm_translation(\n",
+ "    model_translation.input[1], initial_state=dec_states_inputs_translation\n",
+ ")\n",
+ "dec_states_translation = [state_h_dec_translation, state_c_dec_translation]\n",
+ "dec_dense_translation = model_translation.layers[4]\n",
+ "dec_outputs_translation = dec_dense_translation(dec_outputs_translation)\n",
+ "#create a Model that maps the decoder input plus decoder state inputs\n",
+ "#to the decoder output plus the updated decoder states\n",
+ "dec_model_translation = Model(\n",
+ "    [model_translation.input[1]] + dec_states_inputs_translation, [dec_outputs_translation] + dec_states_translation\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "id": "7fb2775a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def decode_sequence_translation(input_seq):\n",
+ "    #map each index back to its target character\n",
+ "    reverse_target_char_index_translation = dict(enumerate(target_characters_translation))\n",
+ "    #get the encoder states for the user input sequence\n",
+ "    states_value_translation = en_model_translation.predict(input_seq)\n",
+ "\n",
+ "    #fit the vectorizer on the target characters and generate a\n",
+ "    #target sequence of length 1 holding the start character '\\t'\n",
+ "    co_translation = cv_translation.fit(target_characters_translation)\n",
+ "    target_seq_translation = np.array([co_translation.transform(list(\"\\t\")).toarray().tolist()], dtype=\"float32\")\n",
+ "\n",
+ "    #stop the loop once the end of the text is reached\n",
+ "    stop_condition = False\n",
+ "    #append every predicted character to the decoded sentence\n",
+ "    decoded_sentence_translation = \"\"\n",
+ "\n",
+ "    while not stop_condition:\n",
+ "        #predict the next character distribution together with the\n",
+ "        #new hidden and cell states\n",
+ "        output_chars_translation, h_translation, c_translation = dec_model_translation.predict([target_seq_translation] + states_value_translation)\n",
+ "\n",
+ "        #take the most likely index and look up its character\n",
+ "        char_index_translation = np.argmax(output_chars_translation[0, -1, :])\n",
+ "        text_char_translation = reverse_target_char_index_translation[char_index_translation]\n",
+ "        decoded_sentence_translation += text_char_translation\n",
+ "        #exit condition: either hit max length or find the stop character\n",
+ "        if text_char_translation == \"\\n\" or len(decoded_sentence_translation) > max_target_length_translation:\n",
+ "            stop_condition = True\n",
+ "        #update the target sequence to the current character index\n",
+ "        target_seq_translation = np.zeros((1, 1, num_dec_chars_translation))\n",
+ "        target_seq_translation[0, 0, char_index_translation] = 1.0\n",
+ "        states_value_translation = [h_translation, c_translation]\n",
+ "    #return the decoded sentence\n",
+ "    return decoded_sentence_translation\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "id": "8a662484",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def bagofcharacter_translation(input_t):\n",
+ "    cv_translation = CountVectorizer(binary=True, tokenizer=lambda txt: txt.split(), stop_words=None, analyzer='char')\n",
+ "    en_in_data = []\n",
+ "    pad_en = [1] + [0]*(len(input_characters_translation)-1)\n",
+ "\n",
+ "    cv_inp_translation = cv_translation.fit(input_characters_translation)\n",
+ "    en_in_data.append(cv_inp_translation.transform(list(input_t)).toarray().tolist())\n",
+ "\n",
+ "    if len(input_t) < max_input_length_translation:\n",
+ "        for _ in range(max_input_length_translation - len(input_t)):\n",
+ "            en_in_data[0].append(pad_en)\n",
+ "\n",
+ "    return np.array(en_in_data, dtype=\"float32\")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "dad973d9",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "17f284a1",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "id": "80758957",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def translate_to_Konkani(sent):\n",
+ "\n",
+ "    input_text = sent.split(',')\n",
+ "    output_texts = \"\"\n",
+ "    for x in input_text:\n",
+ "        term = x + \".\"\n",
+ "        if term in input_texts_translation:\n",
+ "            en_in_data = bagofcharacter_translation(x.lower() + \".\")\n",
+ "            x = decode_sequence_translation(en_in_data)\n",
+ "        else:\n",
+ "            en_in_data = bagofcharacter_transliteration(x.lower() + \".\")\n",
+ "            x = decode_sequence_transliteration(en_in_data)\n",
+ "        output_texts += \" \" + x\n",
+ "    print(output_texts)\n"
+ ]
+ },
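+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "usage-demo",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#minimal usage sketch: the input sentence here is a hypothetical\n",
+ "#example; the models and pickle files loaded above must be present\n",
+ "translate_to_Konkani(\"hello, how are you\")"
+ ]
+ }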
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+ }