import gradio as gr
import numpy as np
import torch
import torch.onnx
from huggingface_hub import hf_hub_download

from transformer import Transformer

# Generated this by filtering Appendix code

# Special tokens. They must be three *distinct* strings that cannot collide
# with any ordinary single-character vocabulary entry: the reverse lookup
# tables below are keyed by token text, so identical tokens would all map to
# one index and the end-of-sequence check could not tell them apart.
# NOTE(review): these literals were empty strings in the mangled source
# (angle-bracket text appears to have been stripped). Restored to the
# conventional markers -- confirm against the training script. Only the
# token *positions* in the lists below affect the checkpoint's embeddings.
START_TOKEN = '<START>'
PADDING_TOKEN = '<PADDING>'
END_TOKEN = '<END>'

# Character-level source vocabulary. The order defines the embedding indices,
# so it must not be changed.
english_vocabulary = [
    START_TOKEN, ' ', '!', '"', '#', '$', '%', '&', "'", '(', ')', '*', '+',
    ',', '-', '.', '/',
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    ':', '<', '=', '>', '?', '@',
    '[', '\\', ']', '^', '_', '`',
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
    '{', '|', '}', '~',
    PADDING_TOKEN, END_TOKEN,
]

# Character-level target vocabulary. The order defines the output indices,
# so it must not be changed.
gujarati_vocabulary = [
    START_TOKEN, ' ', '!', '"', '#', '$', '%', '&', "'", '(', ')', '*', '+',
    ',', '-', '.', '/',
    '૦', '૧', '૨', '૩', '૪', '૫', '૬', '૭', '૮', '૯',
    ':', '<', '=', '>', '?', '@',
    '[', '\\', ']', '^', '_', '`',
    'અ', 'આ', 'ઇ', 'ઈ', 'ઉ', 'ઊ', 'ઋ', 'એ', 'ઐ', 'ઓ', 'ઔ',
    'ક', 'ખ', 'ગ', 'ઘ', 'ઙ',
    'ચ', 'છ', 'જ', 'ઝ', 'ઞ',
    'ટ', 'ઠ', 'ડ', 'ઢ', 'ણ',
    'ત', 'થ', 'દ', 'ધ', 'ન',
    'પ', 'ફ', 'બ', 'ભ', 'મ',
    'ય', 'ર', 'લ', 'વ', 'શ', 'ષ', 'સ', 'હ', 'ળ', 'ક્ષ', 'જ્ઞ',
    'ં', 'ઃ', 'ઁ',
    'ા', 'િ', 'ી', 'ુ', 'ૂ', 'ે', 'ૈ', 'ો', 'ૌ', '્', 'ૐ',
    '{', '|', '}', '~',
    PADDING_TOKEN, END_TOKEN,
]

# Forward (index -> token) and reverse (token -> index) lookup tables.
index_to_gujarati = dict(enumerate(gujarati_vocabulary))
gujarati_to_index = {token: index for index, token in enumerate(gujarati_vocabulary)}
index_to_english = dict(enumerate(english_vocabulary))
english_to_index = {token: index for index, token in enumerate(english_vocabulary)}

# Model hyperparameters; they have to match the ones the checkpoint was
# trained with, otherwise loading the state dict will fail.
d_model = 512
ffn_hidden = 2048
num_heads = 8
drop_prob = 0.1
num_layers = 6
max_sequence_length = 200
# Size of the target (Gujarati) vocabulary; the `kn_` prefix is a historical
# name kept so existing references keep working.
kn_vocab_size = len(gujarati_vocabulary)

# Inference runs on CPU; GPU auto-detection was disabled in the original
# script.
device = torch.device('cpu')

transformer = Transformer(
    d_model,
    ffn_hidden,
    num_heads,
    drop_prob,
    num_layers,
    max_sequence_length,
    kn_vocab_size,
    english_to_index,
    gujarati_to_index,
    START_TOKEN,
    END_TOKEN,
    PADDING_TOKEN,
)
# Fetch the trained weights from the Hugging Face Hub and load them into the
# already-constructed model.
model_file = hf_hub_download(
    repo_id="yashAI007/English_to_Gujarati_Translation",
    filename="model.pth",
)
# SECURITY NOTE: torch.load unpickles the file, which can execute arbitrary
# code. This is only acceptable because the checkpoint comes from a repository
# the author controls; consider `weights_only=True` if the checkpoint contains
# nothing but tensors.
model = torch.load(model_file, map_location='cpu')
transformer.load_state_dict(model['model_state_dict'])
transformer.to(device)
transformer.eval()  # inference mode for the module (e.g. disables dropout)

# Additive mask value: large negative number placed at blocked positions.
NEG_INFTY = -1e9


def create_masks(eng_batch, kn_batch):
    """Build the additive attention masks for a batch of sentence pairs.

    Args:
        eng_batch: Sequence of source (English) sentences.
        kn_batch: Sequence of target (Gujarati) sentences, one per source
            sentence.

    Returns:
        A tuple ``(encoder_self_attention_mask, decoder_self_attention_mask,
        decoder_cross_attention_mask)``. Each tensor has shape
        ``(len(eng_batch), max_sequence_length, max_sequence_length)`` and
        holds ``NEG_INFTY`` at blocked positions and ``0`` elsewhere.
    """
    num_sentences = len(eng_batch)
    mask_shape = [num_sentences, max_sequence_length, max_sequence_length]

    # True strictly above the diagonal: a position may not look at later ones.
    look_ahead_mask = torch.triu(
        torch.full([max_sequence_length, max_sequence_length], True),
        diagonal=1,
    )
    encoder_padding_mask = torch.full(mask_shape, False)
    decoder_padding_mask_self_attention = torch.full(mask_shape, False)
    decoder_padding_mask_cross_attention = torch.full(mask_shape, False)

    for idx in range(num_sentences):
        # Everything from index len + 1 onwards is treated as padding. The
        # extra slot presumably leaves room for a start/end marker -- verify
        # against the tokenizer inside Transformer.
        eng_pad_start = len(eng_batch[idx]) + 1
        kn_pad_start = len(kn_batch[idx]) + 1

        encoder_padding_mask[idx, :, eng_pad_start:] = True
        encoder_padding_mask[idx, eng_pad_start:, :] = True
        decoder_padding_mask_self_attention[idx, :, kn_pad_start:] = True
        decoder_padding_mask_self_attention[idx, kn_pad_start:, :] = True
        # Cross attention: key columns follow the source length, query rows
        # follow the target length.
        decoder_padding_mask_cross_attention[idx, :, eng_pad_start:] = True
        decoder_padding_mask_cross_attention[idx, kn_pad_start:, :] = True

    encoder_self_attention_mask = torch.where(encoder_padding_mask, NEG_INFTY, 0)
    # A decoder position is blocked if it is either in the future or padding.
    decoder_self_attention_mask = torch.where(
        look_ahead_mask | decoder_padding_mask_self_attention, NEG_INFTY, 0
    )
    decoder_cross_attention_mask = torch.where(
        decoder_padding_mask_cross_attention, NEG_INFTY, 0
    )
    return (
        encoder_self_attention_mask,
        decoder_self_attention_mask,
        decoder_cross_attention_mask,
    )


def translate(eng_sentence):
    """Translate one English sentence into Gujarati by greedy decoding.

    The input is lower-cased, then the model is queried one output position
    at a time; at each step the highest-scoring token is appended to the
    translation. Decoding stops when the model emits ``END_TOKEN`` or after
    ``max_sequence_length`` steps.

    Args:
        eng_sentence: The English text to translate.

    Returns:
        The Gujarati translation, without the end-of-sequence marker. An
        empty or whitespace-only input yields an empty string.
    """
    print("English Sentence:", eng_sentence)
    if not eng_sentence or not eng_sentence.strip():
        # Nothing to translate; skip the (expensive) decoding loop.
        return ""

    eng_batch = (eng_sentence.lower(),)
    translation = ""

    # No gradients are needed for inference; this avoids building an autograd
    # graph on every decoding step.
    with torch.no_grad():
        for position in range(max_sequence_length):
            kn_batch = (translation,)
            (
                encoder_self_attention_mask,
                decoder_self_attention_mask,
                decoder_cross_attention_mask,
            ) = create_masks(eng_batch, kn_batch)
            predictions = transformer(
                eng_batch,
                kn_batch,
                encoder_self_attention_mask.to(device),
                decoder_self_attention_mask.to(device),
                decoder_cross_attention_mask.to(device),
                enc_start_token=False,
                enc_end_token=False,
                dec_start_token=True,
                dec_end_token=False,
            )
            # Scores for the current output position of the only sentence in
            # the batch (assumes predictions is indexed [batch][position]).
            next_token_index = torch.argmax(predictions[0][position]).item()
            next_token = index_to_gujarati[next_token_index]
            if next_token == END_TOKEN:
                # Stop without appending the marker, so nothing has to be
                # stripped afterwards and output that hits the length limit
                # is never truncated.
                break
            translation += next_token

    print("Gujarati Sentence:", translation, '\n')
    return translation


# Sample inputs offered in the web UI.
examples = [
    ["Hello, how are you?"],
    ["What is your name?"],
    ["I love programming."],
    ["This is a beautiful day."],
    ["Can you help me with this?"],
    ["What time is it?"],
    ["I am learning data science."],
    ["Where is the nearest bus stop?"],
    ["I enjoy reading books."],
    ["Thank you for your help."],
]

description = "This tool translates English sentences into Gujarati. Please enter your text above to get started!"

iface = gr.Interface(
    fn=translate,
    inputs="text",
    outputs="text",
    title="English to Gujarati Translation",
    examples=examples,
    description=description,
)

if __name__ == "__main__":
    iface.launch()