NMTKD / combined_proj.py
sakharamg's picture
Uploading all files
158b61b
raw
history blame
3.29 kB
import streamlit as st
import pandas as pd
import numpy as np
import re
from datetime import datetime
import subprocess
from fairseq.models.transformer import TransformerModel
# Elapsed-time slot; every button handler below replaces this with the
# timedelta measured for its own run.
time_interval = 0

st.title('Knowledge Distillation in Neural Machine Translation')

# English source sentence entered by the user; all translation buttons read it.
title = st.text_input(
    label='English Text',
    value='I welcome you to the demonstration.',
)
if st.button('En-Hi Teacher'):
    # The teacher model is driven through an external shell script that
    # exchanges text via files: the English input is written to the devtest
    # file, the script is run, and the Hindi result is read back from the
    # script's output file.
    time_1 = datetime.now()

    # Explicit UTF-8 so the text round-trips regardless of the process locale
    # (the output file holds Hindi text); `with` guarantees the handle is
    # closed even if the write fails.
    with open("translation/input-files/flores/eng.devtest", "w", encoding="utf-8") as source_file:
        source_file.write(title)

    # Run the script from inside translation/ (equivalent to the former
    # `cd translation && bash -i translate-en-hi.sh && cd ..`), passing an
    # argument list instead of a shell string.
    # NOTE(review): `-i` (interactive bash) is kept as in the original --
    # presumably so the user's shell startup files (conda setup) are
    # sourced; confirm before removing it.
    completed = subprocess.run(['bash', '-i', 'translate-en-hi.sh'], cwd='translation')

    time_2 = datetime.now()
    time_interval = time_2 - time_1

    # Previously the exit status was ignored, so a failed run silently showed
    # whatever an earlier run had left in the output file. Surface that case
    # to the user but still show the file, preserving best-effort behavior.
    if completed.returncode != 0:
        st.warning(
            f'translate-en-hi.sh exited with status {completed.returncode}; '
            'the translation shown below may be stale or incomplete.'
        )

    with open("translation/output-translation/flores/test-flores.hi", "r", encoding="utf-8") as result_file:
        hindi_text = result_file.read()

    st.write('Hindi Translation: ', hindi_text)
    st.write('Inference Time: ', time_interval)
# BPE codes shared by every fairseq model below.
# NOTE(review): this is a machine-specific absolute path; the app only works
# where this file exists at exactly this location.
_BPE_CODES = '/home/sakharam/RnD/translation/en-hi/bpe-codes/codes.en'

# One entry per button: (button label, model directory, checkpoint file).
# The checkpoint paths are passed to fairseq unchanged, exactly as before.
_FAIRSEQ_MODELS = (
    ('En-Hi Student', 'Student_en_hi/out_distill/tokenized.en-hi/', '../../checkpoint_use.pt'),
    ('Law En-Hi Teacher', 'law/out/tokenized.en-hi/', '../../checkpoint_best.pt'),
    ('Sports En-Hi Teacher', 'sports/out/tokenized.en-hi/', '../../checkpoint_best.pt'),
    ('Tourism En-Hi Teacher', 'tourism/out/tokenized.en-hi/', '../../checkpoint_best.pt'),
    ('Multi-Domain En-Hi Student', 'multi/out/tokenized.en-hi/', '../../checkpoint_best.pt'),
)


def _load_and_translate(model_dir, checkpoint_file, text):
    """Load a fairseq transformer checkpoint and translate ``text`` with it.

    Args:
        model_dir: Directory handed to ``TransformerModel.from_pretrained``.
        checkpoint_file: Checkpoint path handed to ``from_pretrained``.
        text: English input; it is lowercased before translation.

    Returns:
        The first element of the batch returned by ``translate`` for the
        single-sentence input.
    """
    # NOTE: the input is only lowercased. A regex-based punctuation-spacing
    # step existed in the original but was commented out, so it stays out.
    model = TransformerModel.from_pretrained(
        model_dir,
        checkpoint_file=checkpoint_file,
        bpe='subword_nmt',
        bpe_codes=_BPE_CODES,
        tokenizer='moses',
    )
    return model.translate([text.lower()])[0]


# Render the buttons in their original order; a clicked button prints its
# result directly beneath itself, before the next button is drawn.
for _label, _model_dir, _checkpoint in _FAIRSEQ_MODELS:
    if st.button(_label):
        time_1 = datetime.now()
        _translation = _load_and_translate(_model_dir, _checkpoint, title)
        # Stop the clock only after translate() has returned. The timestamp
        # used to be taken right after from_pretrained(), so the reported
        # "Inference Time" covered model loading only and excluded the
        # translation. Load + translate is timed here, matching the
        # 'En-Hi Teacher' path, which times its whole script run.
        time_2 = datetime.now()
        time_interval = time_2 - time_1
        st.write('Hindi Translation: ', _translation)
        st.write('Inference Time: ', time_interval)