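# Streamlit demo that transliterates text between Tajik Cyrillic and Persian
# script, using a separate model for each direction (tj-fa and fa-tj).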
import torch
import streamlit as st
from model import init_model, predict
from data import Tokenizer, load_config, language_detect
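# Checkpoint paths, one per transliteration direction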
MODEL_PATH_TJ_FA = 'tj-fa.pt'
MODEL_PATH_FA_TJ = 'fa-tj.pt'
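# Load config, tokenizer, and weights for the Tajik -> Persian model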
config_tj_fa = load_config(MODEL_PATH_TJ_FA)
tokenizer_tj_fa = Tokenizer(config_tj_fa)
model_tj_fa = init_model(MODEL_PATH_TJ_FA)
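# Load config, tokenizer, and weights for the Persian -> Tajik model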
config_fa_tj = load_config(MODEL_PATH_FA_TJ)
tokenizer_fa_tj = Tokenizer(config_fa_tj)
model_fa_tj = init_model(MODEL_PATH_FA_TJ)
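# Text input; the default value is a Tajik couplet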
user_input = st.text_area("Enter some text here", value="Он ҷо, ки висоли дӯстон аст,\nВ-оллоҳ, ки миёни хона саҳрост.")
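# Run inference on the GPU when available, otherwise fall back to CPU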
device = "cuda" if torch.cuda.is_available() else "cpu"
# Detect the source language and select the matching model/tokenizer pair
detected_language = language_detect(user_input, tokenizer_tj_fa, tokenizer_fa_tj)
if detected_language == 'tj':
    model = model_tj_fa
    tokenizer = tokenizer_tj_fa
    st.text('Detected language: Tajik (TJ) -> Transliterating to Persian (FA)')
else:
    model = model_fa_tj
    tokenizer = tokenizer_fa_tj
    st.text('Detected language: Persian (FA) -> Transliterating to Tajik (TJ)')
# Run the model on the user's text and store the output
model_output = predict(model, tokenizer, user_input, device)
# Display the transliteration result in a text area
st.text_area('Transliteration:', value=str(model_output))