import gradio as gr
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import fasttext

# Initialize fastText model (used only to report the language of the input).
# NOTE(review): the publicly distributed fastText language-ID model is named
# "lid.176.ftz"; confirm that 'lid.323.ftz' is the file actually shipped.
model_path = 'lid.323.ftz'
language_model = fasttext.load_model(model_path)

# Translation model and its tokenizer.
model_path_translation = "anzorq/m2m100_418M_ft_ru-kbd_44K"
tokenizer = AutoTokenizer.from_pretrained(model_path_translation)
model = AutoModelForSeq2SeqLM.from_pretrained(model_path_translation, use_safetensors=True)

# Target-language code whose token id is forced as the first generated token.
# NOTE(review): the original script referenced an undefined `tgt_lang`
# (NameError on every call). "zu" is assumed to be the language-code slot this
# fine-tuned checkpoint uses for Kabardian -- confirm against the model card.
TGT_LANG = "zu"


def translate(text, num_beams=4, num_return_sequences=4, tgt_lang=TGT_LANG):
    """Detect the language of *text* and translate it with beam search.

    Args:
        text: Input text to translate.
        num_beams: Beam width passed to ``model.generate``.
        num_return_sequences: Number of candidates requested; clamped so it
            never exceeds ``num_beams``.
        tgt_lang: Language code looked up in ``tokenizer.lang_code_to_id`` to
            obtain the forced first token of the output.

    Returns:
        A ``(detected_language, text, translations)`` tuple where
        ``detected_language`` is the top fastText label without its
        ``__label__`` prefix, ``text`` is the unchanged input, and
        ``translations`` is a list of decoded candidate strings.
    """
    # Detect language. fastText's predict() handles one line at a time and
    # raises ValueError on embedded newlines, so flatten them for detection
    # only; the translation below still sees the original text.
    single_line = text.replace("\n", " ")
    languages, _ = language_model.predict(single_line, k=1)
    detected_language = languages[0].replace("__label__", "")

    # UI sliders may deliver numeric values as floats; generate() needs ints.
    num_beams = int(num_beams)
    num_return_sequences = min(int(num_return_sequences), num_beams)

    inputs = tokenizer(text, return_tensors="pt")
    translated_tokens = model.generate(
        **inputs,
        forced_bos_token_id=tokenizer.lang_code_to_id[tgt_lang],
        num_beams=num_beams,
        num_return_sequences=num_return_sequences,
    )

    translations = [
        tokenizer.decode(translation, skip_special_tokens=True)
        for translation in translated_tokens
    ]
    return detected_language, text, translations


title = "Russian-Circassian translator demo"
article = """
Want to help? Join the Discord server
"""

# `gr.inputs.Slider(..., default=...)` is the deprecated component namespace;
# the top-level component takes the initial value via `value=`.
num_beams = gr.Slider(2, 10, step=1, label="Number of beams", value=4)
num_return_sequences = gr.Slider(2, 10, step=1, label="Number of returned sentences", value=4)

# `gr.Interface` has no `titles` argument; per-output captions are set through
# each output component's `label` instead.
gr.Interface(
    fn=translate,
    inputs=["text", num_beams, num_return_sequences],
    outputs=[
        gr.Textbox(label="Detected Language"),
        gr.Textbox(label="Input"),
        gr.Textbox(label="Translations"),
    ],
    title=title,
    article=article).launch()

# import gradio as gr
# title = "Русско-черкесский переводчик"
# description = "Demo of a Russian-Circassian (Kabardian dialect) translator.