File size: 3,428 Bytes
896d0ef
 
 
 
de3a317
7d5a525
896d0ef
 
 
296b6b4
 
 
 
 
896d0ef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11dcb45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
896d0ef
11dcb45
 
 
 
 
 
 
 
 
 
 
 
 
896d0ef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1090bad
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import os

import gradio as gr
import requests
import nltk
from nltk import sent_tokenize
from transformers import pipeline
from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
from dotenv import load_dotenv

# Sentence-tokenizer data required by nltk.sent_tokenize().
nltk.download('punkt')
nltk.download('punkt_tab')

load_dotenv()

# Hugging Face Inference API token (only needed by the remote-API path,
# currently commented out in translation()).
api_token_header = os.getenv("api_token_header")

# NOTE(review): the original code also built an unused
# pipeline("text2text-generation", ...) here, which eagerly downloaded and
# loaded the full 13B model at import time; it was never referenced, so it
# has been removed.

# Default tokenizer with an English source language; translation() creates a
# per-call tokenizer when the source language differs.
tokenizer = MBart50TokenizerFast.from_pretrained("SnypzZz/Llama2-13b-Language-translate", src_lang="en_XX")

# The translation model is loaded lazily by load_model() on first use.
model = None
model_loaded = False


def load_model():
    """Load the MBart translation model and cache it in module globals.

    Sets the module-level ``model`` and ``model_loaded`` flags and returns
    the loaded MBartForConditionalGeneration instance.
    """
    global model, model_loaded
    model = MBartForConditionalGeneration.from_pretrained(
        "SnypzZz/Llama2-13b-Language-translate"
    )
    model_loaded = True
    return model

def translation(text, dest_lang, dest_lang_code, src_lang_code):
    """Translate one piece of text (typically a single sentence) with MBart.

    Args:
        text: Text to translate.
        dest_lang: Human-readable destination language name (unused by the
            local MBart path; kept for interface compatibility).
        dest_lang_code: MBart code of the destination language, e.g. "hi_IN".
        src_lang_code: MBart code of the source language, e.g. "en_XX".

    Returns:
        The translated string, or an advisory message when source and
        destination languages are identical.
    """
    if dest_lang_code == src_lang_code:
        return "Please select different languages to translate between."

    # Lazily load the (large) model on the first real translation request.
    global model
    if model is None:
        model = load_model()

    # A per-call tokenizer is required because src_lang is fixed at
    # construction time; the module-level tokenizer is English-source only.
    src_tokenizer = MBart50TokenizerFast.from_pretrained(
        "SnypzZz/Llama2-13b-Language-translate", src_lang=src_lang_code
    )
    model_inputs = src_tokenizer(text, return_tensors="pt")

    # Force the decoder to begin generation in the destination language.
    generated_tokens = model.generate(
        **model_inputs,
        forced_bos_token_id=src_tokenizer.lang_code_to_id[dest_lang_code],
    )
    output = src_tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
    print(output)
    return output[0]
    

def main_translation(text, dest_lang_code, src_lang_code):
    """Translate ``text`` sentence-by-sentence and return the joined result.

    Args:
        text: Source text; may contain several sentences.
        dest_lang_code: MBart code of the destination language.
        src_lang_code: MBart code of the source language.

    Returns:
        ``{"output": translated_text}``.

    Raises:
        KeyError: If either language code is not a supported code.
    """
    codes = {
        "en_XX": "English",
        "bn_IN": "Bengali",
        "en_GB": "English",
        "gu_IN": "Gujarati",
        "hi_IN": "Hindi",
        "ta_IN": "Tamil",
        "te_IN": "Telugu",
        "mr_IN": "Marathi",
    }
    dest_lang = codes[dest_lang_code]
    # Looked up for validation only: rejects unknown source codes early.
    src_lang = codes[src_lang_code]

    # Translate each sentence separately and join with spaces; the original
    # plain concatenation fused the last word of one sentence onto the next.
    sentences = sent_tokenize(text)
    output = " ".join(
        translation(sentence, dest_lang, dest_lang_code, src_lang_code)
        for sentence in sentences
    )
    return {"output": output}


def test(text, src, dest):
    """Gradio callback: translate ``text`` from code ``src`` to code ``dest``."""
    result = main_translation(text, dest, src)
    return result["output"]
# One shared (label, MBart code) choice list for both dropdowns.
LANGUAGE_CHOICES = [
    ("English", "en_XX"),
    ("Hindi", "hi_IN"),
    ("Bengali", "bn_IN"),
    ("Gujarati", "gu_IN"),
    ("Tamil", "ta_IN"),
    ("Telugu", "te_IN"),
    ("Marathi", "mr_IN"),
]

demo = gr.Interface(
    test,
    [
        "textbox",
        gr.Dropdown(
            LANGUAGE_CHOICES,
            label="Source",
            info="Select the Source Language!",
        ),
        gr.Dropdown(
            LANGUAGE_CHOICES,
            label="Destination",
            info="Select the Destination Language!",
        ),
    ],
    outputs=["textbox"],
)

demo.launch(share=True)