"""Gradio demo that paraphrases text with a dynamically quantized T5 model."""
from typing import List

import gradio as gr
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

MODEL_NAME = 'humarin/chatgpt_paraphraser_on_T5_base'
CACHE_DIR = './Models'

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, cache_dir=CACHE_DIR)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME, cache_dir=CACHE_DIR)

# inplace=True mutates `model` itself, so the return value is not needed:
# every torch.nn.Linear module is swapped for its dynamic int8 counterpart.
torch.quantization.quantize_dynamic(
    model, {torch.nn.Linear}, dtype=torch.qint8, inplace=True
)


def paraphrase(
    model,
    text,
    max_length=128,
    num_return_sequences=5,
    num_beams=25,
    temperature=0.7,
) -> List[str]:
    """Generate paraphrases of *text* with beam search.

    Args:
        model: Seq2seq model whose ``generate`` method produces the output ids.
        text: Sentence(s) to paraphrase. ``None``, empty or whitespace-only
            input yields an empty list without calling the model.
        max_length: Token limit used both for truncating the prompt and for
            the generated sequences.
        num_return_sequences: Number of paraphrases to return (coerced to an
            int, minimum 1).
        num_beams: Beam width (coerced to an int). Raised to
            ``num_return_sequences`` when smaller, because beam search cannot
            return more sequences than it has beams.
        temperature: Forwarded unchanged to ``model.generate``.

    Returns:
        The decoded paraphrases with special tokens stripped.
    """
    if not text or not text.strip():
        return []

    # UI sliders may deliver floats; generate() expects integer counts.
    num_return_sequences = max(1, int(num_return_sequences))
    # Requesting more sequences than beams makes generate() raise, so widen
    # the beam instead of failing the request.
    num_beams = max(int(num_beams), num_return_sequences)

    encoded = tokenizer(
        f'paraphrase: {text}',
        return_tensors="pt",
        padding="longest",
        max_length=max_length,
        truncation=True,
    )

    # NOTE(review): do_sample is not enabled here, so this looks like plain
    # beam search; the transformers docs describe `temperature` as a
    # sampling-mode parameter - confirm whether sampling was intended.
    outputs = model.generate(
        encoded.input_ids,
        attention_mask=encoded.attention_mask,
        temperature=temperature,
        repetition_penalty=1.5,
        num_return_sequences=num_return_sequences,
        no_repeat_ngram_size=5,
        num_beams=num_beams,
        max_length=max_length,
    )

    return tokenizer.batch_decode(outputs, skip_special_tokens=True)


def fn(text, results_num=5, beams_num=25, temperature=0.7) -> str:
    """Gradio callback: return the paraphrases of *text*, one per line."""
    candidates = paraphrase(
        model,
        text,
        num_return_sequences=results_num,
        num_beams=beams_num,
        temperature=temperature,
    )
    return '\n'.join(candidates)


demo = gr.Interface(
    fn=fn,
    inputs=[
        gr.Textbox(lines=3, placeholder='Enter Text To Paraphrase'),
        gr.Slider(minimum=1, maximum=10, step=1, value=5),
        gr.Slider(minimum=1, maximum=50, step=1, value=25),
        gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.7),
    ],
    outputs=['text'],
)

if __name__ == "__main__":
    # Only start the (publicly shared) server when run as a script, so the
    # module can be imported without launching it.
    demo.launch(share=True)