# 1. The RoBERTa base model is used, fine-tuned using the SQuAD 2.0 dataset. # It’s been trained on question-answer pairs, including unanswerable questions, for the task of question answering. # from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline # import gradio as grad # import ast # mdl_name = "deepset/roberta-base-squad2" # my_pipeline = pipeline('question-answering', model=mdl_name, tokenizer=mdl_name) # def answer_question(question,context): # text= "{"+"'question': '"+question+"','context': '"+context+"'}" # di=ast.literal_eval(text) # response = my_pipeline(di) # return response # grad.Interface(answer_question, inputs=["text","text"], outputs="text").launch() #--------------------------------------------------------------------------------- # 2. Same task, different model. # from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline # import gradio as grad # import ast # mdl_name = "distilbert-base-cased-distilled-squad" # my_pipeline = pipeline('question-answering', model=mdl_name, tokenizer=mdl_name) # def answer_question(question,context): # text= "{"+"'question': '"+question+"','context': '"+context+"'}" # di=ast.literal_eval(text) # response = my_pipeline(di) # return response # grad.Interface(answer_question, inputs=["text","text"], outputs="text").launch() #--------------------------------------------------------------------------------- # 3. Different task: language translation. # from transformers import pipeline # import gradio as grad # First model translates English to German. # mdl_name = "Helsinki-NLP/opus-mt-en-de" # opus_translator = pipeline("translation", model=mdl_name) # def translate(text): # response = opus_translator(text) # return response # grad.Interface(translate, inputs=["text",], outputs="text").launch() #---------------------------------------------------------------------------------- # 4. Language translation without pipeline API. 
# Second model translates English to French. # from transformers import AutoModelForSeq2SeqLM, AutoTokenizer # import gradio as grad # mdl_name = "Helsinki-NLP/opus-mt-en-fr" # mdl = AutoModelForSeq2SeqLM.from_pretrained(mdl_name) # my_tkn = AutoTokenizer.from_pretrained(mdl_name) # def translate(text): # inputs = my_tkn(text, return_tensors="pt") # trans_output = mdl.generate(**inputs) # response = my_tkn.decode(trans_output[0], skip_special_tokens=True) # return response # txt = grad.Textbox(lines=1, label="English", placeholder="English Text here") # out = grad.Textbox(lines=1, label="French") # grad.Interface(translate, inputs=txt, outputs=out).launch() #----------------------------------------------------------------------------------- # 5. Different task: abstractive summarization # Abstractive summarization is more difficult than extractive summarization, # which pulls key sentences from a document and combines them to form a “summary.” # Because abstractive summarization involves paraphrasing words, it is also more time-consuming; # however, it has the potential to produce a more polished and coherent summary. # from transformers import PegasusForConditionalGeneration, PegasusTokenizer # import gradio as grad # mdl_name = "google/pegasus-xsum" # pegasus_tkn = PegasusTokenizer.from_pretrained(mdl_name) # mdl = PegasusForConditionalGeneration.from_pretrained(mdl_name) # def summarize(text): # tokens = pegasus_tkn(text, truncation=True, padding="longest", return_tensors="pt") # txt_summary = mdl.generate(**tokens) # response = pegasus_tkn.batch_decode(txt_summary, skip_special_tokens=True) # return response # txt = grad.Textbox(lines=10, label="English", placeholder="English Text here") # out = grad.Textbox(lines=10, label="Summary") # grad.Interface(summarize, inputs=txt, outputs=out).launch() #------------------------------------------------------------------------------------------ # 6. 
# Same Pegasus model, with tuned generation parameters:
# num_return_sequences=5, max_length=200, temperature=1.5, num_beams=10
from transformers import PegasusForConditionalGeneration, PegasusTokenizer
import gradio as grad

mdl_name = "google/pegasus-xsum"
pegasus_tkn = PegasusTokenizer.from_pretrained(mdl_name)
mdl = PegasusForConditionalGeneration.from_pretrained(mdl_name)


def summarize(text):
    """Generate candidate abstractive summaries for *text*.

    The input is tokenized (truncated to what the tokenizer allows), passed
    through ``mdl.generate`` with beam search settings, and the generated
    sequences are decoded back to plain text.

    Args:
        text: The English text to summarize, as entered in the UI textbox.

    Returns:
        A single string with one decoded candidate summary per line, or an
        empty string when *text* is empty or only whitespace.
    """
    # Nothing to summarize: skip the (expensive) model call entirely.
    if not text or not text.strip():
        return ""

    tokens = pegasus_tkn(
        text, truncation=True, padding="longest", return_tensors="pt"
    )
    # NOTE(review): temperature is normally only used by sampling-based
    # decoding; with num_beams set and no do_sample=True it may be ignored
    # (newer transformers releases may warn). Confirm against the
    # generation docs before relying on it.
    summary_ids = mdl.generate(
        **tokens,
        num_return_sequences=5,
        max_length=200,
        temperature=1.5,
        num_beams=10,
    )
    candidates = pegasus_tkn.batch_decode(summary_ids, skip_special_tokens=True)
    # The output component is a single Textbox, so hand it readable text
    # (one candidate per line) instead of a Python list.
    return "\n".join(candidates)


txt = grad.Textbox(lines=10, label="English", placeholder="English Text here")
out = grad.Textbox(lines=10, label="Summary")
grad.Interface(summarize, inputs=txt, outputs=out).launch()