from transformers import AutoModelForQuestionAnswering, AutoModelForSeq2SeqLM, AutoTokenizer, PegasusForConditionalGeneration, PegasusTokenizer, pipeline
import gradio as grad

# mdl_name = "deepset/roberta-base-squad2"
# my_pipeline = pipeline('question-answering', model=mdl_name, tokenizer=mdl_name)

# model_translate_name = 'danhsf/m2m100_418M-finetuned-kde4-en-to-pt_BR'
# model_translate = AutoModelForSeq2SeqLM.from_pretrained(model_translate_name)
# model_translate_token = AutoTokenizer.from_pretrained(model_translate_name)
# translate_pipeline = pipeline('translation', model=model_translate_name)

def answer_question(question, context):
    # pass the inputs as a dict directly; building a dict literal as a string
    # and parsing it with ast.literal_eval breaks as soon as the text contains
    # a quote. The pipeline also accepts keyword arguments:
    # my_pipeline(question=question, context=context)
    response = my_pipeline({'question': question, 'context': context})
    return response
#grad.Interface(answer_question, inputs=["text","text"], outputs="text").launch()


def translate(text):
    inputs = model_translate_token(text, return_tensors='pt')
    translate_output = model_translate.generate(**inputs)
    # decode the generated token ids back into a string
    response = model_translate_token.decode(translate_output[0], skip_special_tokens=True)
    #response = translate_pipeline(text)
    return response
# grad.Interface(translate, inputs=['text',], outputs='text').launch()


# mdl_name = "google/pegasus-xsum"
# pegasus_tkn = PegasusTokenizer.from_pretrained(mdl_name)
# mdl = PegasusForConditionalGeneration.from_pretrained(mdl_name)

def summarize(text):
    tokens = pegasus_tkn(text, truncation=True, padding="longest", return_tensors="pt")
    # beam search returning five candidate summaries; temperature is not set
    # because it only takes effect when do_sample=True
    txt_summary = mdl.generate(**tokens, num_return_sequences=5, max_length=200, num_beams=10)
    response = pegasus_tkn.batch_decode(txt_summary, skip_special_tokens=True)
    return response
    
# txt=grad.Textbox(lines=10, label="English", placeholder="English Text here")
# out=grad.Textbox(lines=10, label="Summary")
# grad.Interface(summarize, inputs=txt, outputs=out).launch()
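
# A minimal variant (assumption: the pegasus_tkn/mdl pair above is loaded)
# that returns only the top beam instead of five candidates, which reads
# better in a single textbox:
def summarize_best(text):
    tokens = pegasus_tkn(text, truncation=True, padding="longest", return_tensors="pt")
    ids = mdl.generate(**tokens, num_beams=10, max_length=200)
    return pegasus_tkn.batch_decode(ids, skip_special_tokens=True)[0]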

# ZeroShotClassification using pipeline
# from transformers import pipeline
# import gradio as grad
# zero_shot_classifier = pipeline("zero-shot-classification")
def classify(text, labels):
    classifier_labels = labels.split(",")
    #["software", "politics", "love", "movies", "emergency", "advertisement", "sports"]
    response = zero_shot_classifier(text, classifier_labels)
    return response
# txt=grad.Textbox(lines=1, label="English", placeholder="text to be classified")
# labels=grad.Textbox(lines=1, label="Labels", placeholder="comma separated labels")
# out=grad.Textbox(lines=1, label="Classification")
# grad.Interface(classify, inputs=[txt,labels], outputs=out).launch()

# Text classification using BartForSequenceClassification
# from transformers import BartForSequenceClassification, BartTokenizer
# import gradio as grad
# bart_tkn = BartTokenizer.from_pretrained('facebook/bart-large-mnli')
# mdl = BartForSequenceClassification.from_pretrained('facebook/bart-large-mnli')
def classify_entailment(text, label):
    # renamed from classify so it no longer shadows the zero-shot demo above
    # facebook/bart-large-mnli labels: 0 = contradiction, 1 = neutral, 2 = entailment
    tkn_ids = bart_tkn.encode(text, label, return_tensors='pt')
    tkn_lgts = mdl(tkn_ids)[0]
    # keep only the contradiction and entailment logits, then softmax over them
    entail_contra_tkn_lgts = tkn_lgts[:, [0, 2]]
    probab = entail_contra_tkn_lgts.softmax(dim=1)
    response = probab[:, 1].item() * 100   # probability (%) that the label is entailed
    return response
# txt=grad.Textbox(lines=1, label="English", placeholder="text to be classified")
# labels=grad.Textbox(lines=1, label="Label", placeholder="Input a Label")
# out=grad.Textbox(lines=1, label="Probability of label being true")
# grad.Interface(classify_entailment, inputs=[txt,labels], outputs=out).launch()
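
# The same NLI trick extends to several candidate labels; a minimal sketch
# (assumes the bart_tkn/mdl pair above is loaded; the hypothesis template
# "This text is about ..." is an assumption, not part of the original demo):
def classify_multi(text, labels):
    scores = {}
    for label in labels.split(","):
        tkn_ids = bart_tkn.encode(text, "This text is about " + label + ".", return_tensors='pt')
        lgts = mdl(tkn_ids)[0][:, [0, 2]]    # contradiction vs entailment logits
        scores[label] = lgts.softmax(dim=1)[:, 1].item()
    return scores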

# GPT2
# from transformers import GPT2LMHeadModel,GPT2Tokenizer
# import gradio as grad
# mdl = GPT2LMHeadModel.from_pretrained('gpt2')
# gpt2_tkn=GPT2Tokenizer.from_pretrained('gpt2')
def generate(starting_text):
    tkn_ids = gpt2_tkn.encode(starting_text, return_tensors='pt')
    # no_repeat_ngram_size expects an int (the n-gram size), not a bool;
    # num_return_sequences makes the enumeration below meaningful
    gpt2_tensors = mdl.generate(tkn_ids, max_length=100, no_repeat_ngram_size=2,
                                num_beams=3, do_sample=True, num_return_sequences=3)
    response = ""
    for i, x in enumerate(gpt2_tensors):
        response += f"{i}: {gpt2_tkn.decode(x, skip_special_tokens=True)}\n"
    return response
# txt=grad.Textbox(lines=1, label="English", placeholder="English Text here")
# out=grad.Textbox(lines=1, label="Generated Text")
# grad.Interface(generate, inputs=txt, outputs=out).launch()

# DistilGPT2
# from transformers import pipeline, set_seed
# import gradio as grad
# gpt2_pipe = pipeline('text-generation', model='distilgpt2')
# set_seed(42)
def generateDistilGPT2(starting_text):
    # the pipeline returns a list of {'generated_text': ...} dicts; join the
    # strings so the output textbox shows readable text
    outputs = gpt2_pipe(starting_text, max_length=20, num_return_sequences=5)
    response = "\n".join(o['generated_text'] for o in outputs)
    return response
# txt=grad.Textbox(lines=1, label="English", placeholder="English Text here")
# out=grad.Textbox(lines=1, label="Generated Text")
# grad.Interface(generateDistilGPT2, inputs=txt, outputs=out).launch()

# Text Generation
# Question Generation
# from transformers import AutoModelWithLMHead, AutoTokenizer
# import gradio as grad
# text2text_tkn = AutoTokenizer.from_pretrained("mrm8488/t5-base-finetuned-question-generation-ap")
# mdl = AutoModelWithLMHead.from_pretrained("mrm8488/t5-base-finetuned-question-generation-ap")
def text2text(context, answer):
    input_text = "answer: %s  context: %s </s>" % (answer, context)
    features = text2text_tkn([input_text], return_tensors='pt')
    output = mdl.generate(input_ids=features['input_ids'],
                          attention_mask=features['attention_mask'],
                          max_length=64)
    # skip_special_tokens drops the <pad>/</s> markers from the decoded question
    response = text2text_tkn.decode(output[0], skip_special_tokens=True)
    return response
# context=grad.Textbox(lines=10, label="English", placeholder="Context")
# ans=grad.Textbox(lines=1, label="Answer")
# out=grad.Textbox(lines=1, label="Generated Question")
# grad.Interface(text2text, inputs=[context,ans], outputs=out).launch()

# T5 summarizer
# from transformers import AutoTokenizer, AutoModelWithLMHead
# import gradio as grad
# text2text_tkn = AutoTokenizer.from_pretrained("deep-learning-analytics/wikihow-t5-small")
# mdl = AutoModelWithLMHead.from_pretrained("deep-learning-analytics/wikihow-t5-small")
def text2text_summary(para):
    # replace newlines with spaces so words at line breaks don't get merged
    initial_txt = para.strip().replace("\n", " ")
    tkn_text = text2text_tkn.encode(initial_txt, return_tensors="pt")
    tkn_ids = mdl.generate(
            tkn_text,
            max_length=250,
            num_beams=5,
            repetition_penalty=2.5,
            early_stopping=True
        )
    response = text2text_tkn.decode(tkn_ids[0], skip_special_tokens=True)
    return response
# para=grad.Textbox(lines=10, label="Paragraph", placeholder="Copy paragraph")
# out=grad.Textbox(lines=1, label="Summary")
# grad.Interface(text2text_summary, inputs=para, outputs=out).launch()

# T5 Translate
# from transformers import T5ForConditionalGeneration, T5Tokenizer
# import gradio as grad
# text2text_tkn= T5Tokenizer.from_pretrained("t5-small")
# mdl = T5ForConditionalGeneration.from_pretrained("t5-small")
def text2text_translation(text):
    # t5-small was only pretrained with English-to-German/French/Romanian
    # task prefixes, so a Portuguese prefix produces unreliable output;
    # German is used here
    inp = "translate English to German: " + text
    enc = text2text_tkn(inp, return_tensors="pt", max_length=512, truncation=True)
    tokens = mdl.generate(**enc, max_length=100, num_return_sequences=1, early_stopping=True)
    response = text2text_tkn.decode(tokens[0], skip_special_tokens=True)
    return response
# para=grad.Textbox(lines=1, label="English Text", placeholder="Text in English")
# out=grad.Textbox(lines=1, label="German Translation")
# grad.Interface(text2text_translation, inputs=para, outputs=out).launch()
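
# If Portuguese output is the goal, a dedicated en->pt checkpoint is a better
# fit than t5-small; a minimal sketch reusing the commented-out m2m100
# pipeline from the top of this file (assumes that checkpoint is available):
# pt_pipe = pipeline('translation', model='danhsf/m2m100_418M-finetuned-kde4-en-to-pt_BR')
# response = pt_pipe(text)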



# ChatBot
from transformers import AutoModelForCausalLM, AutoTokenizer, BlenderbotForConditionalGeneration
import torch
chat_tkn = AutoTokenizer.from_pretrained("microsoft/DialoGPT-medium")
mdl = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-medium")
#chat_tkn = AutoTokenizer.from_pretrained("facebook/blenderbot-400M-distill")
#mdl = BlenderbotForConditionalGeneration.from_pretrained("facebook/blenderbot-400M-distill")


def converse(user_input, chat_history=[]):
    # append the end-of-text token so the model sees a completed user turn
    user_input_ids = chat_tkn(user_input + chat_tkn.eos_token, return_tensors='pt').input_ids
    # keep the whole conversation history in one tensor of token ids
    bot_input_ids = torch.cat([torch.LongTensor(chat_history), user_input_ids], dim=-1)
    # generate the bot's reply conditioned on the full history
    chat_history = mdl.generate(bot_input_ids, max_length=1000, pad_token_id=chat_tkn.eos_token_id).tolist()
    # splitting on the eos token yields alternating user/bot messages
    response = chat_tkn.decode(chat_history[0]).split("<|endoftext|>")
    # build the html transcript for display
    html = "<div class='mychat'>"
    for x, mesg in enumerate(response):
        if not mesg:
            continue  # skip the empty string left after the trailing eos token
        if x % 2 != 0:
            mesg = "Alicia:" + mesg
            clazz = "alicia"
        else:
            clazz = "user"
        html += "<div class='mesg {}'> {}</div>".format(clazz, mesg)
    html += "</div>"
    return html, chat_history
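
# Note: the history tensor above grows every turn while generation is capped
# at max_length=1000 tokens. A minimal sketch of a trimming helper
# (hypothetical, not wired into converse) that keeps only the newest tokens:
def trim_history(bot_input_ids, max_tokens=512):
    # keep the last max_tokens ids so generate() still has room to respond
    return bot_input_ids[:, -max_tokens:]
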
import gradio as grad
css = """
.mychat {display:flex;flex-direction:column}
.mesg {padding:5px;margin-bottom:5px;border-radius:5px;width:75%}
.mesg.user {background-color:lightblue;color:white}
.mesg.alicia {background-color:orange;color:white;align-self:self-end}
.footer {display:none !important}
"""
text=grad.inputs.Textbox(placeholder="Let's chat")
grad.Interface(fn=converse,
             theme="default",
             inputs=[text, "state"],
             outputs=["html", "state"],
             css=css).launch()