Update app.py
Different text generation model "distilgpt2"
app.py
CHANGED
@@ -163,41 +163,59 @@
 # which could capture long-term dependencies in text. Finally, we came to transformers, whose decoder architecture became popular for generative models
 # used for generating text as an example.
 
-from transformers import GPT2LMHeadModel, GPT2Tokenizer
-import gradio as grad
-
-mdl = GPT2LMHeadModel.from_pretrained('gpt2')
-gpt2_tkn = GPT2Tokenizer.from_pretrained('gpt2')
-
-def generate(starting_text):
-    tkn_ids = gpt2_tkn.encode(starting_text, return_tensors='pt')
-
-    # When no specific parameter is specified, the model performs a greedy search to find the next word, which entails selecting the word from all of the
-    # alternatives that has the highest probability of being correct. This process is deterministic, which means that the resultant text is the same
-    # as before if we use the same parameters.
-
-    # The num_beams parameter does a beam search: it keeps the sequences that have the highest probability and then, when it comes time to
-    # choose, picks the one that has the highest probability.
-
-    # The do_sample parameter selects the next word at random from the probability distribution.
-
-    # The temperature parameter controls the level of greed that the generative model exhibits.
-    # If the temperature is low, the probabilities of sample classes other than the one with the highest log probability will be low.
-    # As a result, the model will probably output the text that is most correct, but it will be rather monotonous and contain only a small amount of variation.
-    # If the temperature is high, the model has a greater chance of outputting different words than those with the highest probability.
-    # The generated text will feature a greater variety of topics, but there is also an increased likelihood that it will generate nonsense text and
-    # contain grammatical errors.
-
-    # With lower temperature (1.5 --> 0.1), the output becomes less varied.
-    gpt2_tensors = mdl.generate(tkn_ids, max_length=100, no_repeat_ngram_size=True, num_beams=3, do_sample=True, temperature=0.1)
-    response = ""
-    #response = gpt2_tensors
-    for i, x in enumerate(gpt2_tensors):
-        response = response + f"{i}: {gpt2_tkn.decode(x, skip_special_tokens=True)}"  # Decode tensors into text
-    return gpt2_tensors, response
-
-txt = grad.Textbox(lines=1, label="English", placeholder="English Text here")
-out_tensors = grad.Textbox(lines=1, label="Generated Tensors")
-out_text = grad.Textbox(lines=1, label="Generated Text")
-
-grad.Interface(generate, inputs=txt, outputs=[out_tensors, out_text]).launch()
+# from transformers import GPT2LMHeadModel, GPT2Tokenizer
+# import gradio as grad
+
+# mdl = GPT2LMHeadModel.from_pretrained('gpt2')
+# gpt2_tkn = GPT2Tokenizer.from_pretrained('gpt2')
+
+# def generate(starting_text):
+#     tkn_ids = gpt2_tkn.encode(starting_text, return_tensors='pt')
+
+#     # When no specific parameter is specified, the model performs a greedy search to find the next word, which entails selecting the word from all of the
+#     # alternatives that has the highest probability of being correct. This process is deterministic, which means that the resultant text is the same
+#     # as before if we use the same parameters.
+
+#     # The num_beams parameter does a beam search: it keeps the sequences that have the highest probability and then, when it comes time to
+#     # choose, picks the one that has the highest probability.
+
+#     # The do_sample parameter selects the next word at random from the probability distribution.
+
+#     # The temperature parameter controls the level of greed that the generative model exhibits.
+#     # If the temperature is low, the probabilities of sample classes other than the one with the highest log probability will be low.
+#     # As a result, the model will probably output the text that is most correct, but it will be rather monotonous and contain only a small amount of variation.
+#     # If the temperature is high, the model has a greater chance of outputting different words than those with the highest probability.
+#     # The generated text will feature a greater variety of topics, but there is also an increased likelihood that it will generate nonsense text and
+#     # contain grammatical errors.
+
+#     # With lower temperature (1.5 --> 0.1), the output becomes less varied.
+#     gpt2_tensors = mdl.generate(tkn_ids, max_length=100, no_repeat_ngram_size=True, num_beams=3, do_sample=True, temperature=0.1)
+#     response = ""
+#     #response = gpt2_tensors
+#     for i, x in enumerate(gpt2_tensors):
+#         response = response + f"{i}: {gpt2_tkn.decode(x, skip_special_tokens=True)}"  # Decode tensors into text
+#     return gpt2_tensors, response
+
+# txt = grad.Textbox(lines=1, label="English", placeholder="English Text here")
+# out_tensors = grad.Textbox(lines=1, label="Generated Tensors")
+# out_text = grad.Textbox(lines=1, label="Generated Text")
+
+# grad.Interface(generate, inputs=txt, outputs=[out_tensors, out_text]).launch()
+
+#-----------------------------------------------------------------------------------
+# 9. Text Generation: different model "distilgpt2"
+
+from transformers import pipeline, set_seed
+import gradio as grad
+
+gpt2_pipe = pipeline('text-generation', model='distilgpt2')
+set_seed(42)
+
+def generate(starting_text):
+    response = gpt2_pipe(starting_text, max_length=20, num_return_sequences=5)
+    return response
+
+txt = grad.Textbox(lines=1, label="English", placeholder="English Text here")
+out = grad.Textbox(lines=1, label="Generated Text")
+
+grad.Interface(generate, inputs=txt, outputs=out).launch()
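
The decoding parameters documented in the commented-out block are easier to see side by side in a standalone script. The sketch below is illustrative, not part of the commit: the prompt text and variable names are my own, and it passes no_repeat_ngram_size=2 because that parameter expects an n-gram length as an integer, whereas the original call passes True.

# Illustrative sketch (not part of the commit): the three decoding strategies
# described in the comments above, run side by side on the same 'gpt2' checkpoint.
from transformers import GPT2LMHeadModel, GPT2Tokenizer

mdl = GPT2LMHeadModel.from_pretrained('gpt2')
tkn = GPT2Tokenizer.from_pretrained('gpt2')
ids = tkn.encode("The weather today", return_tensors='pt')  # arbitrary prompt

# Greedy search: the default; deterministic, always the single most probable next word.
greedy = mdl.generate(ids, max_length=30)

# Beam search: track num_beams candidate sequences, return the most probable one.
# no_repeat_ngram_size takes an int (the n-gram length), so 2 is used here
# where the original call passed True.
beam = mdl.generate(ids, max_length=30, num_beams=3, no_repeat_ngram_size=2)

# Sampling: draw the next word from the probability distribution; low temperature
# stays close to greedy, high temperature gives more varied (and riskier) text.
sampled = mdl.generate(ids, max_length=30, do_sample=True, temperature=0.7)

for name, out in [("greedy", greedy), ("beam", beam), ("sampled", sampled)]:
    print(name, "->", tkn.decode(out[0], skip_special_tokens=True))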
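
One thing worth noting about the new version: the text-generation pipeline returns a list of dicts of the form [{'generated_text': ...}, ...], so returning it directly hands the Gradio Textbox a Python repr rather than plain text. A hedged sketch of how the five sequences could be flattened before returning; the numbering format mirrors the old GPT-2 version and is my own choice, not part of the commit.

# Sketch (assumption, not the committed code): unpack the pipeline output into
# plain text so the Textbox shows readable lines instead of a list-of-dicts repr.
from transformers import pipeline, set_seed
import gradio as grad

gpt2_pipe = pipeline('text-generation', model='distilgpt2')
set_seed(42)  # makes the sampled sequences reproducible across runs

def generate(starting_text):
    outputs = gpt2_pipe(starting_text, max_length=20, num_return_sequences=5)
    # Each element is a dict with a 'generated_text' key; number the sequences 0-4.
    return "\n".join(f"{i}: {o['generated_text']}" for i, o in enumerate(outputs))

txt = grad.Textbox(lines=1, label="English", placeholder="English Text here")
out = grad.Textbox(lines=5, label="Generated Text")

grad.Interface(generate, inputs=txt, outputs=out).launch()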