JunzhaoSun committed on
Commit
634b9bc
·
1 Parent(s): 14f33b1

对比多种搜索方式

Browse files
Files changed (1) hide show
  1. app.py +66 -15
app.py CHANGED
@@ -9,9 +9,11 @@ import os
9
  checkpoint = "gpt2-large"
10
  # checkpoint = "/innev/open-ai/huggingface/models/gpt2-large"
11
  tokenizer = AutoTokenizer.from_pretrained(checkpoint)
12
- model = AutoModelForCausalLM.from_pretrained(checkpoint)
 
13
 
14
- def generate(text):
 
15
  # text = 'Who was Jim Henson ? Jim Henson was a'
16
 
17
  # 编码一段文本
@@ -22,7 +24,6 @@ def generate(text):
22
  # shape为 torch.Size([1, 11])
23
  tokens_tensor = torch.tensor([indexed_tokens])
24
 
25
-
26
  # 设置为evaluation模式,去取消激活dropout等模块。
27
  # 在huggingface/transformers框架中,默认就是eval模式
28
  model.eval()
@@ -51,21 +52,61 @@ def generate(text):
51
 
52
  return predicted_text
53
 
54
-
55
- def doloop(prompts):
56
-
57
  text = prompts
58
  total = 1
59
  while text[-1] != "." and total < 20:
60
- text = generate(text)
61
  print("Index %s: %s" % (total, text))
62
  total = total + 1
63
 
64
  return text, total
65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
  title = "GPT2 large"
68
 
 
 
69
  description = """
70
  本例为使用GPT2模型的简单推测语句DEMO,输入前面的句子,推测出后面的句子。
71
 
@@ -73,21 +114,31 @@ description = """
73
  """
74
 
75
  examples = [
76
- ["Who was Jim Henson ? Jim Henson was a", None],
77
- ["My name is Julien and I like to", None],
78
- ["My name is Thomas and my main", None],
79
- ["My name is Mariama, my favorite", None],
80
- ["My name is Clara and I am", None],
 
81
  ]
82
 
 
 
 
 
 
83
  gr.Interface(
84
- fn=doloop,
85
- inputs=gr.Text(label="输入前置语句"),
 
 
 
86
  outputs=[
87
- gr.Text(label="补全后输出"),
88
  gr.Text(label="循环次数"),
89
  ],
90
  title=title,
91
  description=description,
 
92
  examples=examples,
93
  ).launch()
 
9
  checkpoint = "gpt2-large"
10
  # checkpoint = "/innev/open-ai/huggingface/models/gpt2-large"
11
  tokenizer = AutoTokenizer.from_pretrained(checkpoint)
12
+ # model = AutoModelForCausalLM.from_pretrained(checkpoint)
13
+ model = AutoModelForCausalLM.from_pretrained(checkpoint, pad_token_id=tokenizer.eos_token_id)
14
 
15
+ # 简单生成
16
+ def sampleGen(text):
17
  # text = 'Who was Jim Henson ? Jim Henson was a'
18
 
19
  # 编码一段文本
 
24
  # shape为 torch.Size([1, 11])
25
  tokens_tensor = torch.tensor([indexed_tokens])
26
 
 
27
  # 设置为evaluation模式,去取消激活dropout等模块。
28
  # 在huggingface/transformers框架中,默认就是eval模式
29
  model.eval()
 
52
 
53
  return predicted_text
54
 
55
# Keyword-prediction generation: repeatedly extend the prompt one step
# at a time until the text ends with "." or the iteration cap is hit.
def loopGen(prompts):
    """Iteratively complete *prompts* by calling sampleGen in a loop.

    Args:
        prompts: seed text to extend.

    Returns:
        (text, total): the extended text and the loop counter, which
        starts at 1 and is incremented once per generation pass.
    """
    text = prompts
    total = 1
    # Guard `text` truthiness so an empty string cannot raise IndexError
    # on text[-1]; cap at 20 rounds to avoid endless generation.
    while text and text[-1] != "." and total < 20:
        text = sampleGen(text)
        print("Index %s: %s" % (total, text))
        total += 1
    return text, total
65
 
66
# Greedy-search text generation.
def greedySearch(prompts, max_length=128):
    """Generate a continuation of *prompts* with greedy decoding.

    Args:
        prompts: input text to continue.
        max_length: maximum total token length of the generated
            sequence; default 128 matches the previous hard-coded value.

    Returns:
        (text, 1): decoded text plus a constant round count of 1 so the
        return shape matches loopGen's (text, total) contract.
    """
    input_ids = tokenizer(prompts, return_tensors='pt').input_ids

    # No sampling flags are passed, so model.generate performs greedy
    # (argmax) decoding.
    output = model.generate(input_ids, max_length=max_length)
    text = tokenizer.decode(output[0], skip_special_tokens=True)
    return text, 1
74
+
75
# Sampling-based (nucleus / top-p) text generation.
def randomSearch(prompts, max_length=128, seed=0):
    """Generate a continuation of *prompts* by random (top-p) sampling.

    Args:
        prompts: input text to continue.
        max_length: maximum total token length of the generated
            sequence; default 128 matches the previous hard-coded value.
        seed: RNG seed fixed before sampling so repeated calls produce
            the same output. Was `torch.manual_seed(0.)` — a float where
            an integer seed is meant; int 0 is the equivalent value.

    Returns:
        (text, 1): decoded text plus a constant round count of 1 so the
        return shape matches loopGen's (text, total) contract.
    """
    input_ids = tokenizer(prompts, return_tensors='pt').input_ids

    # Seed deterministically, then sample; top_k=0 disables top-k
    # filtering so only nucleus (top_p=0.95) sampling applies.
    torch.manual_seed(seed)
    output = model.generate(input_ids, do_sample=True, max_length=max_length, top_p=0.95, top_k=0)
    text = tokenizer.decode(output[0], skip_special_tokens=True)
    return text, 1
84
+
85
# Contrastive-search text generation.
def contrastiveSearch(prompts):
    """Generate a continuation of *prompts* using contrastive search.

    Returns a (text, 1) pair so the output shape matches the other
    generation helpers.
    """
    encoded = tokenizer(prompts, return_tensors='pt')

    # penalty_alpha together with top_k switches model.generate into
    # contrastive-search mode.
    generated = model.generate(
        encoded.input_ids,
        penalty_alpha=0.6,
        top_k=4,
        max_length=512,
    )
    decoded = tokenizer.decode(generated[0], skip_special_tokens=True)

    return decoded, 1
94
+
95
def predict(searchType, prompts):
    """Dispatch *prompts* to the generator selected by *searchType*.

    Any unrecognized label falls back to the keyword-prediction loop
    (loopGen), preserving the original if/elif default branch.
    """
    # Table-driven dispatch instead of an if/elif chain.
    dispatch = {
        "贪心搜索": greedySearch,
        "随机方法": randomSearch,
        "对比搜索": contrastiveSearch,
    }
    handler = dispatch.get(searchType, loopGen)
    return handler(prompts)
104
+
105
 
106
  title = "GPT2 large"
107
 
108
+ searchMapping = ['关键词预测', '贪心搜索', '随机方法', '对比搜索']
109
+
110
  description = """
111
  本例为使用GPT2模型的简单推测语句DEMO,输入前面的句子,推测出后面的句子。
112
 
 
114
  """
115
 
116
  examples = [
117
+ [None, "DeepMind Company is", None],
118
+ [None, "Who was Jim Henson ? Jim Henson was a", None],
119
+ [None, "My name is Julien and I like to", None],
120
+ [None, "My name is Thomas and my main", None],
121
+ [None, "My name is Mariama, my favorite", None],
122
+ [None, "My name is Clara and I am", None],
123
  ]
124
 
125
+ article = """
126
+ ## 文章参考
127
+ - [在 Transformers 中使用对比搜索生成可媲美人类水平的文本 🤗](https://mp.weixin.qq.com/s/mydQLDlGUzFJuNBCIYc3CA)
128
+ """
129
+
130
# Build the Gradio demo — a radio selector for the search strategy plus
# a text box for the seed sentence — then launch it.
demo = gr.Interface(
    fn=predict,
    inputs=[
        gr.Radio(label="搜索方法", choices=searchMapping, value="关键词预测"),
        gr.Text(label="输入前置语句"),
    ],
    outputs=[
        gr.Text(label="生成文本"),
        gr.Text(label="循环次数"),
    ],
    title=title,
    description=description,
    article=article,
    examples=examples,
)
demo.launch()