VladimirVorobev
committed on
Commit
•
d601354
1
Parent(s):
b79257d
Update README.md
Browse files
README.md
CHANGED
@@ -26,17 +26,29 @@ tokenizer = AutoTokenizer.from_pretrained("humarin/chatgpt_paraphraser_on_T5_bas
|
|
26 |
|
27 |
model = AutoModelForSeq2SeqLM.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base").to(device)
|
28 |
|
29 |
-
def paraphrase(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
input_ids = tokenizer(
|
31 |
-
f'paraphrase: {
|
32 |
return_tensors="pt", padding="longest",
|
33 |
max_length=max_length,
|
34 |
truncation=True,
|
35 |
-
).input_ids
|
36 |
|
37 |
outputs = model.generate(
|
38 |
-
input_ids, temperature=temperature, repetition_penalty=
|
39 |
-
num_return_sequences=num_return_sequences, no_repeat_ngram_size=
|
|
|
|
|
40 |
)
|
41 |
|
42 |
res = tokenizer.batch_decode(outputs, skip_special_tokens=True)
|
@@ -53,11 +65,11 @@ paraphrase(text)
|
|
53 |
```
|
54 |
**Output:**
|
55 |
```python
|
56 |
-
['
|
57 |
-
'
|
58 |
-
'
|
59 |
-
'
|
60 |
-
'
|
61 |
```
|
62 |
|
63 |
**Input:**
|
@@ -67,17 +79,17 @@ paraphrase(text)
|
|
67 |
```
|
68 |
**Output:**
|
69 |
```python
|
70 |
-
['In May and June 2000, Rammstein
|
71 |
-
'
|
72 |
-
'
|
73 |
-
'
|
74 |
-
'In May and June 2000, Rammstein recorded Mutter in
|
75 |
```
|
76 |
|
77 |
|
78 |
**Train parameters:**
|
79 |
```python
|
80 |
-
epochs = 3
|
81 |
batch_size = 64
|
82 |
max_length = 128
|
83 |
lr = 5e-5
|
|
|
26 |
|
27 |
model = AutoModelForSeq2SeqLM.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base").to(device)
|
28 |
|
29 |
+
def paraphrase(
|
30 |
+
question,
|
31 |
+
num_beams=5,
|
32 |
+
num_beam_groups=5,
|
33 |
+
num_return_sequences=5,
|
34 |
+
repetition_penalty=10.0,
|
35 |
+
diversity_penalty=3.0,
|
36 |
+
no_repeat_ngram_size=2,
|
37 |
+
temperature=0.7,
|
38 |
+
max_length=128
|
39 |
+
):
|
40 |
input_ids = tokenizer(
|
41 |
+
f'paraphrase: {question}',
|
42 |
return_tensors="pt", padding="longest",
|
43 |
max_length=max_length,
|
44 |
truncation=True,
|
45 |
+
).input_ids
|
46 |
|
47 |
outputs = model.generate(
|
48 |
+
input_ids, temperature=temperature, repetition_penalty=repetition_penalty,
|
49 |
+
num_return_sequences=num_return_sequences, no_repeat_ngram_size=no_repeat_ngram_size,
|
50 |
+
num_beams=num_beams, num_beam_groups=num_beam_groups,
|
51 |
+
max_length=max_length, diversity_penalty=diversity_penalty
|
52 |
)
|
53 |
|
54 |
res = tokenizer.batch_decode(outputs, skip_special_tokens=True)
|
|
|
65 |
```
|
66 |
**Output:**
|
67 |
```python
|
68 |
+
['Which places should I not miss when visiting New York?',
|
69 |
+
'What are the top-rated tourist destinations in New York?',
|
70 |
+
'Where should I go sightseeing in New York?',
|
71 |
+
'Can you suggest some must-see places in New York?',
|
72 |
+
'What are some must-see places in New York?']
|
73 |
```
|
74 |
|
75 |
**Input:**
|
|
|
79 |
```
|
80 |
**Output:**
|
81 |
```python
|
82 |
+
['In May and June 2000, Rammstein filmed the album Mutter in the south of France, with mixing taking place in Stockholm in October of that year.',
|
83 |
+
'The album Mutter by Rammstein was recorded in the south of France during May and June 2000, with subsequent mixing taking place in Stockholm in October of that year.',
|
84 |
+
'Mutter, the album by Rammstein, was recorded in the south of France during May and June 2000, with mixing taking place at Stockholm in October of that year.',
|
85 |
+
"Rammstein's album Mutter was produced during May and June 2000 in southern France, with mixing taking place in Stockholm from October.",
|
86 |
+
'In May and June 2000, Rammstein recorded Mutter in southern France, followed by mixing it in Stockholm in October of the same year.']
|
87 |
```
|
88 |
|
89 |
|
90 |
**Train parameters:**
|
91 |
```python
|
92 |
+
epochs = 3.5
|
93 |
batch_size = 64
|
94 |
max_length = 128
|
95 |
lr = 5e-5
|