File size: 5,196 Bytes
8655f82
 
 
039d611
 
 
 
8655f82
039d611
 
a5b34f7
039d611
8655f82
039d611
8655f82
 
a5b34f7
8655f82
 
 
 
 
 
 
 
 
039d611
 
 
 
 
 
 
88976aa
039d611
 
8655f82
 
 
 
 
039d611
0872eb7
f6fe4f2
 
0872eb7
 
 
 
 
 
 
f6fe4f2
0872eb7
 
 
 
 
 
 
f6fe4f2
0872eb7
039d611
d15e848
 
e8d705f
f6fe4f2
 
 
 
0872eb7
 
5727bd1
88976aa
5727bd1
87fc391
8285b48
0872eb7
f6fe4f2
 
0872eb7
 
f6fe4f2
 
0872eb7
 
 
87fc391
 
f6fe4f2
87fc391
1118a37
1b1ed88
1118a37
87fc391
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import gradio as gr
import numpy as np
import torch
from sentence_transformers import SentenceTransformer
from torch.nn.utils.rnn import pad_sequence
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Module-level setup: everything below is loaded once, at import time.
# The seq2seq model is placed on the GPU when CUDA is available; the
# style-embedding encoder is explicitly created on the CPU.
model_name = "google/flan-t5-large"
device = 'cuda' if torch.cuda.is_available() else 'cpu'

embedding_model = SentenceTransformer('AnnaWegmann/Style-Embedding', device='cpu')
tokenizer = AutoTokenizer.from_pretrained(model_name)
# `.to()` returns the module itself, so loading and placement can be chained.
model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)

def get_target_style_embeddings(target_texts_batch):
    """Return the mean style embedding of each group of target example texts.

    All texts are flattened and encoded in a single call to the module-level
    ``embedding_model``; the resulting embeddings are then split back into
    their groups, zero-padded to the longest group, and averaged with a mask
    so that padding rows do not contribute to the mean.

    Args:
        target_texts_batch: A sequence of groups, each group being a sequence
            of example strings.

    Returns:
        A NumPy array with one row per group holding that group's mean
        embedding. A group that contains no texts yields an all-zero row
        (instead of NaN from a 0/0 division).
    """
    lengths = [len(target_texts) for target_texts in target_texts_batch]
    all_target_texts = [
        target_text
        for target_texts in target_texts_batch
        for target_text in target_texts
    ]

    embeddings = embedding_model.encode(
        all_target_texts,
        # Encode everything in one batch; never pass a batch size of 0.
        batch_size=max(len(all_target_texts), 1),
        convert_to_tensor=True,
        show_progress_bar=False,
    )

    split_embeddings = torch.split(embeddings, lengths)
    padded_embeddings = pad_sequence(split_embeddings, batch_first=True, padding_value=0.0)

    # Build the mask on the same device as the embeddings so the arithmetic
    # below works regardless of where the encoder placed its output.
    target_device = padded_embeddings.device
    positions = torch.arange(padded_embeddings.size(1), device=target_device)
    counts = torch.tensor(lengths, device=target_device)
    mask = (positions[None, :] < counts[:, None]).to(torch.float32).unsqueeze(-1)

    # Clamp the per-group count to at least 1 so empty groups give zeros, not NaN.
    denominators = mask.sum(dim=1).clamp(min=1.0)
    mean_embeddings = torch.sum(padded_embeddings * mask, dim=1) / denominators
    return mean_embeddings.cpu().numpy()

def run_tinystyler_batch(source_texts, target_example_texts_batch, reranking, temperature, top_p):
    """Generate one sampled output text for each source text.

    Uses the module-level ``tokenizer`` and ``model``.

    Args:
        source_texts: List of input strings to transform.
        target_example_texts_batch: Target-style examples, one entry per
            source text. Accepted for interface compatibility; not used by
            the current implementation.
        reranking: Re-ranking setting. Accepted for interface compatibility;
            not used by the current implementation.
        temperature: Sampling temperature forwarded to ``model.generate``.
        top_p: Nucleus-sampling threshold forwarded to ``model.generate``.

    Returns:
        List of decoded strings (special tokens removed), one per source text.
    """
    # Pad so that sources of different lengths can be stacked into one tensor,
    # and move the encoded batch to wherever the model's weights live.
    inputs = tokenizer(source_texts, return_tensors="pt", padding=True).to(model.device)

    # Sample with the requested temperature and top_p; the attention mask
    # tells the model which positions are padding.
    output = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        max_length=1024,
    )

    # transformers tokenizers expose `batch_decode` (not `decode_batch`).
    generated_texts = tokenizer.batch_decode(output, skip_special_tokens=True)
    return generated_texts
    
def run_tinystyler(source_text, target_example_texts, reranking, temperature, top_p):
    """Run the batch pipeline on a single input and return its only result.

    Wraps ``source_text`` and ``target_example_texts`` in one-element lists,
    delegates to ``run_tinystyler_batch``, and unwraps the first output.
    """
    batch_outputs = run_tinystyler_batch(
        [source_text],
        [target_example_texts],
        reranking,
        temperature,
        top_p,
    )
    return batch_outputs[0]

# Preset examples, keyed by the label shown in the examples dropdown.
# Each entry stores the values for every input control plus a pre-written
# "output" string, so selecting a preset fills the form without generating.
preset_examples = {
    "Example 1": dict(
        source_text="Once upon a time in a small village",
        target_example_texts="In a land far away, there was a kingdom ruled by a wise king. Every day, the people of the kingdom would gather to listen to the king's stories, which were full of wisdom and kindness.",
        reranking=5,
        temperature=1.0,
        top_p=1.0,
        output="Once upon a time in a small village in a land far away, there was a kingdom ruled by a wise king. Every day, the people of the kingdom would gather to listen to the king's stories, which were full of wisdom and kindness.",
    ),
    "Example 2": dict(
        source_text="The quick brown fox",
        target_example_texts="A nimble, chocolate-colored fox swiftly darted through the emerald forest, weaving between trees with grace and agility.",
        reranking=5,
        temperature=0.9,
        top_p=0.9,
        output="The quick brown fox, a nimble, chocolate-colored fox, swiftly darted through the emerald forest, weaving between trees with grace and agility.",
    ),
}

# Gradio UI: an examples dropdown, the input controls, a read-only output box
# and a "Generate" button wired to run_tinystyler.
# NOTE(review): the theme string below looks like it was mangled by e-mail
# obfuscation ("[email protected]"); confirm the intended theme identifier
# and restore it.
with gr.Blocks(theme="ParityError/[email protected]") as demo:
    gr.Markdown("# TinyStyler Demo")
    gr.Markdown("Style transfer the source text into the target style, given some example texts of the target style. You can adjust re-ranking and top_p to your desire to control the quality of style transfer. A higher re-ranking value will generally result in better generations, at slower speed.")

    with gr.Row():
        example_dropdown = gr.Dropdown(
            label="Examples",
            choices=list(preset_examples),
        )

    # Input controls, created in the order they should appear on the page.
    source_text = gr.Textbox(
        lines=3,
        placeholder="Enter the source text to transform into the target style...",
        label="Source Text",
    )
    target_example_texts = gr.Textbox(
        lines=5,
        placeholder="Enter example texts of the target style (one per line)...",
        label="Example Texts of the Target Style",
    )
    reranking = gr.Slider(minimum=1, maximum=10, value=5, step=1, label="Re-ranking")
    temperature = gr.Slider(minimum=0.1, maximum=2.0, value=1.0, step=0.1, label="Temperature")
    top_p = gr.Slider(minimum=0.0, maximum=1.0, value=1.0, step=0.1, label="Top-P")

    output = gr.Textbox(
        lines=5,
        placeholder="Click 'Generate' to transform the source text into the target style.",
        label="Output",
        interactive=False,
    )

    def set_example(example_name):
        """Return the preset's values in the order of the controls they fill.

        The tuple is (source_text, target_example_texts, reranking,
        temperature, top_p, output) for the preset named ``example_name``.
        """
        preset = preset_examples[example_name]
        field_order = (
            "source_text",
            "target_example_texts",
            "reranking",
            "temperature",
            "top_p",
            "output",
        )
        return tuple(preset[field] for field in field_order)

    # Picking a preset overwrites every input control and the output box.
    example_dropdown.change(
        fn=set_example,
        inputs=[example_dropdown],
        outputs=[source_text, target_example_texts, reranking, temperature, top_p, output],
    )

    btn = gr.Button("Generate")
    btn.click(
        fn=run_tinystyler,
        inputs=[source_text, target_example_texts, reranking, temperature, top_p],
        outputs=output,
    )

    # Pre-populate the dropdown and all fields from the first preset.
    first_example = list(preset_examples)[0]
    initial_values = set_example(first_example)
    example_dropdown.value = first_example
    (
        source_text.value,
        target_example_texts.value,
        reranking.value,
        temperature.value,
        top_p.value,
        output.value,
    ) = initial_values

demo.launch()