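"""Gradio demo comparing google/flan-ul2 and google/flan-t5-xxl.

The same prompt is sent to both Hugging Face Inference API endpoints in
parallel and the two generations are shown side by side.
"""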
import os
import asyncio
from concurrent.futures import ThreadPoolExecutor
import requests
import gradio as gr


MAX_NEW_TOKENS = 128
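# Hugging Face Inference API token, read from the environment.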
TOKEN = os.environ.get("API_TOKEN")
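# Inference API endpoints for the two models being compared.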
URLS = [
    "https://api-inference.huggingface.co/models/google/flan-ul2",
    "https://api-inference.huggingface.co/models/google/flan-t5-xxl",
]


def fetch(session, text, api_url):
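    """Query one Inference API endpoint; return (model name, parsed JSON, or None on HTTP error)."""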
    model = api_url.split("/")[-1]
    response = session.post(api_url, json={"inputs": text, "parameters": {"max_new_tokens": MAX_NEW_TOKENS}})
    if response.status_code != 200:
        return model, None
    return model, response.json()


examples = [
    ["Please answer to the following question. Who is going to be the next Ballon d'or?"],
    ["Q: Can Barack Obama have a conversation with George Washington? Give the rationale before answering."],
    [
        "Summarize the following text: Peter and Elizabeth took a taxi to attend the night party in the city. While in the party, Elizabeth collapsed and was rushed to the hospital. Since she was diagnosed with a brain injury, the doctor told Peter to stay besides her until she gets well. Therefore, Peter stayed with her at the hospital for 3 days without leaving."
    ],
    ["Please answer the following question: What is the boiling point of water?"],
    ["Answer the following question by detailing your reasoning: Are Pokemons alive?"],
    ["Translate to German: How old are you?"],
    ["Generate a cooking recipe to make bolognese pasta:"],
    ["Answer the following yes/no question by reasoning step-by-step. Can you write a whole Haiku in a single tweet?"],
    [
        "Premise:  At my age you will probably have learnt one lesson. Hypothesis:  It's not certain how many lessons you'll learn by your thirties. Does the premise entail the hypothesis?"
    ],
    [
        "Answer the following question by reasoning step by step. The cafeteria had 23 apples. If they used 20 for lunch and bought 6 more, how many apples do they have?"
    ],
    [
        """Q: Roger has 5 tennis balls. He buys 2 more cans of tennis balls. Each can has 3 tennis balls. How many tennis balls does he have now?
A: Roger started with 5 balls. 2 cans of 3 tennis balls each is 6 tennis balls. 5 + 6 = 11. The answer is 11.
Q: A juggler can juggle 16 balls. Half of the balls are golf balls, and half of the golf balls are blue. How many blue golf balls are there?"""
    ],
]

title = "Flan UL2 vs Flan T5 XXL"
description = "This demo compares [Flan-T5-xxl](https://huggingface.co/google/flan-t5-xxl) and [Flan-UL2](https://huggingface.co/google/flan-ul2). Learn more about these models in their model card!"


async def inference(text):
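    """Send the prompt to both endpoints concurrently; return [ul2_text, t5_xxl_text]."""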
    with ThreadPoolExecutor(max_workers=2) as executor:
        with requests.Session() as session:
            # update() keeps requests' default headers instead of replacing them
            session.headers.update({"Authorization": f"Bearer {TOKEN}"})
            # Grab the already-running event loop so the blocking HTTP calls
            # can be scheduled on the thread pool
            loop = asyncio.get_running_loop()
            tasks = [
                loop.run_in_executor(
                    executor, fetch, *(session, text, url)  # Allows us to pass in multiple arguments to `fetch`
                )
                for url in URLS
            ]

            # Await all requests, then slot each generation into its fixed output position
            responses = [None, None]
            for (model, response) in await asyncio.gather(*tasks):
                if response is not None:
                    if model == "flan-ul2":
                        responses[0] = response[0]["generated_text"]
                    elif model == "flan-t5-xxl":
                        responses[1] = response[0]["generated_text"]
    return responses


io = gr.Interface(
    inference,
    gr.Textbox(lines=3, label="Prompt"),
    outputs=[gr.Textbox(lines=3, label="Flan UL2"), gr.Textbox(lines=3, label="Flan T5-XXL")],
    title=title,
    description=description,
    examples=examples,
)
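# Start the Gradio server and serve the demo.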
io.launch()