"""Gradio demo that streams shuffled words into three text columns."""

import copy
import random
from time import sleep

import gradio as gr

# Sample text whose words are shuffled and streamed; only its
# whitespace-separated words are ever used (see generate_data_test).
TEST = """
Test of Time. A Benchmark for Evaluating LLMs on Temporal Reasoning.
Large language models (LLMs) have showcased remarkable reasoning
capabilities, yet they remain susceptible to errors, particularly in
temporal reasoning tasks involving complex temporal logic.
"""


def generate_data_test():
    """Yield the words of ``TEST`` in random order, each followed by a space."""
    # str is immutable and split() builds a fresh list, so no defensive
    # copy of TEST is needed before shuffling.
    words = TEST.split()
    random.shuffle(words)
    for word in words:
        yield word + " "


def stream_data(progress=gr.Progress()):
    """Stream independently shuffled text into all three columns.

    Each column is fed by its own ``generate_data_test`` generator. On every
    round one word is appended to each column that still has words left, and
    the accumulated text of all columns is yielded as a 3-tuple.

    Args:
        progress: Gradio progress tracker. It is not referenced in the body;
            the parameter is kept so the signature stays unchanged.

    Yields:
        tuple[str, str, str]: The text accumulated so far for each column.
    """
    outputs = ["", "", ""]
    generators = [generate_data_test() for _ in range(3)]

    while True:
        updated = False
        for i, gen in enumerate(generators):
            try:
                word = next(gen)
            except StopIteration:
                # This column is exhausted; the others may still have words.
                continue
            outputs[i] += word
            updated = True

        # Stop once no generator produced a word in this round.
        if not updated:
            break

        yield tuple(outputs)
        sleep(0.01)  # short pause between rounds so updates arrive gradually


def create_interface():
    """Build the three-column streaming UI.

    Returns:
        gr.Blocks: The assembled app, ready for ``queue()`` / ``launch()``.
    """
    # Components and event listeners must be created inside a Blocks context,
    # and the Blocks object must be returned so the caller can launch it.
    with gr.Blocks() as demo:
        with gr.Group():
            with gr.Row():
                col1 = gr.Textbox(label="Column 1", lines=10)
                col2 = gr.Textbox(label="Column 2", lines=10)
                col3 = gr.Textbox(label="Column 3", lines=10)

            start_btn = gr.Button("Start Streaming")
            start_btn.click(
                fn=stream_data,
                outputs=[col1, col2, col3],
                show_progress=False,
            )

    return demo


if __name__ == "__main__":
    demo = create_interface()
    demo.queue()
    demo.launch()