import copy
import random
from time import sleep
import gradio as gr

# Sample passage used as the word source for the demo: generate_data_test()
# splits it on whitespace and shuffles the resulting words.
TEST = """ Test of Time. A Benchmark for Evaluating LLMs on Temporal Reasoning. Large language models (LLMs) have 
showcased remarkable reasoning capabilities, yet they remain susceptible to errors, particularly in temporal 
reasoning tasks involving complex temporal logic. """

def generate_data_test():
    """Yield the words of ``TEST`` in random order, one at a time.

    The text is split on any run of whitespace (so newlines and the
    leading/trailing blanks of ``TEST`` never produce empty words), the word
    list is shuffled in place with ``random.shuffle``, and each word is
    yielded with a single trailing space so callers can concatenate the
    pieces directly.

    Yields:
        str: one word from ``TEST`` followed by ``" "``. Nothing is yielded
        when ``TEST`` contains no words.
    """
    # str is immutable, so no defensive copy is needed before splitting, and
    # the shuffled list can be iterated as-is instead of being joined and
    # re-split (which also produced a stray " " for empty text).
    words = TEST.split()
    random.shuffle(words)
    for word in words:
        yield word + " "

def stream_data(progress=gr.Progress()):
    """Feed three independent word streams into the three columns in lockstep.

    Each round pulls at most one word from every ``generate_data_test()``
    generator and appends it to that column's accumulated text. After every
    round in which at least one stream produced a word, the three accumulated
    strings are yielded as a tuple, followed by a 0.01 second pause. The
    function finishes once a full round produces nothing.

    Args:
        progress: Defaults to ``gr.Progress()``; not referenced in the body.
            NOTE(review): presumably present so Gradio can inject a progress
            tracker -- confirm before removing.

    Yields:
        tuple[str, str, str]: the text accumulated so far for each column.
    """
    streams = [generate_data_test() for _ in range(3)]
    texts = [""] * len(streams)
    exhausted = object()  # unique marker returned by next() for a finished stream

    while True:
        advanced = False
        for idx, stream in enumerate(streams):
            token = next(stream, exhausted)
            if token is exhausted:
                continue
            texts[idx] += token
            advanced = True

        if not advanced:
            return

        yield tuple(texts)
        sleep(0.01)

def create_interface():
    """Build the three-column streaming demo and return it.

    The layout is a group holding a row of three 10-line textboxes and a
    "Start Streaming" button. Clicking the button runs ``stream_data`` and
    routes its three yielded strings to the three textboxes.

    Returns:
        gr.Blocks: the assembled app, ready for ``queue()`` / ``launch()``.
    """
    # The layout is built inside a Blocks context and that object is
    # returned: the caller invokes queue() and launch() on the result, which
    # previously was None because nothing was returned.
    with gr.Blocks() as demo:
        with gr.Group():
            with gr.Row():
                col1 = gr.Textbox(label="Column 1", lines=10)
                col2 = gr.Textbox(label="Column 2", lines=10)
                col3 = gr.Textbox(label="Column 3", lines=10)

            start_btn = gr.Button("Start Streaming")

            start_btn.click(
                fn=stream_data,
                outputs=[col1, col2, col3],
                show_progress=False,
            )

    return demo

if __name__ == "__main__":
    # Script entry point: build the interface, then call queue() on it
    # before launch().
    demo = create_interface()
    demo.queue()
    demo.launch()