"""Gradio front-end for running PySR symbolic regression.

The app fits either a noisy synthetic curve generated from a test equation
or the contents of an uploaded CSV file (last column is the target), and
shows the discovered equations with their loss and complexity.
"""

import tempfile
from typing import Optional, Tuple

import gradio as gr
import numpy as np
import pandas as pd
import pysr

# Returned alongside an error message whenever no fit was performed.
empty_df = pd.DataFrame(
    {
        "equation": [],
        "loss": [],
        "complexity": [],
    }
)

test_equations = [
    "sin(x) + cos(2*x) + tan(x/3)",
]

# Plain-text tokens in a test equation and the NumPy spelling that
# replaces them before the expression is evaluated.
_NUMPY_REPLACEMENTS = {
    "sin": "np.sin",
    "cos": "np.cos",
    "exp": "np.exp",
    "log": "np.log",
    "tan": "np.tan",
    "^": "**",
}


def generate_data(
    s: str, num_points: int, noise_level: float
) -> Tuple[pd.DataFrame, np.ndarray]:
    """Sample a noisy curve from the equation string *s*.

    The equation is evaluated on ``num_points`` evenly spaced values of
    ``x`` in ``[0, 10]`` and Gaussian noise with standard deviation
    ``noise_level`` is added to the result.

    Args:
        s: Expression in ``x`` using the tokens listed in
            ``_NUMPY_REPLACEMENTS`` (e.g. ``"sin(x) + x^2"``).
        num_points: Number of sample points; cast to ``int`` because UI
            sliders may deliver a float.
        noise_level: Standard deviation of the additive noise.

    Returns:
        A ``(X, y)`` pair: a single-column DataFrame ``{"x": x}`` and the
        noisy target array.
    """
    x = np.linspace(0, 10, int(num_points))

    expression = s
    for token, numpy_token in _NUMPY_REPLACEMENTS.items():
        expression = expression.replace(token, numpy_token)

    # SECURITY: eval() on a string that ultimately arrives from the UI/API.
    # The namespace is limited to `np` and `x`, which blocks accidental
    # access to module state, but this is NOT a sandbox. Only pass
    # equations from a trusted list such as `test_equations`.
    y = np.asarray(
        eval(expression, {"__builtins__": {}}, {"np": np, "x": x}),
        dtype=float,
    )
    if y.shape != x.shape:
        # A constant expression (no `x`) evaluates to a scalar.
        y = np.broadcast_to(y, x.shape)

    noise = np.random.normal(0, noise_level, y.shape)
    y_noisy = y + noise
    return pd.DataFrame({"x": x}), y_noisy


def greet(
    file_obj: Optional[tempfile._TemporaryFileWrapper],
    test_equation: str,
    num_points: int,
    noise_level: float,
    niterations: int,
    maxsize: int,
    binary_operators: list,
    unary_operators: list,
    force_run: bool,
):
    """Fit a PySR model and return its equation table plus a status message.

    When ``file_obj`` is given, the uploaded CSV is used: the last column
    is the target and all other columns are features. Otherwise synthetic
    data is generated from ``test_equation``.

    Args:
        file_obj: Uploaded CSV file, or ``None`` to use the example data.
        test_equation: Equation string passed to ``generate_data``.
        num_points: Number of synthetic samples.
        noise_level: Noise standard deviation for synthetic samples.
        niterations: ``niterations`` passed to ``PySRRegressor``.
        maxsize: ``maxsize`` passed to ``PySRRegressor``.
        binary_operators: Binary operators passed to ``PySRRegressor``.
        unary_operators: Unary operators passed to ``PySRRegressor``.
        force_run: If true, skip the large-file warning.

    Returns:
        ``(dataframe, message)``. On a validation failure the dataframe is
        ``empty_df`` and the message explains the problem. On success the
        dataframe holds the ``equation``, ``loss`` and ``complexity``
        columns (as strings) and is also written to ``pysr_output.csv``.
    """
    # Validate operators for both data sources: fitting with no operators
    # at all cannot succeed, whether the data is uploaded or synthetic.
    if not binary_operators and not unary_operators:
        return (
            empty_df,
            "Please select at least one operator!",
        )

    if file_obj is not None:
        # Look at some statistics of the file:
        try:
            df = pd.read_csv(file_obj)
        except pd.errors.EmptyDataError:
            # A zero-byte upload raises instead of yielding an empty frame.
            return (
                empty_df,
                "The file is empty!",
            )

        if len(df) == 0:
            return (
                empty_df,
                "The file is empty!",
            )
        if len(df.columns) == 1:
            return (
                empty_df,
                "The file has only one column!",
            )
        if len(df) > 10_000 and not force_run:
            return (
                empty_df,
                "You have uploaded a file with more than 10,000 rows. "
                "This will take very long to run. "
                "Please upload a subsample of the data, "
                "or check the box 'Ignore Warnings'.",
            )

        col_to_fit = df.columns[-1]
        y = np.array(df[col_to_fit])
        X = df.drop([col_to_fit], axis=1)
    else:
        X, y = generate_data(test_equation, num_points, noise_level)

    # Sliders may deliver floats; PySR expects integer settings here.
    niterations = int(niterations)
    maxsize = int(maxsize)

    model = pysr.PySRRegressor(
        bumper=True,
        maxsize=maxsize,
        niterations=niterations,
        binary_operators=binary_operators,
        unary_operators=unary_operators,
        timeout_in_seconds=1000,
    )
    model.fit(X, y)

    df = model.equations_[["equation", "loss", "complexity"]]
    # Convert all columns to string type:
    df = df.astype(str)

    msg = (
        "Success!\n"
        "You may run the model locally (faster) with "
        "the following parameters:"
        + f"""
model = PySRRegressor(
    niterations={niterations},
    binary_operators={binary_operators},
    unary_operators={unary_operators},
    maxsize={maxsize},
)
model.fit(X, y)"""
    )

    df.to_csv("pysr_output.csv", index=False)
    return df, msg


def replot(test_equation, num_points, noise_level):
    """Return a DataFrame with ``x`` and ``y`` columns for the example plot."""
    X, y = generate_data(test_equation, num_points, noise_level)
    return pd.DataFrame({"x": X["x"], "y": y})


def main():
    """Build the Gradio interface, wire up its callbacks, and launch it."""
    # NOTE(review): container nesting below was reconstructed from a
    # whitespace-collapsed source; confirm the layout renders as intended.
    with gr.Blocks() as demo:
        with gr.Row():
            with gr.Column():
                with gr.Row():
                    with gr.Tab("Example Data"):
                        # Plot of the example data:
                        example_plot = gr.ScatterPlot(
                            x="x",
                            y="y",
                            tooltip=["x", "y"],
                            x_lim=[0, 10],
                            y_lim=[-5, 5],
                            width=350,
                            height=300,
                        )
                        test_equation = gr.Radio(
                            test_equations,
                            value=test_equations[0],
                            label="Test Equation",
                        )
                        num_points = gr.Slider(
                            minimum=10,
                            maximum=1000,
                            value=100,
                            label="Number of Data Points",
                            step=1,
                        )
                        noise_level = gr.Slider(
                            minimum=0, maximum=1, value=0.1, label="Noise Level"
                        )
                    with gr.Tab("Upload Data"):
                        file_input = gr.File(label="Upload a CSV File")
                with gr.Row():
                    binary_operators = gr.CheckboxGroup(
                        choices=["+", "-", "*", "/", "^"],
                        label="Binary Operators",
                        value=["+", "-", "*", "/"],
                    )
                    unary_operators = gr.CheckboxGroup(
                        choices=[
                            "sin",
                            "cos",
                            "exp",
                            "log",
                            "square",
                            "cube",
                            "sqrt",
                            "abs",
                            "tan",
                        ],
                        label="Unary Operators",
                        value=[],
                    )
                    niterations = gr.Slider(
                        minimum=1,
                        maximum=1000,
                        value=40,
                        label="Number of Iterations",
                        step=1,
                    )
                    maxsize = gr.Slider(
                        minimum=7,
                        maximum=35,
                        value=20,
                        label="Maximum Complexity",
                        step=1,
                    )
                    force_run = gr.Checkbox(
                        value=False,
                        label="Ignore Warnings",
                    )

            with gr.Column():
                with gr.Row():
                    df = gr.Dataframe(
                        headers=["Equation", "Loss", "Complexity"],
                        datatype=["str", "number", "number"],
                    )
                    error_log = gr.Textbox(label="Error Log")
                with gr.Row():
                    run_button = gr.Button()

        run_button.click(
            greet,
            inputs=[
                file_input,
                test_equation,
                num_points,
                noise_level,
                niterations,
                maxsize,
                binary_operators,
                unary_operators,
                force_run,
            ],
            outputs=[df, error_log],
        )

        # Any update to the equation choice will trigger a replot:
        for eqn_component in [test_equation, num_points, noise_level]:
            eqn_component.change(
                replot, [test_equation, num_points, noise_level], example_plot
            )

    demo.launch()


if __name__ == "__main__":
    main()