"""Gradio app that generates a starter analysis notebook for a Hub dataset.

The user picks a dataset on the Hugging Face Hub, generates a small Jupyter
notebook that loads it with pandas, downloads it, and can optionally push the
notebook to the dataset repository with a personal access token.

TODOs:
- Show auth and push button only after notebook creation
- Improve the link to the result notebook
- Handle errors
- Add more commands to the notebook
- Parametrize the commands
- How to handle configs and splits?
- Let user choose the framework
- Improve logs
"""

import html

import gradio as gr
from gradio_huggingfacehub_search import HuggingfaceHubSearch
import nbformat as nbf
from huggingface_hub import HfApi
from huggingface_hub.utils import HfHubHTTPError

# Single source of truth for the uploaded file name, so the upload target and
# the link shown to the user can never drift apart.
NOTEBOOK_PATH_IN_REPO = "dataset_analysis.ipynb"


def create_notebook_file(cell_commands, notebook_name="generated_notebook.ipynb"):
    """Write a notebook containing one code cell per command.

    Args:
        cell_commands: Iterable of source strings; each becomes a code cell.
        notebook_name: Path of the ``.ipynb`` file to create or overwrite.
    """
    nb = nbf.v4.new_notebook()
    nb["cells"] = [nbf.v4.new_code_cell(command) for command in cell_commands]

    # Notebooks are JSON documents; write UTF-8 regardless of the host locale.
    with open(notebook_name, "w", encoding="utf-8") as f:
        nbf.write(nb, f)

    print(f"Notebook '{notebook_name}' created successfully.")


def push_notebook(file_path, dataset_id, token):
    """Upload the generated notebook to the dataset repository on the Hub.

    Args:
        file_path: Local path of the notebook (value of the download widget).
        dataset_id: Hub dataset repository id, e.g. ``"user/dataset"``.
        token: Hugging Face access token used for the upload.

    Returns:
        An HTML anchor pointing at the uploaded notebook.

    Raises:
        gr.Error: If no notebook or dataset was provided, or if the Hub
            rejects the upload (for example because of missing permissions).
    """
    if not file_path:
        raise gr.Error("Generate a notebook before pushing it to the Hub.")
    if not dataset_id:
        raise gr.Error("Select a dataset before pushing the notebook.")

    api = HfApi(token=token)
    try:
        api.upload_file(
            path_or_fileobj=file_path,
            path_in_repo=NOTEBOOK_PATH_IN_REPO,
            repo_id=dataset_id,
            repo_type="dataset",
        )
    except HfHubHTTPError as err:
        # Surface Hub failures (bad token, no write access, ...) in the UI
        # instead of letting the handler crash with a raw traceback.
        raise gr.Error(
            f"Could not upload the notebook to '{dataset_id}': {err}"
        ) from err

    print("Notebook uploaded to Huggingface Hub.")
    link = (
        f"https://huggingface.co/datasets/{dataset_id}/blob/main/"
        f"{NOTEBOOK_PATH_IN_REPO}"
    )
    # The dataset id is user-provided text: escape it before embedding in HTML.
    return f'<a href="{html.escape(link, quote=True)}" target="_blank">See notebook</a>'


def generate_notebook(dataset_id):
    """Create a starter notebook for ``dataset_id`` and return its file name.

    Args:
        dataset_id: Hub dataset repository id, e.g. ``"user/dataset"``.

    Returns:
        The name of the notebook file written to the working directory.

    Raises:
        gr.Error: If no dataset id was provided.
    """
    if not dataset_id:
        raise gr.Error("Select a dataset before generating a notebook.")

    # TODO: Get first config and split? or generate a dataframe per each split maybe?
    # NOTE: the parquet path below assumes a single-shard "train" split.
    parquet_url = f"hf://datasets/{dataset_id}/data/train-00000-of-00001.parquet"
    commands = [
        "!pip install pandas",
        "import pandas as pd",
        # repr() yields a correctly quoted Python literal even if the id
        # contains quote characters.
        f"df = pd.read_parquet({parquet_url!r})",
        "df.head()",
    ]
    notebook_name = f"{dataset_id.replace('/', '-')}.ipynb"
    create_notebook_file(commands, notebook_name=notebook_name)
    return notebook_name


with gr.Blocks() as demo:
    gr.Markdown("# 🤖 Dataset auto analyst creator 🕵️")
    dataset_name = HuggingfaceHubSearch(
        label="Hub Dataset ID",
        placeholder="Search for dataset id on Huggingface",
        search_type="dataset",
        value="",
    )

    @gr.render(inputs=dataset_name)
    def embed(name):
        """Render a preview of the selected dataset, or a hint when none is set."""
        if not name:
            return gr.Markdown("### No dataset provided")
        viewer_url = f"https://huggingface.co/datasets/{name}/embed/viewer"
        html_code = f"""
        <iframe
          src="{html.escape(viewer_url, quote=True)}"
          frameborder="0"
          width="100%"
          height="350px"
        ></iframe>
        """
        return gr.HTML(value=html_code)

    generate_btn = gr.Button("Generate notebook", visible=True)
    download_link = gr.File(label="Download notebook")
    generate_btn.click(
        generate_notebook, inputs=[dataset_name], outputs=[download_link]
    )

    with gr.Row() as auth_page:
        with gr.Column():
            auth_title = gr.Markdown(
                "Enter your token ([settings](https://huggingface.co/settings/tokens)):"
            )
            token_box = gr.Textbox(
                "", label="token", placeholder="hf_xxx", type="password"
            )
            auth_error = gr.Markdown("", visible=False)

    def auth(token):
        """Show the push button only while a token has been entered."""
        return {
            auth_error: gr.Markdown(value="", visible=False),
            # push_btn is a Button, so it must be updated with a Button.
            push_btn: gr.Button(visible=bool(token)),
        }

    push_btn = gr.Button("Push notebook to hub", visible=False)
    token_box.change(
        auth,
        inputs=token_box,
        outputs=[auth_error, push_btn],
    )
    output_lbl = gr.HTML(value="")

    push_btn.click(
        push_notebook,
        inputs=[download_link, dataset_name, token_box],
        outputs=[output_lbl],
    )

# Only start the server when run as a script, so importing the module
# (tooling, reload mode) does not launch the app as a side effect.
if __name__ == "__main__":
    demo.launch()