import gradio as gr
from gradio_huggingfacehub_search import HuggingfaceHubSearch
import nbformat as nbf
from huggingface_hub import HfApi
"""
TODOs:
- Show auth and push button only after notebook creation
- Improve the link to the result notebook
- Handle errors
- Add more commands to the notebook
- Parametrize the commands
- How to handle configs and splits?
- Let user choose the framework
- Improve logs
"""
def create_notebook_file(cell_commands, notebook_name="generated_notebook.ipynb"):
    """Write a Jupyter notebook containing one code cell per command.

    Args:
        cell_commands: Iterable of source strings; each one becomes a code
            cell, in the order given.
        notebook_name: Path of the ``.ipynb`` file to write. An existing file
            at that path is overwritten.
    """
    nb = nbf.v4.new_notebook()
    nb["cells"] = [nbf.v4.new_code_cell(command) for command in cell_commands]

    # Pin the encoding so the written notebook does not depend on the
    # platform's locale default when a cell contains non-ASCII text.
    with open(notebook_name, "w", encoding="utf-8") as f:
        nbf.write(nb, f)

    print(f"Notebook '{notebook_name}' created successfully.")
def push_notebook(file_path, dataset_id, token):
    """Upload a notebook file to a dataset repository on the Hugging Face Hub.

    Args:
        file_path: Local path of the notebook file to upload.
        dataset_id: Id of the target dataset repository.
        token: Access token handed to ``HfApi`` for authentication.

    Returns:
        An HTML anchor string linking to the uploaded notebook on the Hub.
    """
    # Single source of truth for the file name, so the upload destination and
    # the link returned to the user cannot drift apart.
    notebook_path_in_repo = "dataset_analysis.ipynb"

    api = HfApi(token=token)
    api.upload_file(
        path_or_fileobj=file_path,
        path_in_repo=notebook_path_in_repo,
        repo_id=dataset_id,
        repo_type="dataset",
    )
    # TODO: Handle permission error
    print("Notebook uploaded to Huggingface Hub.")

    link = (
        f"https://huggingface.co/datasets/{dataset_id}"
        f"/blob/main/{notebook_path_in_repo}"
    )
    return f'<a href="{link}" target="_blank">See notebook</a>'
def generate_notebook(dataset_id):
    """Create a starter analysis notebook for a Hub dataset.

    The notebook installs pandas, loads one parquet file of the dataset into
    a DataFrame and displays its first rows. The file is written through
    ``create_notebook_file``, which also logs the creation.

    Args:
        dataset_id: Hub dataset id; any ``/`` is replaced by ``-`` to build
            the local file name.

    Returns:
        The name of the generated ``.ipynb`` file.

    Raises:
        gr.Error: If no dataset id was provided.
    """
    if not dataset_id:
        # Without an id the file name would collapse to ".ipynb" (or the
        # call would fail on None); surface a readable message instead.
        raise gr.Error("Please select a dataset before generating a notebook.")

    # TODO: Get first config and split? or generate a dataframe per each split maybe?
    commands = [
        "!pip install pandas",
        "import pandas as pd",
        f"df = pd.read_parquet('hf://datasets/{dataset_id}/data/train-00000-of-00001.parquet')",
        "df.head()",
    ]
    notebook_name = f"{dataset_id.replace('/', '-')}.ipynb"
    create_notebook_file(commands, notebook_name=notebook_name)
    return notebook_name
# UI definition: dataset search, dataset preview, notebook generation and
# an optional push of the generated notebook to the Hub.
with gr.Blocks() as demo:
    gr.Markdown("# 🤖 Dataset auto analyst creator 🕵️")
    dataset_name = HuggingfaceHubSearch(
        label="Hub Dataset ID",
        placeholder="Search for dataset id on Huggingface",
        search_type="dataset",
        value="",
    )

    @gr.render(inputs=dataset_name)
    def embed(name):
        """Render a preview of the selected dataset, or a hint if none is set."""
        if not name:
            return gr.Markdown("### No dataset provided")
        # NOTE(review): assumes the "default" config and "train" split exist
        # for the dataset (see the configs/splits TODO) — confirm.
        html_code = f"""
        <iframe
          src="https://huggingface.co/datasets/{name}/embed/viewer/default/train"
          frameborder="0"
          width="100%"
          height="350px"
        ></iframe>
        """
        return gr.HTML(value=html_code)

    generate_btn = gr.Button("Generate notebook", visible=True)
    download_link = gr.File(label="Download notebook")
    generate_btn.click(
        generate_notebook, inputs=[dataset_name], outputs=[download_link]
    )

    # NOTE(review): nesting below is reconstructed; it assumes only the three
    # token widgets live inside the row/column — confirm against the layout.
    with gr.Row() as auth_page:
        with gr.Column():
            auth_title = gr.Markdown(
                "Enter your token ([settings](https://huggingface.co/settings/tokens)):"
            )
            token_box = gr.Textbox(
                "", label="token", placeholder="hf_xxx", type="password"
            )
            auth_error = gr.Markdown("", visible=False)

    def auth(token):
        """Show the push button only while the token box is non-empty."""
        # push_btn is created after this function is defined; that is fine
        # because the name is only looked up when the handler runs.
        return {
            auth_error: gr.Markdown(value="", visible=False),
            push_btn: gr.Button(visible=bool(token)),
        }

    push_btn = gr.Button("Push notebook to hub", visible=False)
    token_box.change(
        auth,
        inputs=token_box,
        outputs=[auth_error, push_btn],
    )

    output_lbl = gr.HTML(value="")
    push_btn.click(
        push_notebook,
        inputs=[download_link, dataset_name, token_box],
        outputs=[output_lbl],
    )

demo.launch()