asoria HF staff committed on
Commit
f327376
·
1 Parent(s): ca1279e

Integration with compatible-libraries and other commands

Browse files
Files changed (1) hide show
  1. app.py +51 -29
app.py CHANGED
@@ -3,30 +3,37 @@ from gradio_huggingfacehub_search import HuggingfaceHubSearch
3
  import nbformat as nbf
4
  from huggingface_hub import HfApi
5
  from httpx import Client
 
 
6
 
7
  """
8
  TODOs:
9
- - Handle erros
10
  - Add more commands to the notebook
11
  - Parametrize the commands (Move to another file)
12
- - How to handle configs and splits? -> Got from /compatible-libraries
13
- - Let user choose the framework
14
- - Use an LLM to suggest commands
15
  - Add commands for auto training
16
- - Improve logs
17
  - Enable 'generate notebook' button only if dataset is available and supports library
18
  """
19
 
 
20
  BASE_DATASETS_SERVER_URL = "https://datasets-server.huggingface.co"
21
- headers = {"Accept": "application/json", "Content-Type": "application/json"}
22
- client = Client(headers=headers)
 
 
23
 
24
 
25
  def get_compatible_libraries(dataset: str):
26
- resp = client.get(
27
- f"{BASE_DATASETS_SERVER_URL}/compatible-libraries?dataset={dataset}"
28
- )
29
- return resp.json()
 
 
 
 
 
30
 
31
 
32
  def create_notebook_file(cell_commands, notebook_name):
@@ -35,41 +42,56 @@ def create_notebook_file(cell_commands, notebook_name):
35
 
36
  with open(notebook_name, "w") as f:
37
  nbf.write(nb, f)
 
38
 
39
 
40
  def push_notebook(file_path, dataset_id, token):
41
  notebook_name = "dataset_analysis.ipynb"
42
  api = HfApi(token=token)
43
- api.upload_file(
44
- path_or_fileobj=file_path,
45
- path_in_repo=notebook_name,
46
- repo_id=dataset_id,
47
- repo_type="dataset",
48
- )
49
- link = f"https://huggingface.co/datasets/{dataset_id}/blob/main/{notebook_name}"
50
- html = f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline; text-decoration-style: dotted;">See notebook</a>'
51
-
52
- return gr.HTML(value=html, visible=True)
 
 
 
 
 
53
 
54
 
55
  def generate_notebook(dataset_id):
56
  first_code = f"import pandas as pd\n\ndf = pd.read_parquet('hf://datasets/{dataset_id}/data/train-00000-of-00001.parquet')"
57
- try:
58
- libraries = get_compatible_libraries(dataset_id)["libraries"]
59
- except Exception as err:
60
- print(f"Error: {err}")
61
  return gr.File(visible=False), gr.Row.update(visible=False)
62
- if pandas_library := next(
63
- (element for element in libraries if element["library"] == "pandas"), None
64
- ):
 
 
 
65
  first_code = pandas_library["loading_codes"][0]["code"]
66
  else:
67
  return gr.File(visible=False), gr.Row.update(visible=False)
68
 
 
69
  commands = [
70
  "!pip install pandas",
71
  first_code,
72
  "df.head()",
 
 
 
 
 
 
73
  ]
74
  notebook_name = f"{dataset_id.replace('/', '-')}.ipynb"
75
  create_notebook_file(commands, notebook_name=notebook_name)
@@ -103,7 +125,7 @@ with gr.Blocks() as demo:
103
  download_link = gr.File(label="Download notebook", visible=False)
104
  with gr.Row(visible=False) as auth_page:
105
  with gr.Column():
106
- auth_title = gr.Markdown(
107
  "Want to push to hub? Enter your token ([settings](https://huggingface.co/settings/tokens)):"
108
  )
109
  token_box = gr.Textbox(
 
3
  import nbformat as nbf
4
  from huggingface_hub import HfApi
5
  from httpx import Client
6
+ import logging
7
+
8
 
9
  """
10
  TODOs:
 
11
  - Add more commands to the notebook
12
  - Parametrize the commands (Move to another file)
13
+ - Let user choose the framework and get it from /compatible-libraries
14
+ - Use an LLM to suggest commands by column types
 
15
  - Add commands for auto training
 
16
  - Enable 'generate notebook' button only if dataset is available and supports library
17
  """
18
 
19
+ # Configuration
20
  BASE_DATASETS_SERVER_URL = "https://datasets-server.huggingface.co"
21
+ HEADERS = {"Accept": "application/json", "Content-Type": "application/json"}
22
+ client = Client(headers=HEADERS)
23
+
24
+ logging.basicConfig(level=logging.INFO)
25
 
26
 
27
  def get_compatible_libraries(dataset: str):
28
+ try:
29
+ resp = client.get(
30
+ f"{BASE_DATASETS_SERVER_URL}/compatible-libraries?dataset={dataset}"
31
+ )
32
+ resp.raise_for_status()
33
+ return resp.json()
34
+ except Exception as err:
35
+ logging.error(f"Failed to fetch compatible libraries: {err}")
36
+ return None
37
 
38
 
39
  def create_notebook_file(cell_commands, notebook_name):
 
42
 
43
  with open(notebook_name, "w") as f:
44
  nbf.write(nb, f)
45
+ logging.info(f"Notebook {notebook_name} created successfully")
46
 
47
 
48
  def push_notebook(file_path, dataset_id, token):
49
  notebook_name = "dataset_analysis.ipynb"
50
  api = HfApi(token=token)
51
+ try:
52
+ api.upload_file(
53
+ path_or_fileobj=file_path,
54
+ path_in_repo=notebook_name,
55
+ repo_id=dataset_id,
56
+ repo_type="dataset",
57
+ )
58
+ link = f"https://huggingface.co/datasets/{dataset_id}/blob/main/{notebook_name}"
59
+ return gr.HTML(
60
+ value=f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline; text-decoration-style: dotted;">See notebook</a>',
61
+ visible=True,
62
+ )
63
+ except Exception as err:
64
+ logging.error(f"Failed to push notebook: {err}")
65
+ return gr.HTML(value="Failed to push notebook", visible=True)
66
 
67
 
68
  def generate_notebook(dataset_id):
69
  first_code = f"import pandas as pd\n\ndf = pd.read_parquet('hf://datasets/{dataset_id}/data/train-00000-of-00001.parquet')"
70
+ libraries = get_compatible_libraries(dataset_id)
71
+
72
+ if not libraries:
 
73
  return gr.File(visible=False), gr.Row.update(visible=False)
74
+
75
+ pandas_library = next(
76
+ (lib for lib in libraries.get("libraries", []) if lib["library"] == "pandas"),
77
+ None,
78
+ )
79
+ if pandas_library:
80
  first_code = pandas_library["loading_codes"][0]["code"]
81
  else:
82
  return gr.File(visible=False), gr.Row.update(visible=False)
83
 
84
+ html_code = f"<iframe src='https://huggingface.co/datasets/{dataset_id}/embed/viewer' width='80%' height='560px'></iframe>"
85
  commands = [
86
  "!pip install pandas",
87
  first_code,
88
  "df.head()",
89
+ f'from IPython.display import HTML\n\ndisplay(HTML("{html_code}"))',
90
+ "print(df.shape)",
91
+ "df.columns",
92
+ "df.describe()",
93
+ "df.info()",
94
+ # TODO: Generate more commands according to column types for EDA and then for auto training?
95
  ]
96
  notebook_name = f"{dataset_id.replace('/', '-')}.ipynb"
97
  create_notebook_file(commands, notebook_name=notebook_name)
 
125
  download_link = gr.File(label="Download notebook", visible=False)
126
  with gr.Row(visible=False) as auth_page:
127
  with gr.Column():
128
+ gr.Markdown(
129
  "Want to push to hub? Enter your token ([settings](https://huggingface.co/settings/tokens)):"
130
  )
131
  token_box = gr.Textbox(