asoria HF staff committed on
Commit
ca1279e
·
1 Parent(s): 7af3e0d

add dataset viewer compatible libraries code

Browse files
Files changed (2) hide show
  1. app.py +31 -8
  2. requirements.txt +2 -1
app.py CHANGED
@@ -2,28 +2,40 @@ import gradio as gr
2
  from gradio_huggingfacehub_search import HuggingfaceHubSearch
3
  import nbformat as nbf
4
  from huggingface_hub import HfApi
5
-
6
 
7
  """
8
  TODOs:
9
  - Handle errors
10
  - Add more commands to the notebook
11
  - Parametrize the commands (Move to another file)
12
- - How to handle configs and splits?
13
  - Let user choose the framework
 
 
14
  - Improve logs
 
15
  """
16
 
 
 
 
 
 
 
 
 
 
 
17
 
18
- def create_notebook_file(cell_commands, notebook_name="generated_notebook.ipynb"):
 
19
  nb = nbf.v4.new_notebook()
20
  nb["cells"] = [nbf.v4.new_code_cell(command) for command in cell_commands]
21
 
22
  with open(notebook_name, "w") as f:
23
  nbf.write(nb, f)
24
 
25
- print(f"Notebook '{notebook_name}' created successfully.")
26
-
27
 
28
  def push_notebook(file_path, dataset_id, token):
29
  notebook_name = "dataset_analysis.ipynb"
@@ -34,7 +46,6 @@ def push_notebook(file_path, dataset_id, token):
34
  repo_id=dataset_id,
35
  repo_type="dataset",
36
  )
37
- print("Notebook uploaded to Huggingface Hub.")
38
  link = f"https://huggingface.co/datasets/{dataset_id}/blob/main/{notebook_name}"
39
  html = f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline; text-decoration-style: dotted;">See notebook</a>'
40
 
@@ -42,10 +53,22 @@ def push_notebook(file_path, dataset_id, token):
42
 
43
 
44
  def generate_notebook(dataset_id):
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  commands = [
46
  "!pip install pandas",
47
- "import pandas as pd",
48
- f"df = pd.read_parquet('hf://datasets/{dataset_id}/data/train-00000-of-00001.parquet')",
49
  "df.head()",
50
  ]
51
  notebook_name = f"{dataset_id.replace('/', '-')}.ipynb"
 
2
  from gradio_huggingfacehub_search import HuggingfaceHubSearch
3
  import nbformat as nbf
4
  from huggingface_hub import HfApi
5
+ from httpx import Client
6
 
7
  """
8
  TODOs:
9
  - Handle errors
10
  - Add more commands to the notebook
11
  - Parametrize the commands (Move to another file)
12
+ - How to handle configs and splits? -> Got from /compatible-libraries
13
  - Let user choose the framework
14
+ - Use an LLM to suggest commands
15
+ - Add commands for auto training
16
  - Improve logs
17
+ - Enable 'generate notebook' button only if dataset is available and supports library
18
  """
19
 
20
+ BASE_DATASETS_SERVER_URL = "https://datasets-server.huggingface.co"
21
+ headers = {"Accept": "application/json", "Content-Type": "application/json"}
22
+ client = Client(headers=headers)
23
+
24
+
25
+ def get_compatible_libraries(dataset: str):
26
+ resp = client.get(
27
+ f"{BASE_DATASETS_SERVER_URL}/compatible-libraries?dataset={dataset}"
28
+ )
29
+ return resp.json()
30
 
31
+
32
+ def create_notebook_file(cell_commands, notebook_name):
33
  nb = nbf.v4.new_notebook()
34
  nb["cells"] = [nbf.v4.new_code_cell(command) for command in cell_commands]
35
 
36
  with open(notebook_name, "w") as f:
37
  nbf.write(nb, f)
38
 
 
 
39
 
40
  def push_notebook(file_path, dataset_id, token):
41
  notebook_name = "dataset_analysis.ipynb"
 
46
  repo_id=dataset_id,
47
  repo_type="dataset",
48
  )
 
49
  link = f"https://huggingface.co/datasets/{dataset_id}/blob/main/{notebook_name}"
50
  html = f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline; text-decoration-style: dotted;">See notebook</a>'
51
 
 
53
 
54
 
55
  def generate_notebook(dataset_id):
56
+ first_code = f"import pandas as pd\n\ndf = pd.read_parquet('hf://datasets/{dataset_id}/data/train-00000-of-00001.parquet')"
57
+ try:
58
+ libraries = get_compatible_libraries(dataset_id)["libraries"]
59
+ except Exception as err:
60
+ print(f"Error: {err}")
61
+ return gr.File(visible=False), gr.Row.update(visible=False)
62
+ if pandas_library := next(
63
+ (element for element in libraries if element["library"] == "pandas"), None
64
+ ):
65
+ first_code = pandas_library["loading_codes"][0]["code"]
66
+ else:
67
+ return gr.File(visible=False), gr.Row.update(visible=False)
68
+
69
  commands = [
70
  "!pip install pandas",
71
+ first_code,
 
72
  "df.head()",
73
  ]
74
  notebook_name = f"{dataset_id.replace('/', '-')}.ipynb"
requirements.txt CHANGED
@@ -1,3 +1,4 @@
1
  gradio_huggingfacehub_search==0.0.7
2
  huggingface_hub
3
- nbformat
 
 
1
  gradio_huggingfacehub_search==0.0.7
2
  huggingface_hub
3
+ nbformat
4
+ httpx