lhoestq HF Staff committed on
Commit
4b3b034
·
1 Parent(s): e2928bf

add url query param support

Browse files
Files changed (1) hide show
  1. app.py +73 -28
app.py CHANGED
@@ -13,6 +13,7 @@ client = InferenceClient(model_id)
13
 
14
  MAX_TOTAL_NB_ITEMS = 100 # almost infinite, don't judge me (actually it's because gradio needs a fixed number of components)
15
  MAX_NB_ITEMS_PER_GENERATION_CALL = 10
 
16
 
17
  GENERATE_DATASET_NAMES_FOR_SEARCH_QUERY = (
18
  "A Machine Learning Practioner is looking for a dataset that matches '{search_query}'. "
@@ -168,11 +169,10 @@ with gr.Blocks(css=css) as demo:
168
  buttons.append(gr.Button(dataset_name, elem_classes=dataset_name_classes))
169
  buttons.append(gr.Button(tags, elem_classes=tags_classes))
170
 
171
- see_more = gr.Button("See more") # TODO: disable when reaching end of page
172
  gr.Markdown(f"_powered by [{model_id}](https://huggingface.co/{model_id})_")
173
  with gr.Column(scale=4, min_width=0):
174
  pass
175
- # more.click(search_more_datasets, inputs=[generated_texts, search_bar], outputs=[generated_texts] + buttons)
176
  with gr.Column(visible=False) as dataset_page:
177
  with gr.Row():
178
  with gr.Column(scale=4, min_width=0):
@@ -185,12 +185,15 @@ with gr.Blocks(css=css) as demo:
185
  pass
186
  with gr.Column():
187
  generate_full_dataset_button = gr.Button("Generate Full Dataset", variant="primary") # TODO: implement
 
 
188
  back_button = gr.Button("< Back", size="sm")
189
  with gr.Column(scale=4, min_width=0):
190
  pass
191
  with gr.Column(scale=4, min_width=0):
192
  pass
193
-
 
194
 
195
  T = TypeVar("T")
196
 
@@ -218,6 +221,7 @@ with gr.Blocks(css=css) as demo:
218
  max_tokens=max_tokens,
219
  stream=True,
220
  top_p=0.8,
 
221
  ):
222
  yield message.choices[0].delta.content
223
  except requests.exceptions.ConnectionError as e:
@@ -228,6 +232,7 @@ with gr.Blocks(css=css) as demo:
228
 
229
 
230
  def gen_datasets_line_by_line(search_query: str, generated_texts: tuple[str] = ()) -> Iterator[str]:
 
231
  search_query = search_query[:1000] if search_query.strip() else landing_page_query
232
  generated_text = ""
233
  current_line = ""
@@ -246,6 +251,7 @@ with gr.Blocks(css=css) as demo:
246
 
247
 
248
  def gen_dataset_content(search_query: str, dataset_name: str, tags: str) -> Iterator[str]:
 
249
  search_query = search_query[:1000] if search_query.strip() else landing_page_query
250
  generated_text = ""
251
  for token in stream_reponse(GENERATE_DATASET_CONTENT_FOR_SEARCH_QUERY_AND_NAME_AND_TAGS.format(
@@ -257,11 +263,9 @@ with gr.Blocks(css=css) as demo:
257
  yield generated_text
258
  print("-----\n\n" + generated_text)
259
 
260
- search_datasets_inputs = search_bar
261
- search_datasets_outputs = button_groups + buttons + [generated_texts_state]
262
 
263
- def search_datasets(search_query):
264
- yield {generated_texts_state: []}
265
  yield {
266
  button_group: gr.Group(elem_classes="buttonsGroup insivibleButtonGroup")
267
  for button_group in button_groups[MAX_NB_ITEMS_PER_GENERATION_CALL:]
@@ -295,9 +299,23 @@ with gr.Blocks(css=css) as demo:
295
  }
296
  current_item_idx += 1
297
 
298
- search_more_datasets_inputs = [search_bar, generated_texts_state]
299
- search_more_datasets_outputs = button_groups + buttons + [generated_texts_state]
300
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
301
  def search_more_datasets(search_query, generated_texts):
302
  current_item_idx = initial_item_idx = len(generated_texts) * MAX_NB_ITEMS_PER_GENERATION_CALL
303
  yield {
@@ -324,36 +342,63 @@ with gr.Blocks(css=css) as demo:
324
  }
325
  current_item_idx += 1
326
 
327
- show_dataset_inputs = [search_bar, *buttons]
328
- show_dataset_outputs = [search_page, dataset_page, dataset_title, dataset_content]
329
-
330
- def show_dataset(search_query, *buttons_values, i):
331
- dataset_name, tags = buttons_values[2 * i : 2 * i + 2]
332
  yield {
333
  search_page: gr.Column(visible=False),
334
  dataset_page: gr.Column(visible=True),
335
- dataset_title: f"# {dataset_name}\n\n tags: {tags}\n\n _Note: This is an AI-generated dataset so its content may be inaccurate or false_"
 
 
 
 
 
 
336
  }
337
  for generated_text in gen_dataset_content(search_query=search_query, dataset_name=dataset_name, tags=tags):
338
  yield {dataset_content: generated_text}
339
 
340
 
341
- def show_search_page():
342
- return gr.Column(visible=True), gr.Column(visible=False)
343
 
 
 
 
 
 
 
 
344
 
345
- def generate_full_dataset():
346
- raise gr.Error("Not implemented yet sorry ! Give me some feedbacks in the Community tab in the meantime ;)")
347
 
 
 
 
348
 
349
- search_bar.submit(search_datasets, inputs=search_datasets_inputs, outputs=search_datasets_outputs)
350
- search_button.click(search_datasets, inputs=search_datasets_inputs, outputs=search_datasets_outputs)
351
- for i, (dataset_name_button, tags_button) in enumerate(batched(buttons, 2)):
352
- dataset_name_button.click(partial(show_dataset, i=i), inputs=show_dataset_inputs, outputs=show_dataset_outputs)
353
- tags_button.click(partial(show_dataset, i=i), inputs=show_dataset_inputs, outputs=show_dataset_outputs)
354
- see_more.click(search_more_datasets, inputs=search_more_datasets_inputs, outputs=search_more_datasets_outputs)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
355
 
356
- generate_full_dataset_button.click(generate_full_dataset)
357
- back_button.click(show_search_page, inputs=[], outputs=[search_page, dataset_page])
358
 
359
  demo.launch()
 
13
 
14
  MAX_TOTAL_NB_ITEMS = 100 # almost infinite, don't judge me (actually it's because gradio needs a fixed number of components)
15
  MAX_NB_ITEMS_PER_GENERATION_CALL = 10
16
+ URL = "https://huggingface.co/spaces/infinite-dataset-hub/infinite-dataset-hub"
17
 
18
  GENERATE_DATASET_NAMES_FOR_SEARCH_QUERY = (
19
  "A Machine Learning Practioner is looking for a dataset that matches '{search_query}'. "
 
169
  buttons.append(gr.Button(dataset_name, elem_classes=dataset_name_classes))
170
  buttons.append(gr.Button(tags, elem_classes=tags_classes))
171
 
172
+ load_more_datasets = gr.Button("Load more datasets") # TODO: disable when reaching end of page
173
  gr.Markdown(f"_powered by [{model_id}](https://huggingface.co/{model_id})_")
174
  with gr.Column(scale=4, min_width=0):
175
  pass
 
176
  with gr.Column(visible=False) as dataset_page:
177
  with gr.Row():
178
  with gr.Column(scale=4, min_width=0):
 
185
  pass
186
  with gr.Column():
187
  generate_full_dataset_button = gr.Button("Generate Full Dataset", variant="primary") # TODO: implement
188
+ dataset_share_button = gr.Button("Share Dataset URL")
189
+ dataset_share_textbox = gr.Textbox(visible=False, show_copy_button=True, label="Copy this URL:", interactive=False, show_label=True)
190
  back_button = gr.Button("< Back", size="sm")
191
  with gr.Column(scale=4, min_width=0):
192
  pass
193
  with gr.Column(scale=4, min_width=0):
194
  pass
195
+
196
+ app_state = gr.State({})
197
 
198
  T = TypeVar("T")
199
 
 
221
  max_tokens=max_tokens,
222
  stream=True,
223
  top_p=0.8,
224
+ seed=42,
225
  ):
226
  yield message.choices[0].delta.content
227
  except requests.exceptions.ConnectionError as e:
 
232
 
233
 
234
  def gen_datasets_line_by_line(search_query: str, generated_texts: tuple[str] = ()) -> Iterator[str]:
235
+ search_query = search_query or ""
236
  search_query = search_query[:1000] if search_query.strip() else landing_page_query
237
  generated_text = ""
238
  current_line = ""
 
251
 
252
 
253
  def gen_dataset_content(search_query: str, dataset_name: str, tags: str) -> Iterator[str]:
254
+ search_query = search_query or ""
255
  search_query = search_query[:1000] if search_query.strip() else landing_page_query
256
  generated_text = ""
257
  for token in stream_reponse(GENERATE_DATASET_CONTENT_FOR_SEARCH_QUERY_AND_NAME_AND_TAGS.format(
 
263
  yield generated_text
264
  print("-----\n\n" + generated_text)
265
 
 
 
266
 
267
+ def _search_datasets(search_query):
268
+ yield {generated_texts_state: [], app_state: {"search_query": search_query}}
269
  yield {
270
  button_group: gr.Group(elem_classes="buttonsGroup insivibleButtonGroup")
271
  for button_group in button_groups[MAX_NB_ITEMS_PER_GENERATION_CALL:]
 
299
  }
300
  current_item_idx += 1
301
 
302
+ search_js = """
303
+ (search_query => {
304
+ const urlParams = new URLSearchParams(window.location.search);
305
+ urlParams.set('q', search_query);
306
+ window.location.search = urlParams;
307
+ })
308
+ """
309
+
310
+ @search_button.click(inputs=search_bar, outputs=button_groups + buttons + [generated_texts_state, app_state], js=search_js)
311
+ def search_dataset_from_search_button(search_query):
312
+ yield from _search_datasets(search_query)
313
+
314
+ @search_bar.submit(inputs=search_bar, outputs=button_groups + buttons + [generated_texts_state, app_state], js=search_js)
315
+ def search_dataset_from_search_bar(search_query):
316
+ yield from _search_datasets(search_query)
317
+
318
+ @load_more_datasets.click(inputs=[search_bar, generated_texts_state], outputs=button_groups + buttons + [generated_texts_state])
319
  def search_more_datasets(search_query, generated_texts):
320
  current_item_idx = initial_item_idx = len(generated_texts) * MAX_NB_ITEMS_PER_GENERATION_CALL
321
  yield {
 
342
  }
343
  current_item_idx += 1
344
 
345
+ def _show_dataset(search_query, dataset_name, tags):
 
 
 
 
346
  yield {
347
  search_page: gr.Column(visible=False),
348
  dataset_page: gr.Column(visible=True),
349
+ dataset_title: f"# {dataset_name}\n\n tags: {tags}\n\n _Note: This is an AI-generated dataset so its content may be inaccurate or false_",
350
+ dataset_share_textbox: gr.Textbox(visible=False),
351
+ app_state: {
352
+ "search_query": search_query,
353
+ "dataset_name": dataset_name,
354
+ "tags": tags
355
+ }
356
  }
357
  for generated_text in gen_dataset_content(search_query=search_query, dataset_name=dataset_name, tags=tags):
358
  yield {dataset_content: generated_text}
359
 
360
 
361
+ show_dataset_inputs = [search_bar, *buttons]
362
+ show_dataset_outputs = [app_state, search_page, dataset_page, dataset_title, dataset_content, dataset_share_textbox]
363
 
364
+ def show_dataset_from_button(search_query, *buttons_values, i):
365
+ dataset_name, tags = buttons_values[2 * i : 2 * i + 2]
366
+ yield from _show_dataset(search_query, dataset_name, tags)
367
+
368
+ for i, (dataset_name_button, tags_button) in enumerate(batched(buttons, 2)):
369
+ dataset_name_button.click(partial(show_dataset_from_button, i=i), inputs=show_dataset_inputs, outputs=show_dataset_outputs)
370
+ tags_button.click(partial(show_dataset_from_button, i=i), inputs=show_dataset_inputs, outputs=show_dataset_outputs)
371
 
 
 
372
 
373
+ @back_button.click(outputs=[search_page, dataset_page])
374
+ def show_search_page():
375
+ return gr.Column(visible=True), gr.Column(visible=False)
376
 
377
+ @generate_full_dataset_button.click()
378
+ def generate_full_dataset():
379
+ raise gr.Error("Not implemented yet sorry ! Request your dataset in the Discussion tab (provide the dataset URL)")
380
+
381
+ @dataset_share_button.click(inputs=[app_state], outputs=[dataset_share_textbox])
382
+ def show_dataset_url(state):
383
+ return gr.Textbox(
384
+ f"{URL}?q={state['search_query'].replace(' ', '+')}&dataset={state['dataset_name']}&tags={state['tags']}",
385
+ visible=True,
386
+ )
387
+
388
+ @demo.load(outputs=[app_state, search_page, search_bar, dataset_page, dataset_title, dataset_content, dataset_share_textbox] + button_groups + buttons + [generated_texts_state])
389
+ def load_app(request: gr.Request):
390
+ query_params = dict(request.query_params)
391
+ if "dataset" in query_params:
392
+ yield from _show_dataset(
393
+ search_query=query_params.get("q", query_params["dataset"]),
394
+ dataset_name=query_params["dataset"],
395
+ tags=query_params.get("tags", "")
396
+ )
397
+ elif "q" in query_params:
398
+ yield {search_bar: query_params["q"]}
399
+ yield from _search_datasets(query_params["q"])
400
+ else:
401
+ yield {search_page: gr.Column(visible=True)}
402
 
 
 
403
 
404
  demo.launch()