add url query param support
app.py CHANGED
@@ -13,6 +13,7 @@ client = InferenceClient(model_id)

 MAX_TOTAL_NB_ITEMS = 100  # almost infinite, don't judge me (actually it's because gradio needs a fixed number of components)
 MAX_NB_ITEMS_PER_GENERATION_CALL = 10
+URL = "https://huggingface.co/spaces/infinite-dataset-hub/infinite-dataset-hub"

 GENERATE_DATASET_NAMES_FOR_SEARCH_QUERY = (
     "A Machine Learning Practioner is looking for a dataset that matches '{search_query}'. "
@@ -168,11 +169,10 @@ with gr.Blocks(css=css) as demo:
                         buttons.append(gr.Button(dataset_name, elem_classes=dataset_name_classes))
                         buttons.append(gr.Button(tags, elem_classes=tags_classes))

-
+                load_more_datasets = gr.Button("Load more datasets")  # TODO: dosable when reaching end of page
                 gr.Markdown(f"_powered by [{model_id}](https://huggingface.co/{model_id})_")
             with gr.Column(scale=4, min_width=0):
                 pass
-    # more.click(search_more_datasets, inputs=[generated_texts, search_bar], outputs=[generated_texts] + buttons)
     with gr.Column(visible=False) as dataset_page:
         with gr.Row():
             with gr.Column(scale=4, min_width=0):
@@ -185,12 +185,15 @@ with gr.Blocks(css=css) as demo:
                         pass
                     with gr.Column():
                         generate_full_dataset_button = gr.Button("Generate Full Dataset", variant="primary")  # TODO: implement
+                        dataset_share_button = gr.Button("Share Dataset URL")
+                        dataset_share_textbox = gr.Textbox(visible=False, show_copy_button=True, label="Copy this URL:", interactive=False, show_label=True)
                         back_button = gr.Button("< Back", size="sm")
                     with gr.Column(scale=4, min_width=0):
                         pass
             with gr.Column(scale=4, min_width=0):
                 pass
-
+
+    app_state = gr.State({})

     T = TypeVar("T")

@@ -218,6 +221,7 @@ with gr.Blocks(css=css) as demo:
                 max_tokens=max_tokens,
                 stream=True,
                 top_p=0.8,
+                seed=42,
             ):
                 yield message.choices[0].delta.content
         except requests.exceptions.ConnectionError as e:
@@ -228,6 +232,7 @@ with gr.Blocks(css=css) as demo:


     def gen_datasets_line_by_line(search_query: str, generated_texts: tuple[str] = ()) -> Iterator[str]:
+        search_query = search_query or ""
         search_query = search_query[:1000] if search_query.strip() else landing_page_query
         generated_text = ""
         current_line = ""
@@ -246,6 +251,7 @@ with gr.Blocks(css=css) as demo:


     def gen_dataset_content(search_query: str, dataset_name: str, tags: str) -> Iterator[str]:
+        search_query = search_query or ""
         search_query = search_query[:1000] if search_query.strip() else landing_page_query
         generated_text = ""
         for token in stream_reponse(GENERATE_DATASET_CONTENT_FOR_SEARCH_QUERY_AND_NAME_AND_TAGS.format(
@@ -257,11 +263,9 @@ with gr.Blocks(css=css) as demo:
             yield generated_text
         print("-----\n\n" + generated_text)

-    search_datasets_inputs = search_bar
-    search_datasets_outputs = button_groups + buttons + [generated_texts_state]

-    def
-        yield {generated_texts_state: []}
+    def _search_datasets(search_query):
+        yield {generated_texts_state: [], app_state: {"search_query": search_query}}
         yield {
             button_group: gr.Group(elem_classes="buttonsGroup insivibleButtonGroup")
             for button_group in button_groups[MAX_NB_ITEMS_PER_GENERATION_CALL:]
@@ -295,9 +299,23 @@ with gr.Blocks(css=css) as demo:
             }
             current_item_idx += 1

-
-
-
+    search_js = """
+    (search_query => {
+        const urlParams = new URLSearchParams(window.location.search);
+        urlParams.set('q', search_query);
+        window.location.search = urlParams;
+    })
+    """
+
+    @search_button.click(inputs=search_bar, outputs=button_groups + buttons + [generated_texts_state, app_state], js=search_js)
+    def search_dataset_from_search_button(search_query):
+        yield from _search_datasets(search_query)
+
+    @search_bar.submit(inputs=search_bar, outputs=button_groups + buttons + [generated_texts_state, app_state], js=search_js)
+    def search_dataset_from_search_bar(search_query):
+        yield from _search_datasets(search_query)
+
+    @load_more_datasets.click(inputs=[search_bar, generated_texts_state], outputs=button_groups + buttons + [generated_texts_state])
     def search_more_datasets(search_query, generated_texts):
         current_item_idx = initial_item_idx = len(generated_texts) * MAX_NB_ITEMS_PER_GENERATION_CALL
         yield {
@@ -324,36 +342,63 @@ with gr.Blocks(css=css) as demo:
             }
             current_item_idx += 1

-
-    show_dataset_outputs = [search_page, dataset_page, dataset_title, dataset_content]
-
-    def show_dataset(search_query, *buttons_values, i):
-        dataset_name, tags = buttons_values[2 * i : 2 * i + 2]
+    def _show_dataset(search_query, dataset_name, tags):
         yield {
             search_page: gr.Column(visible=False),
             dataset_page: gr.Column(visible=True),
-            dataset_title: f"# {dataset_name}\n\n tags: {tags}\n\n _Note: This is an AI-generated dataset so its content may be inaccurate or false_"
+            dataset_title: f"# {dataset_name}\n\n tags: {tags}\n\n _Note: This is an AI-generated dataset so its content may be inaccurate or false_",
+            dataset_share_textbox: gr.Textbox(visible=False),
+            app_state: {
+                "search_query": search_query,
+                "dataset_name": dataset_name,
+                "tags": tags
+            }
         }
         for generated_text in gen_dataset_content(search_query=search_query, dataset_name=dataset_name, tags=tags):
             yield {dataset_content: generated_text}


-
-
+    show_dataset_inputs = [search_bar, *buttons]
+    show_dataset_outputs = [app_state, search_page, dataset_page, dataset_title, dataset_content, dataset_share_textbox]

+    def show_dataset_from_button(search_query, *buttons_values, i):
+        dataset_name, tags = buttons_values[2 * i : 2 * i + 2]
+        yield from _show_dataset(search_query, dataset_name, tags)
+
+    for i, (dataset_name_button, tags_button) in enumerate(batched(buttons, 2)):
+        dataset_name_button.click(partial(show_dataset_from_button, i=i), inputs=show_dataset_inputs, outputs=show_dataset_outputs)
+        tags_button.click(partial(show_dataset_from_button, i=i), inputs=show_dataset_inputs, outputs=show_dataset_outputs)

-    def generate_full_dataset():
-        raise gr.Error("Not implemented yet sorry ! Give me some feedbacks in the Community tab in the meantime ;)")

+    @back_button.click(outputs=[search_page, dataset_page])
+    def show_search_page():
+        return gr.Column(visible=True), gr.Column(visible=False)

-
-
-
-
-
-
+    @generate_full_dataset_button.click()
+    def generate_full_dataset():
+        raise gr.Error("Not implemented yet sorry ! Request your dataset in the Discussion tab (provide the dataset URL)")
+
+    @dataset_share_button.click(inputs=[app_state], outputs=[dataset_share_textbox])
+    def show_dataset_url(state):
+        return gr.Textbox(
+            f"{URL}?q={state['search_query'].replace(' ', '+')}&dataset={state['dataset_name']}&tags={state['tags']}",
+            visible=True,
+        )
+
+    @demo.load(outputs=[app_state, search_page, search_bar, dataset_page, dataset_title, dataset_content, dataset_share_textbox] + button_groups + buttons + [generated_texts_state])
+    def load_app(request: gr.Request):
+        query_params = dict(request.query_params)
+        if "dataset" in query_params:
+            yield from _show_dataset(
+                search_query=query_params.get("q", query_params["dataset"]),
+                dataset_name=query_params["dataset"],
+                tags=query_params.get("tags", "")
+            )
+        elif "q" in query_params:
+            yield {search_bar: query_params["q"]}
+            yield from _search_datasets(query_params["q"])
+        else:
+            yield {search_page: gr.Column(visible=True)}

-    generate_full_dataset_button.click(generate_full_dataset)
-    back_button.click(show_search_page, inputs=[], outputs=[search_page, dataset_page])

 demo.launch()
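The core of the commit is a round trip between the page URL and the app state: the new `search_js` snippet writes the query into `?q=...` whenever a search is triggered, and the new `load_app` listener reads `request.query_params` on page load to restore either a search (`?q=...`) or a dataset page (`?dataset=...&tags=...`). Below is a minimal, self-contained sketch of that pattern, assuming Gradio 4.x; the component and function names (`query_box`, `run_query`, `load_from_url`) are illustrative and not taken from app.py.

```python
# Minimal sketch of the URL-query-param round trip (assumes Gradio 4.x).
# All names here are illustrative; only the pattern mirrors the commit above.
import gradio as gr

# Client-side JS attached to the submit event: it pushes the query into the URL.
# Assigning window.location.search reloads the page with the new query string.
set_query_js = """
(q => {
    const urlParams = new URLSearchParams(window.location.search);
    urlParams.set('q', q);
    window.location.search = urlParams;
})
"""

with gr.Blocks() as demo:
    query_box = gr.Textbox(label="q")
    result = gr.Markdown()

    @query_box.submit(inputs=query_box, outputs=result, js=set_query_js)
    def run_query(q):
        return f"Results for **{q}**"

    # On load, gr.Request exposes the query string, so a shared `?q=...` link
    # (or the reload triggered by set_query_js) restores the same view.
    @demo.load(outputs=[query_box, result])
    def load_from_url(request: gr.Request):
        q = dict(request.query_params).get("q", "")
        return q, (f"Results for **{q}**" if q else "")

demo.launch()
```

In app.py the same idea is simply spread over more outputs (`button_groups + buttons + [generated_texts_state, app_state]`) and three URL cases instead of one.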
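`show_dataset_url` builds the share link with an f-string and only rewrites spaces to `+`, so characters such as `&`, `#`, or commas in the query or tags are left unescaped. A more defensive variant (a sketch, not what the commit ships) percent-encodes every parameter with `urllib.parse.urlencode`; `build_share_url` is a hypothetical helper and `base_url` stands in for the `URL` constant added at the top of the diff.

```python
# Sketch: build the share URL with full percent-encoding instead of a bare
# space-to-plus replace. `build_share_url` is a hypothetical helper, not in app.py.
from urllib.parse import urlencode

def build_share_url(base_url: str, search_query: str, dataset_name: str, tags: str) -> str:
    # urlencode quotes via quote_plus by default: spaces become '+', and
    # reserved characters like '&', '#', or ',' are percent-escaped.
    params = urlencode({"q": search_query, "dataset": dataset_name, "tags": tags})
    return f"{base_url}?{params}"

print(build_share_url(
    "https://huggingface.co/spaces/infinite-dataset-hub/infinite-dataset-hub",
    "solar flares & sunspots", "SolarFlareDB", "astronomy,timeseries",
))
# -> ...?q=solar+flares+%26+sunspots&dataset=SolarFlareDB&tags=astronomy%2Ctimeseries
```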
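The per-button wiring walks `batched(buttons, 2)` with `functools.partial` so each dataset-name/tags pair gets its own `show_dataset_from_button` callback. `itertools.batched` only exists from Python 3.12 onward, so app.py presumably defines or imports its own helper (the `T = TypeVar("T")` visible in the diff hints at a typed one); the following is a sketch of such a helper, not the file's actual code.

```python
# Sketch of a generic batched() helper compatible with Python < 3.12.
from itertools import islice
from typing import Iterable, Iterator, TypeVar

T = TypeVar("T")

def batched(iterable: Iterable[T], n: int) -> Iterator[tuple[T, ...]]:
    # Yield successive tuples of up to n items: batched("ABCDE", 2) -> AB, CD, E.
    it = iter(iterable)
    while batch := tuple(islice(it, n)):
        yield batch
```

With a helper like this, `enumerate(batched(buttons, 2))` iterates the flat `buttons` list two components at a time, matching how each button group appends a dataset-name button followed by a tags button.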