m7n committed on
Commit
1951cf0
·
verified ·
1 Parent(s): 2261de4

Update app.py

Browse files

Added a sample-reduction-method dropdown and the logic for it; set the "Run Query" button to the primary variant.

Files changed (1) hide show
  1. app.py +20 -8
app.py CHANGED
@@ -239,7 +239,7 @@ print(f"Language model is set up: {time.strftime('%Y-%m-%d %H:%M:%S')}")
239
 
240
 
241
 
242
- def predict(text_input, sample_size_slider, reduce_sample_checkbox, progress=gr.Progress()):
243
 
244
  print('getting data to project')
245
  progress(0, desc="Starting...")
@@ -259,6 +259,7 @@ def predict(text_input, sample_size_slider, reduce_sample_checkbox, progress=gr.
259
 
260
 
261
 
 
262
  records_df = pd.DataFrame(records)
263
  records_df['abstract'] = [invert_abstract(t) for t in records_df['abstract_inverted_index']]
264
 
@@ -271,11 +272,18 @@ def predict(text_input, sample_size_slider, reduce_sample_checkbox, progress=gr.
271
 
272
 
273
 
 
274
 
275
  if reduce_sample_checkbox:
276
- records_df = records_df.sample(sample_size_slider)
 
 
 
 
 
 
277
  print(records_df)
278
-
279
 
280
  progress(0.3, desc="Embedding Data...")
281
  texts_to_embedd = [title + tokenizer.sep_token + publication + tokenizer.sep_token + abstract for title, publication, abstract in zip(records_df['title'],records_df['parsed_publication'], records_df['abstract'])]
@@ -422,14 +430,18 @@ with gr.Blocks() as block:
422
  # You're a philosopher of science who wonders where the concept of a fitness landscape has appeared...
423
 
424
  with gr.Column():
425
- text_input = gr.Textbox(label="OpenAlex Fulltext-Search")
426
- sample_size_slider = gr.Slider(label="Sample Size", minimum=10, maximum=20000, step=10, value=1000)
427
- reduce_sample_checkbox = gr.Checkbox(label="Reduce Sample Size", value=True)
428
- new_btn = gr.Button("Run Query")
 
 
 
 
429
  markdown = gr.Markdown(label="")
430
  html = gr.HTML(label="HTML preview", show_label=True)
431
 
432
- new_btn.click(fn=predict, inputs=[text_input, sample_size_slider, reduce_sample_checkbox], outputs=[markdown, html])
433
 
434
 
435
 
 
239
 
240
 
241
 
242
+ def predict(text_input, sample_size_slider, reduce_sample_checkbox,sample_reduction_method, progress=gr.Progress()):
243
 
244
  print('getting data to project')
245
  progress(0, desc="Starting...")
 
259
 
260
 
261
 
262
+
263
  records_df = pd.DataFrame(records)
264
  records_df['abstract'] = [invert_abstract(t) for t in records_df['abstract_inverted_index']]
265
 
 
272
 
273
 
274
 
275
+
276
 
277
  if reduce_sample_checkbox:
278
+ sample_size = min(sample_size_slider, len(records_df))
279
+ if sample_reduction_method == "Random":
280
+ records_df = records_df.sample(sample_size)
281
+
282
+ elif sample_reduction_method == "Order of Results":
283
+ records_df = records_df.iloc[:sample_size]
284
+
285
  print(records_df)
286
+
287
 
288
  progress(0.3, desc="Embedding Data...")
289
  texts_to_embedd = [title + tokenizer.sep_token + publication + tokenizer.sep_token + abstract for title, publication, abstract in zip(records_df['title'],records_df['parsed_publication'], records_df['abstract'])]
 
430
  # You're a philosopher of science who wonders where the concept of a fitness landscape has appeared...
431
 
432
  with gr.Column():
433
+ text_input = gr.Textbox(label="OpenAlex-search URL")
434
+ with gr.Row():
435
+ reduce_sample_checkbox = gr.Checkbox(label="Reduce Sample Size", value=True, info="Reduce sample size.")
436
+ sample_size_slider = gr.Slider(label="Sample Size", minimum=10, maximum=20000, step=10, value=1000, info="How many samples to keep.")
437
+ sample_reduction_method = gr.Dropdown(["Order of Results", "Random"], label="Order of Results", info="How to choose the samples to keep.")
438
+
439
+
440
+ new_btn = gr.Button("Run Query",variant='primary')
441
  markdown = gr.Markdown(label="")
442
  html = gr.HTML(label="HTML preview", show_label=True)
443
 
444
+ new_btn.click(fn=predict, inputs=[text_input, sample_size_slider, reduce_sample_checkbox,sample_reduction_method], outputs=[markdown, html])
445
 
446
 
447