Spaces:
Build error
Build error
Update app.py
Browse files
Added a sample reduction method dropdown and the logic for it; set the "Run Query" button to the primary variant.
app.py
CHANGED
@@ -239,7 +239,7 @@ print(f"Language model is set up: {time.strftime('%Y-%m-%d %H:%M:%S')}")
|
|
239 |
|
240 |
|
241 |
|
242 |
-
def predict(text_input, sample_size_slider, reduce_sample_checkbox, progress=gr.Progress()):
|
243 |
|
244 |
print('getting data to project')
|
245 |
progress(0, desc="Starting...")
|
@@ -259,6 +259,7 @@ def predict(text_input, sample_size_slider, reduce_sample_checkbox, progress=gr.
|
|
259 |
|
260 |
|
261 |
|
|
|
262 |
records_df = pd.DataFrame(records)
|
263 |
records_df['abstract'] = [invert_abstract(t) for t in records_df['abstract_inverted_index']]
|
264 |
|
@@ -271,11 +272,18 @@ def predict(text_input, sample_size_slider, reduce_sample_checkbox, progress=gr.
|
|
271 |
|
272 |
|
273 |
|
|
|
274 |
|
275 |
if reduce_sample_checkbox:
|
276 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
277 |
print(records_df)
|
278 |
-
|
279 |
|
280 |
progress(0.3, desc="Embedding Data...")
|
281 |
texts_to_embedd = [title + tokenizer.sep_token + publication + tokenizer.sep_token + abstract for title, publication, abstract in zip(records_df['title'],records_df['parsed_publication'], records_df['abstract'])]
|
@@ -422,14 +430,18 @@ with gr.Blocks() as block:
|
|
422 |
# You're a philosopher of science who wonders where the concept of a fitness landscape has appeared...
|
423 |
|
424 |
with gr.Column():
|
425 |
-
text_input = gr.Textbox(label="OpenAlex
|
426 |
-
|
427 |
-
|
428 |
-
|
|
|
|
|
|
|
|
|
429 |
markdown = gr.Markdown(label="")
|
430 |
html = gr.HTML(label="HTML preview", show_label=True)
|
431 |
|
432 |
-
new_btn.click(fn=predict, inputs=[text_input, sample_size_slider, reduce_sample_checkbox], outputs=[markdown, html])
|
433 |
|
434 |
|
435 |
|
|
|
239 |
|
240 |
|
241 |
|
242 |
+
def predict(text_input, sample_size_slider, reduce_sample_checkbox,sample_reduction_method, progress=gr.Progress()):
|
243 |
|
244 |
print('getting data to project')
|
245 |
progress(0, desc="Starting...")
|
|
|
259 |
|
260 |
|
261 |
|
262 |
+
|
263 |
records_df = pd.DataFrame(records)
|
264 |
records_df['abstract'] = [invert_abstract(t) for t in records_df['abstract_inverted_index']]
|
265 |
|
|
|
272 |
|
273 |
|
274 |
|
275 |
+
|
276 |
|
277 |
if reduce_sample_checkbox:
|
278 |
+
sample_size = min(sample_size_slider, len(records_df))
|
279 |
+
if sample_reduction_method == "Random":
|
280 |
+
records_df = records_df.sample(sample_size)
|
281 |
+
|
282 |
+
elif sample_reduction_method == "Order of Results":
|
283 |
+
records_df = records_df.iloc[:sample_size]
|
284 |
+
|
285 |
print(records_df)
|
286 |
+
|
287 |
|
288 |
progress(0.3, desc="Embedding Data...")
|
289 |
texts_to_embedd = [title + tokenizer.sep_token + publication + tokenizer.sep_token + abstract for title, publication, abstract in zip(records_df['title'],records_df['parsed_publication'], records_df['abstract'])]
|
|
|
430 |
# You're a philosopher of science who wonders where the concept of a fitness landscape has appeared...
|
431 |
|
432 |
with gr.Column():
|
433 |
+
text_input = gr.Textbox(label="OpenAlex-search URL")
|
434 |
+
with gr.Row():
|
435 |
+
reduce_sample_checkbox = gr.Checkbox(label="Reduce Sample Size", value=True, info="Reduce sample size.")
|
436 |
+
sample_size_slider = gr.Slider(label="Sample Size", minimum=10, maximum=20000, step=10, value=1000, info="How many samples to keep.")
|
437 |
+
sample_reduction_method = gr.Dropdown(["Order of Results", "Random"], label="Order of Results", info="How to choose the samples to keep.")
|
438 |
+
|
439 |
+
|
440 |
+
new_btn = gr.Button("Run Query",variant='primary')
|
441 |
markdown = gr.Markdown(label="")
|
442 |
html = gr.HTML(label="HTML preview", show_label=True)
|
443 |
|
444 |
+
new_btn.click(fn=predict, inputs=[text_input, sample_size_slider, reduce_sample_checkbox,sample_reduction_method], outputs=[markdown, html])
|
445 |
|
446 |
|
447 |
|