import gradio as gr
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
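
# Gradio app for the NBDT Recommendation Engine for Editors: three embedding
# models, each paired with a prebuilt FAISS index, retrieve similar authors,
# abstracts, and journals for a submitted abstract.
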
def get_matches(query, db_name="miread_contrastive"):
"""
Wrapper to call the similarity search on the required index
"""
matches = vecdbs[index_names.index(db_name)].similarity_search_with_score(query, k=60)
return matches
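

# similarity_search_with_score() returns (Document, raw_score) pairs; the
# normaliser in inference() maps the smallest raw score to 1.0 so the result
# tables show an easy-to-read relevance score.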
def inference(query, model="miread_contrastive"):
"""
This function processes information retrieved by the get_matches() function
Returns - Gradio update commands for the authors, abstracts and journals tablular output
"""
matches = get_matches(query, model)
auth_counts = {}
journal_bucket = {}
author_table = [] # Author table
abstract_table = [] # Abstract table
# Calculate normalized scores
scores = [round(match[1].item(), 3) for match in matches]
min_score, max_score = min(scores), max(scores)
normaliser = lambda x: round(1 - (x-min_score)/max_score, 3)
    for i, (doc, score) in enumerate(matches):
        norm_score = round(normaliser(round(score.item(), 3)), 3)
        metadata = doc.metadata
        # Extract metadata
        title = metadata['title']
        author = metadata['authors'][0].title()
        date = metadata.get('date', 'None')
        link = metadata.get('link', 'None')
        submitter = metadata.get('submitter', 'None')
        journal = metadata['journal'].strip() if metadata['journal'] else 'None'
        # Update journal scores
        if journal != 'None':
            j_bucket[journal] = j_bucket.get(journal, 0) + norm_score
        # Build author table (limit 2 entries per author)
        if auth_counts.get(author, 0) < 2:
            author_table.append([i+1, norm_score, author, title, link, date])
            auth_counts[author] = auth_counts.get(author, 0) + 1
        # Build abstract table
        abstract_table.append([i+1, title, author, submitter, journal, date, link, norm_score])
    # Build journal table (drop the placeholder key safely; del would raise KeyError when absent)
    j_bucket.pop('None', None)
    journal_table = [[i+1, j, s] for i, (j, s) in enumerate(
        sorted(j_bucket.items(), key=lambda x: x[1], reverse=True)
    )]
    return [
        gr.Dataframe.update(value=abstract_table, visible=True),
        gr.Dataframe.update(value=journal_table, visible=True),
        gr.Dataframe.update(value=author_table, visible=True)
    ]
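

# Local FAISS index folders and the Hugging Face embedding models they were
# built with; both lists are kept in the same order.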
index_names = ["miread_large", "miread_contrastive", "scibert_contrastive"]
model_names = [
"biodatlab/MIReAD-Neuro-Large",
"biodatlab/MIReAD-Neuro-Contrastive",
"biodatlab/SciBERT-Neuro-Contrastive",
]
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': False}
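
# One CPU-backed embedder per model; embeddings are left unnormalised.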
faiss_embedders = [HuggingFaceEmbeddings(
    model_name=name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs) for name in model_names]
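
# Load the prebuilt FAISS indices; each folder named in index_names is
# expected to be present in the working directory.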
vecdbs = [
    FAISS.load_local(index_name, faiss_embedder)
    for index_name, faiss_embedder in zip(index_names, faiss_embedders)
]
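
# UI: one abstract text box, a "Find Matches" button per model, and three
# result tabs (Authors, Abstracts, Journals) that stay hidden until the first
# query fills them.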
with gr.Blocks(theme=gr.themes.Soft()) as demo:
gr.Markdown("# NBDT Recommendation Engine for Editors")
gr.Markdown("NBDT Recommendation Engine for Editors is a tool for neuroscience authors/abstracts/journalsrecommendation built for NBDT journal editors. \
It aims to help an editor to find similar reviewers, abstracts, and journals to a given submitted abstract.\
To find a recommendation, paste a `title[SEP]abstract` or `abstract` in the text box below and click on the appropriate \"Find Matches\" button.\
Then, you can hover to authors/abstracts/journals tab to find a suggested list.\
The data in our current demo includes authors associated with the NBDT Journal. We will update the data monthly for an up-to-date publications.")
    abst = gr.Textbox(label="Abstract", lines=10)
    action_btn1 = gr.Button(value="Find Matches with MIReAD-Neuro-Large")
    action_btn2 = gr.Button(value="Find Matches with MIReAD-Neuro-Contrastive")
    action_btn3 = gr.Button(value="Find Matches with SciBERT-Neuro-Contrastive")
with gr.Tab("Authors"):
n_output = gr.Dataframe(
headers=['No.', 'Score', 'Name', 'Title', 'Link', 'Date'],
datatype=['number', 'number', 'str', 'str', 'str', 'str'],
col_count=(6, "fixed"),
wrap=True,
visible=False
)
with gr.Tab("Abstracts"):
a_output = gr.Dataframe(
headers=['No.', 'Title', 'Author', 'Corresponding Author',
'Journal', 'Date', 'Link', 'Score'],
datatype=['number', 'str', 'str', 'str',
'str', 'str', 'str', 'number'],
col_count=(8, "fixed"),
wrap=True,
visible=False
)
with gr.Tab("Journals"):
j_output = gr.Dataframe(
headers=['No.', 'Name', 'Score'],
datatype=['number', 'str', 'number'],
col_count=(3, "fixed"),
wrap=True,
visible=False
)
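
    # Each button runs inference() against a different index; all three write
    # to the same Authors/Abstracts/Journals tables.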
    action_btn1.click(
        fn=lambda x: inference(x, index_names[0]),
        inputs=[abst],
        outputs=[a_output, j_output, n_output],
        api_name="neurojane"
    )
    action_btn2.click(
        fn=lambda x: inference(x, index_names[1]),
        inputs=[abst],
        outputs=[a_output, j_output, n_output],
        api_name="neurojane"
    )
    action_btn3.click(
        fn=lambda x: inference(x, index_names[2]),
        inputs=[abst],
        outputs=[a_output, j_output, n_output],
        api_name="neurojane"
    )
demo.launch(debug=True)