import gradio as gr
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
import torch


def get_matches1(query):
    matches = vecdb1.similarity_search_with_score(query, k=60)
    return matches


def get_matches2(query):
    matches = vecdb2.similarity_search_with_score(query, k=60)
    return matches


def inference(query, method=1):
    # Retrieve the top-60 nearest abstracts with the selected embedding model.
    if method == 1:
        matches = get_matches1(query)
    else:
        matches = get_matches2(query)

    auth_counts = {}
    j_bucket = {}
    n_table = []
    a_table = []

    # FAISS returns distances (smaller is better); rescale them so that the
    # closest match gets the highest score.
    scores = [round(match[1].item(), 3) for match in matches]
    min_score = min(scores)
    max_score = max(scores)

    def normaliser(x):
        return round(1 - (x - min_score) / max_score, 3)

    for i, match in enumerate(matches):
        doc = match[0]
        score = round(normaliser(round(match[1].item(), 3)), 3)
        title = doc.metadata['title']
        author = doc.metadata['authors'][0].title()
        date = doc.metadata.get('date', 'None')
        link = doc.metadata.get('link', 'None')
        submitter = doc.metadata.get('submitter', 'None')
        journal = doc.metadata.get('journal')
        if journal is None or journal.strip() == '':
            journal = 'None'
        else:
            journal = journal.strip()

        # For journals: accumulate the scores of all matches per journal.
        if journal not in j_bucket:
            j_bucket[journal] = score
        else:
            j_bucket[journal] += score

        # For authors: keep at most two entries per author.
        record = [i + 1,
                  score,
                  author,
                  title,
                  link,
                  date]
        if auth_counts.get(author, 0) < 2:
            n_table.append(record)
        if auth_counts.get(author, 0) == 0:
            auth_counts[author] = 1
        else:
            auth_counts[author] += 1

        # For abstracts: keep every match.
        record = [i + 1,
                  title,
                  author,
                  submitter,
                  journal,
                  date,
                  link,
                  score]
        a_table.append(record)

    # Drop the placeholder bucket for matches without a journal, if any.
    j_bucket.pop('None', None)

    j_table = sorted([[journal, round(score, 3)]
                      for journal, score in j_bucket.items()],
                     key=lambda x: x[1], reverse=True)
    j_table = [[i + 1, item[0], item[1]] for i, item in enumerate(j_table)]
    j_output = gr.Dataframe.update(value=j_table, visible=True)
    n_output = gr.Dataframe.update(value=n_table, visible=True)
    a_output = gr.Dataframe.update(value=a_table, visible=True)
    return [a_output, j_output, n_output]


def inference1(query):
    return inference(query, 1)


def inference2(query):
    return inference(query, 2)

model1_name = "biodatlab/MIReAD-Neuro-Large"
model2_name = "biodatlab/MIReAD-Neuro-Contrastive"
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': False}
faiss_embedder1 = HuggingFaceEmbeddings(
    model_name=model1_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)
faiss_embedder2 = HuggingFaceEmbeddings(
    model_name=model2_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)
vecdb1 = FAISS.load_local("nbdt_index", faiss_embedder1)
vecdb2 = FAISS.load_local("indexes", faiss_embedder2)
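# The two FAISS directories loaded above ("nbdt_index" and "indexes") are
# expected to exist alongside this file. A rough sketch of how such an index
# could be built offline with the same embedder (illustrative only; the
# `abstract_texts` and `metadata_records` collections are assumptions, not
# shipped with this Space):
#
#   from langchain.docstore.document import Document
#
#   docs = [Document(page_content=abstract, metadata=meta)
#           for abstract, meta in zip(abstract_texts, metadata_records)]
#   index = FAISS.from_documents(docs, faiss_embedder1)
#   index.save_local("nbdt_index")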
with gr.Blocks(theme=gr.themes.Soft()) as demo:
gr.Markdown("# NBDT Recommendation Engine for Editors")
gr.Markdown("NBDT Recommendation Engine for Editors is a tool for neuroscience authors/abstracts/journalsrecommendation built for NBDT journal editors. \
It aims to help an editor to find similar reviewers, abstracts, and journals to a given submitted abstract.\
To find a recommendation, paste a `title[SEP]abstract` or `abstract` in the text box below and click \"Find Matches\".\
Then, you can hover to authors/abstracts/journals tab to find a suggested list.\
The data in our current demo includes authors associated with the NBDT Journal. We will update the data monthly for an up-to-date publications.")
abst = gr.Textbox(label="Abstract", lines=10)
action_btn = gr.Button(value="Find Matches with Normal Model")
action2_btn = gr.Button(value="Find Matches with Contrastive Model")
with gr.Tab("Authors"):
n_output = gr.Dataframe(
headers=['No.', 'Score', 'Name', 'Title', 'Link', 'Date'],
datatype=['number', 'number', 'str', 'str', 'str', 'str'],
col_count=(6, "fixed"),
wrap=True,
visible=False
)
with gr.Tab("Abstracts"):
a_output = gr.Dataframe(
headers=['No.', 'Title', 'Author', 'Corresponding Author',
'Journal', 'Date', 'Link', 'Score'],
datatype=['number', 'str', 'str', 'str',
'str', 'str', 'str', 'number'],
col_count=(8, "fixed"),
wrap=True,
visible=False
)
with gr.Tab("Journals"):
j_output = gr.Dataframe(
headers=['No.', 'Name', 'Score'],
datatype=['number', 'str', 'number'],
col_count=(3, "fixed"),
wrap=True,
visible=False
)
action_btn.click(fn=inference1,
inputs=[
abst,
],
outputs=[a_output, j_output, n_output],
api_name="neurojane")
action2_btn.click(fn=inference2,
inputs=[
abst,
],
outputs=[a_output, j_output, n_output],
api_name="neurojane")
demo.launch(debug=True)
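
# Example client usage (illustrative sketch, not part of the app itself).
# Once this file runs as a public Gradio Space, the "neurojane" endpoint
# registered above can be called with the `gradio_client` package. The Space
# id below is a placeholder assumption; replace it with the real Space id
# or URL.
#
#   from gradio_client import Client
#
#   client = Client("biodatlab/nbdt-recommendation-engine")  # placeholder id
#   abstracts, journals, authors = client.predict(
#       "title[SEP]abstract text goes here",
#       api_name="/neurojane",
#   )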