File size: 5,521 Bytes
c5ff415
 
 
 
 
2e2ab19
5c7bf4d
 
 
5f5d98b
c5ff415
 
 
2e2ab19
5c7bf4d
 
 
 
2e2ab19
aad244d
5f5d98b
 
 
 
 
c5ff415
5f5d98b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aad244d
5f5d98b
 
 
 
 
c5ff415
5f5d98b
d10318a
5f5d98b
 
 
c5ff415
5f5d98b
 
 
 
 
c5ff415
 
2e2ab19
 
 
 
 
 
c5ff415
 
2e2ab19
5c7bf4d
 
 
5f5d98b
 
 
 
c5ff415
 
 
 
 
65c52e1
c5ff415
c3141d7
c5ff415
 
 
93b5fbc
 
2e2ab19
 
c5ff415
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5f5d98b
 
 
2e2ab19
5f5d98b
 
 
 
 
2e2ab19
 
5f5d98b
 
 
2e2ab19
 
c5ff415
cfca6ca
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import gradio as gr
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings


def get_matches(query, db_name="miread_contrastive"):
    """Run a similarity search for *query* on the FAISS index named *db_name*.

    Returns the top-60 (document, raw_distance_score) pairs from the
    module-level ``vecdbs`` index whose position matches ``db_name`` in
    ``index_names``.
    """
    selected_db = vecdbs[index_names.index(db_name)]
    return selected_db.similarity_search_with_score(query, k=60)


def inference(query, model="miread_contrastive"):
    """
    Run retrieval for *query* and format the results for the Gradio UI.

    Parameters
    ----------
    query : str
        A ``title[SEP]abstract`` string or a plain abstract.
    model : str
        Index name to search; one of the module-level ``index_names``.

    Returns
    -------
    list
        Three ``gr.Dataframe.update`` payloads, in output order:
        abstracts table, journals table, authors table.
    """
    matches = get_matches(query, model)
    auth_counts = {}
    journal_bucket = {}
    author_table = []  # Author table
    abstract_table = []  # Abstract table

    # Robustness fix: an empty result set would crash min()/max() below.
    if not matches:
        return [
            gr.Dataframe.update(value=abstract_table, visible=True),
            gr.Dataframe.update(value=journal_bucket and [] or [], visible=True),
            gr.Dataframe.update(value=author_table, visible=True),
        ]

    # Normalize raw FAISS distances into a relevance-style score
    # (smaller distance -> score closer to 1).
    scores = [round(match[1].item(), 3) for match in matches]
    min_score, max_score = min(scores), max(scores)
    # Bug fix: guard the divisor — if every distance is 0.0 the original
    # normaliser divided by zero.
    divisor = max_score if max_score else 1.0

    def normaliser(raw):
        return round(1 - (raw - min_score) / divisor, 3)

    for i, (doc, score) in enumerate(matches):
        norm_score = normaliser(round(score.item(), 3))
        metadata = doc.metadata

        # Extract metadata
        title = metadata['title']
        author = metadata['authors'][0].title()
        date = metadata.get('date', 'None')
        link = metadata.get('link', 'None')
        submitter = metadata.get('submitter', 'None')
        journal = metadata['journal'].strip() if metadata['journal'] else 'None'

        # Update journal scores.
        # Bug fix: the original wrote to `j_bucket`, which was never defined
        # (the dict was initialized as `journal_bucket`) -> NameError at runtime.
        if journal != 'None':
            journal_bucket[journal] = journal_bucket.get(journal, 0) + norm_score

        # Build author table (limit 2 entries per author)
        if auth_counts.get(author, 0) < 2:
            author_table.append([i + 1, norm_score, author, title, link, date])
            auth_counts[author] = auth_counts.get(author, 0) + 1

        # Build abstract table
        abstract_table.append(
            [i + 1, title, author, submitter, journal, date, link, norm_score])

    # Build journal table.
    # Bug fix: 'None' journals are never inserted (see the guard above), so an
    # unconditional `del` always raised KeyError; pop with a default is a no-op
    # when the key is absent.
    journal_bucket.pop('None', None)
    journal_table = [
        [rank + 1, name, total]
        for rank, (name, total) in enumerate(
            sorted(journal_bucket.items(), key=lambda item: item[1], reverse=True)
        )
    ]

    return [
        gr.Dataframe.update(value=abstract_table, visible=True),
        gr.Dataframe.update(value=journal_table, visible=True),
        gr.Dataframe.update(value=author_table, visible=True),
    ]


# Local FAISS index directories, aligned one-to-one with the HF model ids below.
index_names = ["miread_large", "miread_contrastive", "scibert_contrastive"]
model_names = [
    "biodatlab/MIReAD-Neuro-Large",
    "biodatlab/MIReAD-Neuro-Contrastive",
    "biodatlab/SciBERT-Neuro-Contrastive",
]
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': False}

# One embedder per model; each FAISS index is loaded with its matching embedder.
# NOTE(review): FAISS.load_local deserializes a pickle under the hood — only
# load indexes you produced yourself.
faiss_embedders = []
for hf_model_name in model_names:
    faiss_embedders.append(
        HuggingFaceEmbeddings(
            model_name=hf_model_name,
            model_kwargs=model_kwargs,
            encode_kwargs=encode_kwargs,
        )
    )

vecdbs = []
for idx_name, embedder in zip(index_names, faiss_embedders):
    vecdbs.append(FAISS.load_local(idx_name, embedder))

# Gradio UI: one abstract input, one "Find Matches" button per model, and three
# result tabs (authors / abstracts / journals) filled by inference().
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# NBDT Recommendation Engine for Editors")
    # Fix: user-facing text had a missing space ("journalsrecommendation") and
    # a grammar slip ("for an up-to-date publications").
    gr.Markdown(
        "NBDT Recommendation Engine for Editors is a tool for neuroscience "
        "authors/abstracts/journals recommendation built for NBDT journal editors. "
        "It aims to help an editor to find similar reviewers, abstracts, and "
        "journals to a given submitted abstract. "
        "To find a recommendation, paste a `title[SEP]abstract` or `abstract` in "
        "the text box below and click on the appropriate \"Find Matches\" button. "
        "Then, you can hover to the authors/abstracts/journals tab to find a "
        "suggested list. "
        "The data in our current demo includes authors associated with the NBDT "
        "Journal. We will update the data monthly for up-to-date publications.")

    abst = gr.Textbox(label="Abstract", lines=10)

    action_btn1 = gr.Button(value="Find Matches with MIReAD-Neuro-Large")
    action_btn2 = gr.Button(value="Find Matches with MIReAD-Neuro-Contrastive")
    action_btn3 = gr.Button(
        value="Find Matches with SciBERT-Neuro-Contrastive")

    with gr.Tab("Authors"):
        n_output = gr.Dataframe(
            headers=['No.', 'Score', 'Name', 'Title', 'Link', 'Date'],
            datatype=['number', 'number', 'str', 'str', 'str', 'str'],
            col_count=(6, "fixed"),
            wrap=True,
            visible=False
        )
    with gr.Tab("Abstracts"):
        a_output = gr.Dataframe(
            headers=['No.', 'Title', 'Author', 'Corresponding Author',
                     'Journal', 'Date', 'Link', 'Score'],
            datatype=['number', 'str', 'str', 'str',
                      'str', 'str', 'str', 'number'],
            col_count=(8, "fixed"),
            wrap=True,
            visible=False
        )
    with gr.Tab("Journals"):
        j_output = gr.Dataframe(
            headers=['No.', 'Name', 'Score'],
            datatype=['number', 'str', 'number'],
            col_count=(3, "fixed"),
            wrap=True,
            visible=False
        )

    # NOTE(review): all three endpoints share api_name="neurojane"; recent
    # Gradio versions deduplicate or reject duplicate api_names — confirm the
    # deployed Gradio version tolerates this before renaming the routes.
    buttons = (action_btn1, action_btn2, action_btn3)
    for btn, idx_name in zip(buttons, index_names):
        btn.click(
            # Bind idx_name as a default arg to avoid the late-binding
            # closure pitfall inside the loop.
            fn=lambda x, name=idx_name: inference(x, name),
            inputs=[abst],
            outputs=[a_output, j_output, n_output],
            api_name="neurojane"
        )

demo.launch(debug=True)