File size: 1,624 Bytes
1b69bf3
 
 
 
 
 
 
 
 
4025c5d
1b69bf3
f2b9694
1b69bf3
4025c5d
1b69bf3
 
 
 
 
 
4025c5d
1b69bf3
 
 
 
7c0d447
 
 
1b69bf3
 
 
 
da7e9fe
62338e3
1b69bf3
 
 
 
 
4074cac
1b69bf3
4074cac
da7e9fe
06b44ea
 
 
1b69bf3
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import faiss
import gradio as gr
import numpy as np
import pandas as pd
import torch.nn.functional as F
from sentence_transformers import SentenceTransformer

# Matryoshka embedding dimensionality: model outputs are truncated to DIM,
# and the FAISS index is built with the same DIM — the two must match.
DIM = 768
# nomic-embed-text-v1.5 ships custom modules, hence trust_remote_code.
model = SentenceTransformer("nomic-ai/nomic-embed-text-v1.5", trust_remote_code=True)
print("Model loaded successfully")

# Paper metadata; drop rows missing a summary or a PDF path, since they
# have nothing to search over or link to.
papers_df = pd.read_csv("data/cvpr2024_papers_with_details.csv", index_col=None, on_bad_lines='skip')
papers_df = papers_df[~papers_df["summary"].isna() & ~papers_df["pdf_path"].isna()]
print("Data loaded successfully")

# Precomputed paper embeddings, one row per paper.
# NOTE(review): search results use positional iloc lookups, so row i of
# `embeddings` must correspond to row i of the *filtered* papers_df —
# confirm embeddings.npy was generated after the same filtering step.
with open('data/embeddings.npy', 'rb') as f:
    embeddings = np.load(f)

# Exact (brute-force) L2 index over the paper embeddings.
index = faiss.IndexFlatL2(DIM)
index.add(embeddings)
print("Index loaded successfully")


def encode_query(query):
    """Embed a search query into a (1, DIM) float32 vector ready for FAISS.

    Applies the nomic-embed-text-v1.5 Matryoshka recipe: layer-norm the
    raw embedding, truncate to DIM dimensions, then L2-normalize.

    Args:
        query: Free-text search string.

    Returns:
        A (1, DIM) float32 numpy array suitable for `faiss.Index.search`.
    """
    # NOTE(review): nomic-embed-text-v1.5 documents task prefixes (e.g.
    # "search_query: " for queries) — confirm whether the stored document
    # embeddings used "search_document: " and, if so, prefix here too.
    query_embeddings = model.encode([query], convert_to_tensor=True)
    query_embeddings = F.layer_norm(query_embeddings, normalized_shape=(query_embeddings.shape[1],))
    query_embeddings = query_embeddings[:, :DIM]
    query_embeddings = F.normalize(query_embeddings, p=2, dim=1)
    # FAISS expects a contiguous float32 numpy array, not a torch tensor;
    # .cpu() also guards against the model having been placed on a GPU,
    # where passing the tensor onward would fail.
    return query_embeddings.cpu().numpy()

def search_nearest_papers(query, k=5):
    """Return the k papers semantically closest to the query.

    Args:
        query: Free-text search string.
        k: Number of results to return. Floats are accepted (Gradio
           sliders can emit them) and coerced to int.

    Returns:
        A DataFrame with the "Title" and "arXiv_link" columns of the
        top-k matches, ordered by ascending L2 distance.
    """
    query_embeddings = encode_query(query)
    # FAISS requires an integer k; the UI slider may deliver a float.
    D, I = index.search(query_embeddings, int(k))
    # I holds positional row ids, so papers_df row order must align with
    # the order in which vectors were added to the index.
    return papers_df.iloc[I[0]][["Title", "arXiv_link"]]

# Gradio UI: text query + result-count slider in, table of matches out.
demo = gr.Interface(
    search_nearest_papers,
    [
        "text",
        # step=1 so the slider always emits an integer k for FAISS.
        gr.Slider(1, 10, value=5, step=1),
    ],
    gr.Dataframe(
        # The second column contains arXiv links, not PDFs — label it
        # accordingly.
        headers=["Title", "arXiv Link"],
    ),
    title="CVPR 2024 Paper Search",
    description="Semantic search over CVPR 2024 paper summaries. This app was made using the data available on https://github.com/harpreetsahota204/CVPR-2024-Papers.",
)

if __name__ == "__main__":
    demo.launch()