Jai12345 committed on
Commit
0d0e374
·
1 Parent(s): 6c811e7

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -0
app.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import pickle

import gradio as gr
import pandas as pd
import torch
from sentence_transformers import SentenceTransformer, CrossEncoder, util
5
+
6
+
7
# Pre-computed artefacts, unpickled through pandas.
# NOTE: unpickling can execute arbitrary code, so these files must come from a
# trusted source (they are expected to ship alongside this script).
corpus = pd.read_pickle("corpus.pkl")
corpus_embeddings = pd.read_pickle("corpus_embeddings_cpu.pkl")

# Models, loaded once at import time so every request reuses them.
cross_encoder = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")
bi_encoder = SentenceTransformer("multi-qa-MiniLM-L6-cos-v1")
11
+
12
+
13
# Number of passages returned by search(); the return value always has exactly
# this many entries.
_NUM_ANSWERS = 5


def search(query, top_k=100):
    """Return the five best-matching corpus passages for ``query``.

    The lookup runs in two stages: the bi-encoder embeds the query and
    ``util.semantic_search`` retrieves up to ``top_k`` candidate passages,
    then the cross-encoder scores every (query, passage) pair and the
    candidates are re-sorted by that score, best first.

    Args:
        query: The search text.
        top_k: Maximum number of candidates retrieved in the first stage
            before re-ranking.

    Returns:
        A tuple of exactly five passages taken from ``corpus``, best match
        first. When fewer than five candidates are found, the remaining
        slots are filled with empty strings instead of raising
        ``IndexError``.
    """
    # Kept from the original implementation: a header written to stdout on
    # every call.
    print("Top 5 Answer by the NSE:")
    print()

    ##### Semantic Search #####
    # Encode the query using the bi-encoder and find potentially relevant passages
    question_embedding = bi_encoder.encode(query, convert_to_tensor=True)
    hits = util.semantic_search(question_embedding, corpus_embeddings, top_k=top_k)
    hits = hits[0]  # Get the hits for the first (and only) query

    if hits:
        ##### Re-Ranking #####
        # Score all retrieved passages with the cross-encoder
        cross_inp = [[query, corpus[hit['corpus_id']]] for hit in hits]
        cross_scores = cross_encoder.predict(cross_inp)

        for hit, score in zip(hits, cross_scores):
            hit['cross-score'] = score

        # Sort results by the cross-encoder scores, highest first
        hits = sorted(hits, key=lambda hit: hit['cross-score'], reverse=True)

    ans = [corpus[hit['corpus_id']] for hit in hits[:_NUM_ANSWERS]]
    # Pad so callers can always unpack five values.
    ans.extend([""] * (_NUM_ANSWERS - len(ans)))
    return tuple(ans)
37
+
38
+
39
# Example queries offered in the UI.
exp = [
    "Who is steve jobs?",
    "What is coldplay?",
    "What is a turing test?",
    "What is the most interesting thing about our universe?",
    "What are the most beautiful places on earth?",
]

# Text passed to the interface as its article.
desc = (
    "This is a semantic search engine powered by SentenceTransformers (Nils_Reimers) "
    "with a retrieval and reranking system on Wikipedia corpus. "
    "This will return the top 5 results. So Quest on with Transformers."
)

inp = gr.inputs.Textbox(lines=1, placeholder=None, default="", label="search your query here")

# search() returns five passages; give each one its own Textbox component
# rather than listing a single component instance five times.
outs = [gr.outputs.Textbox(type="auto", label="search results") for _ in range(5)]
out = outs[0]  # kept so any existing reference to `out` still resolves

iface = gr.Interface(
    fn=search,
    inputs=inp,
    outputs=outs,
    examples=exp,
    article=desc,
    title="Search Engine",
    theme="huggingface",
    layout='vertical',
)
iface.launch()