acrowth committed on
Commit
0cfd68a
·
1 Parent(s): 0e299fb

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -0
app.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from haystack.document_stores import InMemoryDocumentStore
2
+ import pandas as pd
3
+ import gradio as gr
4
+
5
from haystack.nodes import BM25Retriever
from haystack.pipelines import DocumentSearchPipeline

# Input tables: the corpus to index and the candidate queries offered in the UI.
df = pd.read_parquet('df.parquet')
candidats = pd.read_parquet('candidats.parquet')

# In-memory document store with BM25 enabled.
document_store = InMemoryDocumentStore(use_bm25=True)

# Keep one row per distinct 'fileclean' value and expose that column under
# the key 'content' before converting the rows to a list of dicts.
docs = (
    df.drop_duplicates(subset=['fileclean'])
    .rename(columns={'fileclean': 'content'})
    .to_dict(orient='records')
)
document_store.write_documents(docs)

# Retrieval pipeline queried by the search callback.
retriever = BM25Retriever(document_store=document_store)
pipeline = DocumentSearchPipeline(retriever=retriever)
15
+
16
def semanticsearch(query):
    """Return the top matches for *query* as a DataFrame.

    The search is delegated to the module-level ``pipeline`` with the
    retriever's ``top_k`` set to 10.

    Args:
        query: Search text. ``None`` or a blank string (for example when
            nothing has been selected in the UI) yields an empty result
            instead of being sent to the pipeline.

    Returns:
        pandas.DataFrame: one row per retrieved document, built from each
        document's ``to_dict()`` output with the ``content_type``,
        ``embedding`` and ``id`` entries removed.
    """
    # Nothing to search for: return an empty table rather than running the
    # pipeline with an unusable query.
    if query is None or not str(query).strip():
        return pd.DataFrame()

    search_output = pipeline.run(
        query=query,
        params={"Retriever": {"top_k": 10}},
        debug=False,
    )

    dropped_fields = ('content_type', 'embedding', 'id')
    rows = []
    # Use names distinct from the pipeline output so it is never rebound
    # while its documents are being iterated.
    for document in search_output['documents']:
        row = document.to_dict()
        for field in dropped_fields:
            # A default avoids a KeyError should a field be absent.
            row.pop(field, None)
        rows.append(row)
    return pd.DataFrame(rows)
33
+
34
# Web UI: pick one candidate text from a dropdown and show the matching
# documents in a table.
demo = gr.Interface(
    fn=semanticsearch,
    inputs=[
        # Pass the flat list of texts as the choices. Wrapping it in another
        # list would hand the dropdown a single option that is itself the
        # whole list, instead of one option per candidate text.
        gr.Dropdown(choices=candidats.sort_values(by='text').text.tolist()),
    ],
    outputs=[gr.Dataframe()],
)

if __name__ == "__main__":
    demo.launch()