davidberenstein1957 committed on
Commit
502be4c
·
verified ·
1 Parent(s): 44a8b04

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -0
app.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from sentence_transformers import CrossEncoder
3
+ import pandas as pd
4
+
5
# Cross-encoder used to score (query, chunk) pairs for reranking.
# The previous checkpoint, "sentence-transformers/all-MiniLM-L12-v2", is
# published as a bi-encoder (sentence-embedding) model; loading it through
# CrossEncoder leaves the scoring head without trained weights, so the scores
# it produces carry no relevance signal. Use a checkpoint that was trained as
# a cross-encoder instead.
reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-12-v2")
6
+
7
+
8
def rerank_documents(query: str, documents: pd.DataFrame) -> pd.DataFrame:
    """Score every unique chunk against *query* and return them best-first.

    Args:
        query: The search query each chunk is scored against.
        documents: Frame with a ``chunk`` column holding the candidate texts.
            The caller's frame is not modified.

    Returns:
        A new frame with duplicate chunks removed and an added ``rank``
        column holding the score from the module-level ``reranker``, sorted
        by that score in descending order. An empty input yields an empty
        frame that still carries the ``rank`` column.

    Raises:
        KeyError: If a non-empty ``documents`` has no ``chunk`` column.
    """
    # drop_duplicates returns a new frame, so no explicit copy is needed to
    # keep the caller's data untouched.
    ranked = documents.drop_duplicates("chunk")

    if ranked.empty:
        # Nothing to score: skip the model call (predict is not safe to call
        # with an empty batch) and keep the output schema stable.
        return ranked.assign(rank=pd.Series(dtype="float64"))

    pairs = [[query, chunk] for chunk in ranked["chunk"]]
    ranked = ranked.assign(rank=reranker.predict(pairs))
    return ranked.sort_values(by="rank", ascending=False)
14
+
15
+
16
# Build the demo UI: a query box and an editable table of chunks go in, and a
# table with the chunks and their scores comes out.
with gr.Blocks() as demo:
    gr.Markdown(
        "# RAG Hub Datasets\n"
        "\n"
        "Part of [smol blueprint](https://github.com/davidberenstein1957/smol-blueprint) - a smol blueprint for AI development, focusing on practical examples of RAG, information extraction, analysis and fine-tuning in the age of LLMs."
    )

    query_input = gr.Textbox(
        label="Query",
        placeholder="Enter your question here...",
        lines=3,
    )
    documents_input = gr.Dataframe(
        label="Documents",
        headers=["chunk"],
        wrap=True,
        interactive=True,
    )

    submit_btn = gr.Button("Submit")
    documents_output = gr.Dataframe(
        label="Documents",
        headers=["chunk", "rank"],
        wrap=True,
    )

    # Clicking the button passes the query and the chunk table to
    # rerank_documents and shows the returned frame in the output table.
    submit_btn.click(
        fn=rerank_documents,
        inputs=[query_input, documents_input],
        outputs=[documents_output],
    )

# Start the Gradio app.
demo.launch()