Add initial implementation of BestRAG library with Streamlit app and README updates
Browse files- README.md +36 -1
- app.py +83 -0
- requirements.txt +2 -0
README.md
CHANGED
@@ -11,4 +11,39 @@ license: mit
|
|
11 |
short_description: 'BestRAG: Hybrid Retrieval-Augmented Generation library'
|
12 |
---
|
13 |
|
14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
short_description: 'BestRAG: Hybrid Retrieval-Augmented Generation library'
|
12 |
---
|
13 |
|
14 |
+
# BestRAG - Hybrid Retrieval-Augmented Generation (RAG)
|
15 |
+
|
16 |
+
**BestRAG** is a Python library that leverages a hybrid Retrieval-Augmented Generation (RAG) approach to efficiently store and retrieve embeddings. By combining dense, sparse, and late interaction embeddings, BestRAG offers a robust solution for managing large datasets.
|
17 |
+
|
18 |
+
## Features
|
19 |
+
|
20 |
+
- 🚀 **Hybrid RAG**: Utilizes dense, sparse, and late interaction embeddings for enhanced performance.
|
21 |
+
- 🔌 **Easy Integration**: Simple API for storing and searching embeddings.
|
22 |
+
- 📄 **PDF Support**: Directly store embeddings from PDF documents.
|
23 |
+
|
24 |
+
## How to Use
|
25 |
+
|
26 |
+
1. **Initialize BestRAG**: Enter your Qdrant URL, API Key, and Collection Name, then click "Initialize BestRAG".
|
27 |
+
2. **Create Embeddings**: Upload a PDF file and click "Create Embeddings" to store embeddings.
|
28 |
+
3. **Search Embeddings**: Enter a search query and set the limit, then click "Search" to retrieve results.
|
29 |
+
|
30 |
+
## Installation
|
31 |
+
|
32 |
+
You can use BestRAG freely by installing it with:
|
33 |
+
|
34 |
+
```bash
|
35 |
+
pip install bestrag
|
36 |
+
```
|
37 |
+
|
38 |
+
For more details, visit the [GitHub repository](https://github.com/samadpls/BestRAG).
|
39 |
+
|
40 |
+
[GitHub](https://github.com/samadpls/BestRAG)
|
41 |
+
[PyPI](https://pypi.org/project/bestrag/)
|
42 |
+
|
43 |
+
> **Note**: Qdrant offers a free tier with 4GB of storage. To generate your API key and endpoint, visit [Qdrant](https://qdrant.tech/).
|
44 |
+
|
45 |
+
Made with ❤️ by [samadpls](https://github.com/samadpls)
|
46 |
+
|
47 |
+
---
|
48 |
+
|
49 |
+
Please like this project on [GitHub](https://github.com/samadpls/BestRAG) if you find it useful!
|
app.py
ADDED
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Streamlit front end for BestRAG.

The page collects Qdrant connection details, builds a ``BestRAG`` instance
on demand (kept in ``st.session_state`` so it survives Streamlit reruns),
and then offers two tabs: one to index an uploaded PDF and one to search
the stored embeddings.
"""

import json
import os
import tempfile

import streamlit as st
from bestrag import BestRAG

# Page header: logo in a narrow column, title in a wide one.
logo_col, title_col = st.columns([1, 5])
with logo_col:
    st.image(
        "https://github.com/user-attachments/assets/e23d11d5-2d7b-44e2-aa11-59ddcb66bebc",
        width=140,
    )
with title_col:
    st.title("BestRAG - Hybrid Retrieval-Augmented Generation (RAG)")

# NOTE: the two project links previously had empty link text, which renders
# as nothing; they now carry visible labels.
st.markdown("""
[GitHub](https://github.com/samadpls/BestRAG)
[PyPI](https://pypi.org/project/bestrag/)

> **Note**: Qdrant offers a free tier with 4GB of storage. To generate your API key and endpoint, visit [Qdrant](https://qdrant.tech/).

You can use BestRAG freely by installing it with `pip install bestrag`. For more details, visit the [GitHub repository](https://github.com/samadpls/BestRAG).

Made with ❤️ by [samadpls](https://github.com/samadpls)
""")

# Input fields for BestRAG initialization.
url = st.text_input("Qdrant URL", "https://YOUR_QDRANT_URL")
# The API key is a secret: mask it instead of echoing it in plain text.
api_key = st.text_input("Qdrant API Key", "YOUR_API_KEY", type="password")
collection_name = st.text_input("Collection Name", "YOUR_COLLECTION_NAME")

# Initialize BestRAG only when the user clicks the button; the instance is
# stored in session state because Streamlit re-executes this script on every
# interaction.
if st.button("Initialize BestRAG"):
    st.session_state['rag'] = BestRAG(
        url=url,
        api_key=api_key,
        collection_name=collection_name,
    )
    st.success("BestRAG initialized successfully!")

if 'rag' in st.session_state:
    rag = st.session_state['rag']

    # Tabs for the two supported operations.
    tab1, tab2 = st.tabs(["Create Embeddings", "Search Embeddings"])

    with tab1:
        st.header("Create Embeddings")

        pdf_file = st.file_uploader("Upload PDF", type=["pdf"])

        if st.button("Create Embeddings"):
            if pdf_file is not None:
                # The uploaded name is user-controlled, so it is used only as
                # a label, never as part of a filesystem path.
                pdf_name = pdf_file.name

                # Write the upload to a uniquely named temp file in the
                # platform's temp directory (not a hard-coded "/tmp").
                with tempfile.NamedTemporaryFile(
                    suffix=".pdf", delete=False
                ) as tmp:
                    tmp.write(pdf_file.getbuffer())
                    temp_pdf_path = tmp.name

                try:
                    rag.store_pdf_embeddings(temp_pdf_path, pdf_name)
                finally:
                    # Always remove the temp copy, even if indexing fails.
                    os.remove(temp_pdf_path)

                st.success(f"Embeddings created for {pdf_name}")
            else:
                st.error("Please upload a PDF file.")

    with tab2:
        st.header("Search Embeddings")

        query = st.text_input("Search Query", "example query")
        limit = st.number_input("Limit", min_value=1, max_value=20, value=5)

        if st.button("Search"):
            if not query.strip():
                # Do not send a blank query to the backend.
                st.error("Please enter a search query.")
            else:
                results = rag.search(query, limit)

                st.subheader("Search Results")
                for result in results.points:
                    st.json({
                        "id": result.id,
                        "score": result.score,
                        "payload": result.payload,
                    })
else:
    st.warning("Please initialize BestRAG first.")
|
requirements.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
streamlit
|
2 |
+
bestrag
|