freud_rag / app.py
ruggsea's picture
initial
a37b18d
raw
history blame
4.1 kB
import streamlit as st
from llama_index.core import VectorStoreIndex, StorageContext, load_index_from_storage
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings
from llama_index.retrievers.bm25 import BM25Retriever
from llama_index.core.retrievers import QueryFusionRetriever
# Page config: browser-tab title, tab icon and a wide layout for the result list.
st.set_page_config(
    page_title="Freud Works Search",
    # U+1F4DA (books emoji). The literal had been corrupted into mojibake by a
    # wrong-codec round trip; an escape sequence keeps it safe from re-encoding.
    page_icon="\U0001F4DA",
    layout="wide",
)

# Title and a short description of the two retrieval strategies being combined.
st.title("Freud Works Hybrid Search")
st.markdown("""
This demo allows you to search through Freud's complete works using a hybrid approach combining:
- BM25 (keyword-based search)
- Vector search (semantic similarity)
""")
@st.cache_resource
def load_indices():
    """Load the persisted index and build the vector, BM25 and hybrid retrievers.

    The embedding model is installed globally on ``Settings`` before the index
    is loaded from the ``freud_index`` directory. The result is wrapped in
    ``st.cache_resource``.

    Returns:
        A 4-tuple ``(index, vector_retriever, bm25_retriever, hybrid_retriever)``.
    """
    # Initial result count for every retriever.
    default_top_k = 10

    # Register the embedding model globally.
    Settings.embed_model = HuggingFaceEmbedding(
        model_name="multi-qa-MiniLM-L6-cos-v1"
    )

    # Restore the index from its on-disk persistence directory.
    index = load_index_from_storage(
        storage_context=StorageContext.from_defaults(persist_dir="freud_index")
    )

    # One semantic (embedding) retriever and one keyword (BM25) retriever.
    dense = index.as_retriever(similarity_top_k=default_top_k)
    sparse = BM25Retriever.from_defaults(index, similarity_top_k=default_top_k)

    # Fuse both result lists into a single hybrid ranking.
    fused = QueryFusionRetriever(
        [dense, sparse],
        similarity_top_k=default_top_k,
        num_queries=1,  # 1 disables query generation
        mode="reciprocal_rerank",
        use_async=True,
        verbose=True,
    )

    return index, dense, sparse, fused
# Fetch the index and the three retrievers from load_indices().
index, vector_retriever, bm25_retriever, hybrid_retriever = load_indices()

# Free-text query box.
search_query = st.text_input(
    "Enter your search query:",
    placeholder="e.g. Oedipus complex",
)

# How many hits to show (1-20, default 10).
top_k = st.slider(
    "Number of results to return:",
    value=10,
    min_value=1,
    max_value=20,
)

# Push the chosen result count into every retriever. These are the objects
# returned by load_indices(), so the attribute is changed on them in place.
for _retriever in (vector_retriever, bm25_retriever, hybrid_retriever):
    _retriever.similarity_top_k = top_k

# Retrieval strategy picker; the help text explains each option.
_SEARCH_METHODS = ["Hybrid", "Vector", "BM25"]
_SEARCH_METHOD_HELP = """
- **BM25**: Keyword-based search that works best for exact matches and specific terms. Similar to traditional search engines.
- **Vector**: Semantic search that understands the meaning of your query, even if it uses different words than the source text.
- **Hybrid**: Combines both approaches for better overall results, balancing exact matches with semantic understanding.
"""
search_type = st.radio(
    "Select search method:",
    _SEARCH_METHODS,
    horizontal=True,
    help=_SEARCH_METHOD_HELP,
)
# Run the search only once the user has typed something.
if search_query:
    with st.spinner('Searching...'):
        # Dispatch to the retriever matching the selected search method.
        if search_type == "Hybrid":
            nodes = hybrid_retriever.retrieve(search_query)
        elif search_type == "Vector":
            nodes = vector_retriever.retrieve(search_query)
        else:  # BM25
            nodes = bm25_retriever.retrieve(search_query)

    # Display results
    st.subheader("Search Results")

    if not nodes:
        # Tell the user explicitly instead of showing a bare heading.
        st.info("No results found.")

    for i, node in enumerate(nodes, 1):
        text = node.text

        # Preview: first 200 characters, with an ellipsis only when truncated.
        preview = text[:200] + "..." if len(text) > 200 else text

        # The score attribute can exist and still be None (it is optional on
        # llama-index's NodeWithScore); formatting None with ":.3f" raises
        # TypeError, so check the value rather than the attribute's presence.
        raw_score = getattr(node, "score", None)
        score = f"{raw_score:.3f}" if raw_score is not None else "N/A"

        # Collapsed expander: score and preview in the label, full text inside.
        with st.expander(f"Result {i} (score: {score})\n\n{preview}", expanded=False):
            st.markdown(text)
            if node.metadata:
                st.markdown("---")
                st.markdown("**Source:**")
                st.json(node.metadata)
# Sidebar "About" panel summarising the three search modes. Elements are added
# through the st.sidebar object instead of a `with` block.
_about_panel = st.sidebar
_about_panel.header("About")
_about_panel.markdown("""
This demo searches through Freud's complete works using:
- **BM25**: Traditional keyword-based search
- **Vector Search**: Semantic similarity using embeddings
- **Hybrid**: Combines both approaches
""")