|
import streamlit as st |
|
from llama_index.core import VectorStoreIndex, StorageContext, load_index_from_storage |
|
from llama_index.embeddings.huggingface import HuggingFaceEmbedding |
|
from llama_index.core import Settings |
|
from llama_index.retrievers.bm25 import BM25Retriever |
|
from llama_index.core.retrievers import QueryFusionRetriever |
|
|
|
|
|
# Page-level configuration; Streamlit expects this before other UI calls.
# NOTE(review): "π" may be a mis-encoded emoji from the original file — confirm.
_PAGE_SETTINGS = {
    "page_title": "Freud Works Search",
    "page_icon": "π",
    "layout": "wide",
}
st.set_page_config(**_PAGE_SETTINGS)

# Headline plus a short description of the two search strategies on offer.
st.title("Freud Works Hybrid Search")
st.markdown(
    "\n"
    "This demo allows you to search through Freud's complete works using a hybrid approach combining:\n"
    "- BM25 (keyword-based search)\n"
    "- Vector search (semantic similarity)\n"
)
|
|
|
@st.cache_resource
def load_indices():
    """Build the index and the three retrievers used by the search page.

    Steps performed:

    1. Register a ``HuggingFaceEmbedding`` ("multi-qa-MiniLM-L6-cos-v1") as
       the global ``Settings.embed_model``.
    2. Load the index persisted under the ``freud_index`` directory.
    3. Create a vector retriever and a BM25 retriever from that index, and a
       ``QueryFusionRetriever`` over both in ``"reciprocal_rerank"`` mode.
       All three start with ``similarity_top_k=10``.

    The function is wrapped in ``st.cache_resource``, so Streamlit is
    expected to reuse the returned objects across reruns instead of
    rebuilding them each time.

    Returns:
        A 4-tuple ``(index, vector_retriever, bm25_retriever,
        hybrid_retriever)``.
    """
    # The embedding model is installed globally before the index is loaded.
    Settings.embed_model = HuggingFaceEmbedding(
        model_name="multi-qa-MiniLM-L6-cos-v1"
    )

    persisted = StorageContext.from_defaults(persist_dir="freud_index")
    loaded_index = load_index_from_storage(storage_context=persisted)

    semantic = loaded_index.as_retriever(similarity_top_k=10)
    keyword = BM25Retriever.from_defaults(loaded_index, similarity_top_k=10)

    # Fuse the semantic and keyword rankings into a single result list.
    fusion_options = dict(
        similarity_top_k=10,
        num_queries=1,
        mode="reciprocal_rerank",
        use_async=True,
        verbose=True,
    )
    fused = QueryFusionRetriever([semantic, keyword], **fusion_options)

    return loaded_index, semantic, keyword, fused
|
|
|
|
|
# Obtain the index and the three retrievers built by load_indices().
index, vector_retriever, bm25_retriever, hybrid_retriever = load_indices()

# Free-text query box.
search_query = st.text_input(
    "Enter your search query:",
    placeholder="e.g. Oedipus complex",
)

# How many hits the user wants back (1-20, defaulting to 10).
top_k = st.slider(
    "Number of results to return:",
    min_value=1,
    max_value=20,
    value=10,
)

# Push the chosen result count into every retriever.
# NOTE(review): load_indices() is decorated with st.cache_resource, so these
# objects are presumably shared between sessions and this mutates shared
# state — confirm that is acceptable for multi-user deployments.
for _retriever in (vector_retriever, bm25_retriever, hybrid_retriever):
    _retriever.similarity_top_k = top_k

# Tooltip explaining the three search strategies.
_SEARCH_METHOD_HELP = """
- **BM25**: Keyword-based search that works best for exact matches and specific terms. Similar to traditional search engines.
- **Vector**: Semantic search that understands the meaning of your query, even if it uses different words than the source text.
- **Hybrid**: Combines both approaches for better overall results, balancing exact matches with semantic understanding.
"""

# Strategy selector; "Hybrid" is listed first.
search_type = st.radio(
    "Select search method:",
    ["Hybrid", "Vector", "BM25"],
    horizontal=True,
    help=_SEARCH_METHOD_HELP,
)
|
|
|
# Run the search and render the results.
# A query consisting only of whitespace is treated as "no query".
_query = search_query.strip() if search_query else ""

if _query:
    # Map the radio selection to its retriever; anything that is not
    # "Hybrid" or "Vector" falls back to BM25, as in the original if/else.
    _retriever_by_method = {
        "Hybrid": hybrid_retriever,
        "Vector": vector_retriever,
    }
    _active_retriever = _retriever_by_method.get(search_type, bm25_retriever)

    with st.spinner('Searching...'):
        nodes = _active_retriever.retrieve(_query)

    st.subheader("Search Results")

    if not nodes:
        # Give explicit feedback instead of an empty page under the header.
        st.info("No results found. Try a different query or search method.")

    for i, node in enumerate(nodes, 1):
        text = node.text

        # Short teaser shown in the collapsed expander header.
        preview = text[:200] + "..." if len(text) > 200 else text

        # hasattr() alone is not enough: a score attribute that exists but
        # is None would make the ":.3f" format raise TypeError.
        raw_score = getattr(node, "score", None)
        score = f"{raw_score:.3f}" if raw_score is not None else "N/A"

        with st.expander(f"Result {i} (score: {score})\n\n{preview}", expanded=False):
            st.markdown(text)
            if node.metadata:
                st.markdown("---")
                st.markdown("**Source:**")
                st.json(node.metadata)
|
|
|
|
|
# Sidebar: a short "About" panel summarising the three search modes.
_ABOUT_TEXT = (
    "\n"
    "This demo searches through Freud's complete works using:\n"
    "\n"
    "- **BM25**: Traditional keyword-based search\n"
    "- **Vector Search**: Semantic similarity using embeddings\n"
    "- **Hybrid**: Combines both approaches\n"
)
st.sidebar.header("About")
st.sidebar.markdown(_ABOUT_TEXT)
|
|