"""Streamlit front end that searches a local Pyserini (Lucene) index.

The page shows a logo, a search box with a search button, and for every hit
the document id followed by a short preview of the document contents.
"""

import json
import logging

import streamlit as st
from pyserini.search.lucene import LuceneSearcher

logger = logging.getLogger(__name__)

# Maximum number of hits requested from the index and rendered on the page.
MAX_RESULTS = 10
# Number of leading characters of each document shown as its preview.
SNIPPET_CHARS = 100

st.set_page_config(page_title="Pyserini x Datasets", page_icon='🌸', layout="centered")

# NOTE(review): Streamlit re-runs the script on each interaction, so the
# searcher is re-opened every time; consider caching it if opening is slow.
searcher = LuceneSearcher('index')

# Three columns with the logo in the middle one, which centres it on the page.
cola, colb, colc = st.columns([5, 4, 5])
with colb:
    st.image("logo.jpeg")

# Wide column for the query box, narrow column for the search button.
col1, col2 = st.columns([9, 1])
with col1:
    search_query = st.text_input(label="", placeholder="Search")
with col2:
    # Presumably a spacer so the button lines up with the input — TODO confirm.
    st.write('#')
    button_clicked = st.button("🔎")

if search_query.strip():
    search_results = searcher.search(search_query, k=MAX_RESULTS)

    for hit in search_results[:MAX_RESULTS]:
        # The raw stored document is parsed as JSON; "id" and "contents" are
        # the two fields read from it.
        record = json.loads(hit.raw)
        doc = record["contents"]
        result_id = record["id"]

        st.write(f"Document ID: {result_id}", unsafe_allow_html=True)

        # NOTE(review): the preview is rendered with unsafe_allow_html=True;
        # confirm the indexed contents are trusted, since truncated or hostile
        # markup would be passed to the page as HTML.
        try:
            st.write(doc[:SNIPPET_CHARS], unsafe_allow_html=True)
        except Exception:
            # Best effort: one document that cannot be previewed must not
            # abort the rest of the results, but the reason is logged instead
            # of being silently discarded. Unlike a bare ``except``, this lets
            # BaseException subclasses (e.g. KeyboardInterrupt) propagate.
            logger.exception("Could not render preview for document %s", result_id)

        st.write('---')
elif button_clicked:
    # The button was pressed with nothing to search for; do not query the
    # index with an empty string.
    st.warning("Please enter a search query.")