Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -243,7 +243,7 @@ class BaseInvertedIndexRetriever(BaseRetriever):
|
|
243 |
def index_class(self) -> Type[InvertedIndex]:
|
244 |
pass
|
245 |
|
246 |
-
def
|
247 |
self.index = self.index_class.from_saved(index_dir)
|
248 |
|
249 |
def get_term_weights(self, query: str, cid: str) -> Dict[str, float]:
|
@@ -307,38 +307,44 @@ demo: Optional[gr.Interface] = None # Assign your gradio demo to this variable
|
|
307 |
return_type = List[Hit]
|
308 |
|
309 |
## YOUR_CODE_STARTS_HERE
|
310 |
-
# Building BM25 index and save:
|
311 |
bm25_index = BM25Index.build_from_documents(
|
312 |
-
|
313 |
-
|
314 |
-
|
315 |
-
|
316 |
-
|
317 |
-
|
318 |
-
|
319 |
-
bm25_retriever = BM25Retriever(index_dir="output/bm25_index")
|
320 |
|
321 |
corpus_dict = {doc.collection_id: doc.text for doc in sciq.corpus}
|
322 |
|
323 |
-
|
|
|
|
|
324 |
results = bm25_retriever.retrieve(query)
|
325 |
hits = [
|
326 |
{
|
327 |
"cid": cid,
|
328 |
"score": score,
|
329 |
-
"text": corpus_dict[cid]
|
330 |
}
|
331 |
for cid, score in results.items()
|
332 |
]
|
333 |
return hits
|
334 |
|
335 |
-
|
336 |
-
|
|
|
|
|
|
|
337 |
demo = gr.Interface(
|
338 |
-
fn=
|
339 |
-
inputs=gr.Textbox(label="Enter your query"),
|
340 |
-
outputs=gr.Textbox(label="Results", lines=20, interactive=False),
|
341 |
-
title="BM25
|
|
|
342 |
)
|
343 |
-
|
344 |
-
|
|
|
|
|
|
243 |
def index_class(self) -> Type[InvertedIndex]:
|
244 |
pass
|
245 |
|
246 |
+
def __init__(self, index_dir: str) -> None:
|
247 |
self.index = self.index_class.from_saved(index_dir)
|
248 |
|
249 |
def get_term_weights(self, query: str, cid: str) -> Dict[str, float]:
|
|
|
307 |
return_type = List[Hit]
|
308 |
|
309 |
## YOUR_CODE_STARTS_HERE
|
|
|
310 |
bm25_index = BM25Index.build_from_documents(
|
311 |
+
documents=iter(sciq.corpus),
|
312 |
+
ndocs=len(sciq.corpus),
|
313 |
+
k1=0.9,
|
314 |
+
b=0.4
|
315 |
+
)
|
316 |
+
bm25_index.save("output/bm25_index_b") # Save index to directory
|
317 |
+
bm25_retriever = BM25Retriever(index_dir="output/bm25_index_b")
|
|
|
318 |
|
319 |
corpus_dict = {doc.collection_id: doc.text for doc in sciq.corpus}
|
320 |
|
321 |
+
# Search function for the BM25 system
|
322 |
+
def search(query: str) -> List[Hit]:
|
323 |
+
# Run BM25 retrieval for the query, then attach each hit's document text
|
324 |
results = bm25_retriever.retrieve(query)
|
325 |
hits = [
|
326 |
{
|
327 |
"cid": cid,
|
328 |
"score": score,
|
329 |
+
"text": corpus_dict[cid] # corpus_dict (built above) maps each collection_id to its document text
|
330 |
}
|
331 |
for cid, score in results.items()
|
332 |
]
|
333 |
return hits
|
334 |
|
335 |
+
def handle_search(query):
|
336 |
+
results = search(query)
|
337 |
+
return results
|
338 |
+
|
339 |
+
|
340 |
demo = gr.Interface(
|
341 |
+
fn=handle_search, # The function to process input
|
342 |
+
inputs=gr.Textbox(label="Enter your search query"), # Input: Textbox
|
343 |
+
outputs=gr.Textbox(label="Search Results", lines=20, interactive=False), # Output: Textbox
|
344 |
+
title="BM25 Search Engine Demo on SciQ Dataset", # Title of the app
|
345 |
+
description="Enter your search query to get the results from the SciQ dataset." # Description
|
346 |
)
|
347 |
+
|
348 |
+
# Launch the app in debug mode (share=True is not passed, so no public share link is requested)
|
349 |
+
demo.launch(debug=True)
|
350 |
+
|