Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -243,7 +243,7 @@ class BaseInvertedIndexRetriever(BaseRetriever):
|
|
| 243 |
def index_class(self) -> Type[InvertedIndex]:
|
| 244 |
pass
|
| 245 |
|
| 246 |
-
def
|
| 247 |
self.index = self.index_class.from_saved(index_dir)
|
| 248 |
|
| 249 |
def get_term_weights(self, query: str, cid: str) -> Dict[str, float]:
|
|
@@ -307,38 +307,44 @@ demo: Optional[gr.Interface] = None # Assign your gradio demo to this variable
|
|
| 307 |
return_type = List[Hit]
|
| 308 |
|
| 309 |
## YOUR_CODE_STARTS_HERE
|
| 310 |
-
# Building BM25 index and save:
|
| 311 |
bm25_index = BM25Index.build_from_documents(
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
bm25_retriever = BM25Retriever(index_dir="output/bm25_index")
|
| 320 |
|
| 321 |
corpus_dict = {doc.collection_id: doc.text for doc in sciq.corpus}
|
| 322 |
|
| 323 |
-
|
|
|
|
|
|
|
| 324 |
results = bm25_retriever.retrieve(query)
|
| 325 |
hits = [
|
| 326 |
{
|
| 327 |
"cid": cid,
|
| 328 |
"score": score,
|
| 329 |
-
"text": corpus_dict[cid]
|
| 330 |
}
|
| 331 |
for cid, score in results.items()
|
| 332 |
]
|
| 333 |
return hits
|
| 334 |
|
| 335 |
-
|
| 336 |
-
|
|
|
|
|
|
|
|
|
|
| 337 |
demo = gr.Interface(
|
| 338 |
-
fn=
|
| 339 |
-
inputs=gr.Textbox(label="Enter your query"),
|
| 340 |
-
outputs=gr.Textbox(label="Results", lines=20, interactive=False),
|
| 341 |
-
title="BM25
|
|
|
|
| 342 |
)
|
| 343 |
-
|
| 344 |
-
|
|
|
|
|
|
|
|
|
| 243 |
def index_class(self) -> Type[InvertedIndex]:
|
| 244 |
pass
|
| 245 |
|
| 246 |
+
def __init__(self, index_dir: str) -> None:
    """Load a previously saved index and attach it to this retriever.

    Args:
        index_dir: Directory handed to ``index_class.from_saved``.
    """
    # ``index_class`` is read as an attribute (not called), so the concrete
    # index type is whatever the subclass exposes under that name.
    self.index = self.index_class.from_saved(index_dir)
|
| 248 |
|
| 249 |
def get_term_weights(self, query: str, cid: str) -> Dict[str, float]:
|
|
|
|
| 307 |
return_type = List[Hit]
|
| 308 |
|
| 309 |
## YOUR_CODE_STARTS_HERE
|
|
|
|
| 310 |
# Build a BM25 index over the SciQ corpus with explicit k1/b settings.
bm25_index = BM25Index.build_from_documents(
    documents=iter(sciq.corpus),
    ndocs=len(sciq.corpus),
    k1=0.9,
    b=0.4,
)
# Persist the index, then load the retriever from that same directory; the
# two path literals below must stay identical.
bm25_index.save("output/bm25_index_b")
bm25_retriever = BM25Retriever(index_dir="output/bm25_index_b")

# Lookup table from collection id to document text, used to attach the text
# to each retrieved hit.
corpus_dict = {doc.collection_id: doc.text for doc in sciq.corpus}
|
| 320 |
|
| 321 |
+
# Search function for the BM25 system
|
| 322 |
+
def search(query: str) -> List[Hit]:
    """Retrieve documents for *query* and return them as hit records.

    Args:
        query: Free-text query passed unchanged to ``bm25_retriever.retrieve``.

    Returns:
        One dict per retrieved document with the keys ``"cid"``, ``"score"``
        and ``"text"``, in the order the retriever's result mapping yields
        them.

    Raises:
        KeyError: If the retriever returns a cid that is missing from
            ``corpus_dict``.
    """
    # ``retrieve`` yields a mapping of collection id -> score.
    results = bm25_retriever.retrieve(query)
    hits = [
        {
            "cid": cid,
            "score": score,
            # Text comes from corpus_dict (cid -> text), not from
            # sciq.corpus directly.
            "text": corpus_dict[cid],
        }
        for cid, score in results.items()
    ]
    return hits
|
| 334 |
|
| 335 |
+
def handle_search(query: str) -> List[Hit]:
    """Gradio callback: run ``search`` on *query* and return its hits as-is.

    Args:
        query: Text entered by the user.

    Returns:
        Exactly what ``search(query)`` returns; no formatting is applied.
    """
    # NOTE(review): the hits are returned unformatted on purpose so the
    # callback keeps returning the list of hit records — confirm how the
    # configured output component renders a list before changing this.
    return search(query)
|
| 338 |
+
|
| 339 |
+
|
| 340 |
# Gradio UI: one text box in, one read-only text box out, backed by
# handle_search.
demo = gr.Interface(
    fn=handle_search,
    inputs=gr.Textbox(label="Enter your search query"),
    outputs=gr.Textbox(label="Search Results", lines=20, interactive=False),
    title="BM25 Search Engine Demo on SciQ Dataset",
    description="Enter your search query to get the results from the SciQ dataset.",
)

# Start the server only when this file is executed as a script, so importing
# the module to obtain ``demo`` does not start (and block on) a server.
# ``share`` is not set, so no public share link is requested here.
if __name__ == "__main__":
    demo.launch(debug=True)
|
| 350 |
+
|