asimmetti committed on
Commit
f373cd9
·
verified ·
1 Parent(s): 4f56247

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -20
app.py CHANGED
@@ -243,7 +243,7 @@ class BaseInvertedIndexRetriever(BaseRetriever):
243
  def index_class(self) -> Type[InvertedIndex]:
244
  pass
245
 
246
- def _init_(self, index_dir: str) -> None:
247
  self.index = self.index_class.from_saved(index_dir)
248
 
249
  def get_term_weights(self, query: str, cid: str) -> Dict[str, float]:
@@ -307,38 +307,44 @@ demo: Optional[gr.Interface] = None # Assign your gradio demo to this variable
307
  return_type = List[Hit]
308
 
309
  ## YOUR_CODE_STARTS_HERE
310
- # Building BM25 index and save:
311
  bm25_index = BM25Index.build_from_documents(
312
- documents=iter(sciq.corpus),
313
- ndocs=12160,
314
- show_progress_bar=True
315
- )
316
- bm25_index.save("output/bm25_index")
317
-
318
- # Loading index and use BM25 retriever to retrieve:
319
- bm25_retriever = BM25Retriever(index_dir="output/bm25_index")
320
 
321
  corpus_dict = {doc.collection_id: doc.text for doc in sciq.corpus}
322
 
323
- def get_query(query):
 
 
324
  results = bm25_retriever.retrieve(query)
325
  hits = [
326
  {
327
  "cid": cid,
328
  "score": score,
329
- "text": corpus_dict[cid]
330
  }
331
  for cid, score in results.items()
332
  ]
333
  return hits
334
 
335
-
336
-
 
 
 
337
  demo = gr.Interface(
338
- fn=get_query,
339
- inputs=gr.Textbox(label="Enter your query"),
340
- outputs=gr.Textbox(label="Results", lines=20, interactive=False),
341
- title="BM25 Query Engine"
 
342
  )
343
- ## YOUR_CODE_ENDS_HERE
344
- demo.launch()
 
 
 
243
  def index_class(self) -> Type[InvertedIndex]:
244
  pass
245
 
246
+ def __init__(self, index_dir: str) -> None:
247
  self.index = self.index_class.from_saved(index_dir)
248
 
249
  def get_term_weights(self, query: str, cid: str) -> Dict[str, float]:
 
307
  return_type = List[Hit]
308
 
309
  ## YOUR_CODE_STARTS_HERE
 
310
  bm25_index = BM25Index.build_from_documents(
311
+ documents=iter(sciq.corpus),
312
+ ndocs=len(sciq.corpus),
313
+ k1=0.9,
314
+ b=0.4
315
+ )
316
+ bm25_index.save("output/bm25_index_b") # Save index to directory
317
+ bm25_retriever = BM25Retriever(index_dir="output/bm25_index_b")
 
318
 
319
  corpus_dict = {doc.collection_id: doc.text for doc in sciq.corpus}
320
 
321
+ # Search function for the BM25 system
322
+ def search(query: str) -> List[Hit]:
323
+ # Retrieve BM25 results for the query and attach each hit's document text
324
  results = bm25_retriever.retrieve(query)
325
  hits = [
326
  {
327
  "cid": cid,
328
  "score": score,
329
+ "text": corpus_dict[cid] # corpus_dict maps each collection_id to its document text
330
  }
331
  for cid, score in results.items()
332
  ]
333
  return hits
334
 
335
+ def handle_search(query):
336
+ results = search(query)
337
+ return results
338
+
339
+
340
  demo = gr.Interface(
341
+ fn=handle_search, # The function to process input
342
+ inputs=gr.Textbox(label="Enter your search query"), # Input: Textbox
343
+ outputs=gr.Textbox(label="Search Results", lines=20, interactive=False), # Output: Textbox
344
+ title="BM25 Search Engine Demo on SciQ Dataset", # Title of the app
345
+ description="Enter your search query to get the results from the SciQ dataset." # Description
346
  )
347
+
348
+ # Launch the app in debug mode (no shareable URL is requested; that would need share=True)
349
+ demo.launch(debug=True)
350
+