manfredmichael committed on
Commit beb7154 · 1 Parent(s): 3676673

Fix page always restarting

Files changed (4):
  1. .gitignore +1 -0
  2. app.py +39 -39
  3. calculate_mmr.py +34 -0
  4. retrieval_pipeline/cache.py +42 -44
.gitignore ADDED
@@ -0,0 +1 @@
+**/__pycache__/
app.py CHANGED
@@ -5,12 +5,13 @@ import os, time
 import uuid
 
 from retrieval_pipeline import get_retriever, get_compression_retriever
+from retrieval_pipeline.cache import SemanticCache
 import benchmark
 
 
-def get_result(query, compression_retriever):
+def get_result(query, retriever, use_cache):
     t0 = time.time()
-    retrieved_chunks = compression_retriever.get_relevant_documents(query)
+    retrieved_chunks = retriever.get_relevant_documents(query, use_cache=use_cache)
     latency = time.time() - t0
     return retrieved_chunks, latency
 
@@ -19,62 +20,61 @@ st.set_page_config(
     page_title="Retrieval Demo"
 )
 
-def setup():
+
+@st.cache_resource
+def setup_retriever():
     load_dotenv()
     ELASTICSEARCH_URL = os.getenv('ELASTICSEARCH_URL')
 
     retriever = get_retriever(index='masa.ai', elasticsearch_url=ELASTICSEARCH_URL)
     compression_retriever = get_compression_retriever(retriever)
-    return compression_retriever
+    semantic_cache_retriever = SemanticCache(compression_retriever)
+    return semantic_cache_retriever
+
+
+def retrieval_page(retriever, use_cache):
+    with st.form(key='input_form'):
+        query_input = st.text_area("Query Input")
+        submit_button = st.form_submit_button(label='Retrieve')
+
+    if submit_button:
+        with st.spinner('Processing...'):
+            result, latency = get_result(query_input, retriever=retriever, use_cache=use_cache)
+        st.subheader("Please find the retrieved documents below 👇")
+        st.write("latency:", latency, " s")
+        st.json(result)
 
 
 def main():
     st.title("Part 3: Search")
-    # st.sidebar.write("According to the Model Size 👇")
-    # menu = ["Nano", "Small", "Medium", "Large"]
-    # choice = st.sidebar.selectbox("Choose", menu)
+    use_cache = st.sidebar.toggle("Use cache", value=True)
 
     st.sidebar.info("""
-    **Model Options:**
-    - **Nano**: ~4MB, blazing fast model with competitive performance (ranking precision).
-    - **Small**: ~34MB, slightly slower with the best performance (ranking precision).
-    - **Medium**: ~110MB, slower model with the best zero-shot performance (ranking precision).
-    - **Large**: ~150MB, slower model with competitive performance (ranking precision) for 100+ languages.
+    **Retrieval Pipeline Evaluation Result:**
+    - **MRR**: 0.756
+    - **Avg. Latency**: 4.50s (on CPU, with cache turned off)
+    - **Benchmark Result**: https://docs.google.com/spreadsheets/d/1WJnb8BieoxLch0gvb53ZzMS70r_G35PKm731ubdeNCA/edit?usp=sharing
     """)
 
     with st.spinner('Setting up...'):
-        compression_retriever = setup()
+        retriever = setup_retriever()
 
-    with st.expander("Tech Stack Used"):
-        st.markdown("""
-        **Flash Rank**: Ultra-lite & Super-fast Python library for search & retrieval re-ranking.
+    retrieval_page(retriever, use_cache)
 
-        - **Ultra-lite**: No heavy dependencies. Runs on CPU with a tiny ~4MB reranking model.
-        - **Super-fast**: Speed depends on the number of tokens in passages and query, plus model depth.
-        - **Cost-efficient**: Ideal for serverless deployments with low memory and time requirements.
-        - **Based on State-of-the-Art Cross-encoders**: Includes models like ms-marco-TinyBERT-L-2-v2 (default), ms-marco-MiniLM-L-12-v2, rank-T5-flan, and ms-marco-MultiBERT-L-12.
-        - **Sleek Models for Efficiency**: Designed for minimal overhead in user-facing scenarios.
+    # with st.expander("Tech Stack Used"):
+    #     st.markdown("""
+    #     **Flash Rank**: Ultra-lite & Super-fast Python library for search & retrieval re-ranking.
 
-        _Flash Rank is tailored for scenarios requiring efficient and effective reranking, balancing performance with resource usage._
-        """)
-
-
-    with st.form(key='input_form'):
-        query_input = st.text_area("Query Input")
-        # context_input = st.text_area("Context Input")
-        submit_button = st.form_submit_button(label='Retrieve')
-
-    if submit_button:
-        st.session_state.submitted = True
-
-    if 'submitted' in st.session_state:
-        with st.spinner('Processing...'):
-            result, latency = get_result(query_input, compression_retriever)
-        st.subheader("Please find the retrieved documents below 👇")
-        st.write("latency:", latency, " ms")
-        st.json(result)
+    #     - **Ultra-lite**: No heavy dependencies. Runs on CPU with a tiny ~4MB reranking model.
+    #     - **Super-fast**: Speed depends on the number of tokens in passages and query, plus model depth.
+    #     - **Cost-efficient**: Ideal for serverless deployments with low memory and time requirements.
+    #     - **Based on State-of-the-Art Cross-encoders**: Includes models like ms-marco-TinyBERT-L-2-v2 (default), ms-marco-MiniLM-L-12-v2, rank-T5-flan, and ms-marco-MultiBERT-L-12.
+    #     - **Sleek Models for Efficiency**: Designed for minimal overhead in user-facing scenarios.
 
+    #     _Flash Rank is tailored for scenarios requiring efficient and effective reranking, balancing performance with resource usage._
+    #     """)
 
 if __name__ == "__main__":
     main()
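Note on the fix: Streamlit re-executes the entire script on every widget interaction, so the old `setup()` rebuilt the Elasticsearch retriever and reranker on each rerun, which is what made the page appear to keep restarting. The new `setup_retriever()` is decorated with `@st.cache_resource`, which memoizes the heavy object across reruns. A minimal sketch of the pattern (the slow setup below is an illustrative stand-in, not the app's real pipeline):

```python
import time
import streamlit as st

@st.cache_resource  # built once per server process; later reruns reuse the object
def expensive_setup():
    time.sleep(5)  # stand-in for loading models / connecting to Elasticsearch
    return {"retriever": "ready"}

retriever = expensive_setup()  # slow only on the very first run
st.write(retriever)
```

Moving the form logic into `retrieval_page()` and dropping the `st.session_state.submitted` flag also means a submit now triggers exactly one retrieval, instead of re-running `get_result` on every subsequent interaction once the flag was set.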
calculate_mmr.py ADDED
@@ -0,0 +1,34 @@
+import argparse
+import pandas as pd
+
+def find_reciprocal_rank(target, row, u, k):
+    for i in range(k):
+        q = row['q{}'.format(i+1)]
+        if target == q:
+            print(1/(i+1))
+            return 1/(i+1)
+    return 0
+
+def main(filename, k):
+    df = pd.read_csv(filename)
+    u = len(df)
+
+    sum_ = 0
+    for _, row in df.iterrows():
+        target = row['body']
+        reciprocal_rank = find_reciprocal_rank(target, row, u, k)
+        sum_ += reciprocal_rank
+    mrr = sum_ / u
+
+    print('U:', u)
+    print('MRR: ', mrr)
+
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('filename')
+    parser.add_argument('-k', type=int)
+    args = parser.parse_args()
+
+    main(filename=args.filename, k=args.k)
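Despite the file name, `calculate_mmr.py` computes MRR (Mean Reciprocal Rank): for each of the U benchmark rows it finds the rank of the ground-truth `body` among the retrieved columns `q1..qk` and averages the reciprocal ranks, contributing 0 when the target is missing from the top k. A tiny hypothetical input to illustrate the expected CSV layout (the values below are made up):

```python
import pandas as pd

# Two benchmark rows: 'body' is the ground truth, 'q1'..'q2' are the
# retrieved results in rank order (so k = 2 here).
df = pd.DataFrame({
    'body': ['doc A', 'doc B'],
    'q1':   ['doc X', 'doc B'],  # row 2 hits at rank 1 -> contributes 1/1
    'q2':   ['doc A', 'doc Y'],  # row 1 hits at rank 2 -> contributes 1/2
})
df.to_csv('tiny_benchmark.csv', index=False)

# $ python calculate_mmr.py tiny_benchmark.csv -k 2
# expected output: U: 2, MRR = (1/2 + 1/1) / 2 = 0.75
```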
retrieval_pipeline/cache.py CHANGED
@@ -46,49 +46,47 @@ class SemanticCache:
         results = self.retriever.get_relevant_documents(query_text)
         return results
 
-    def get_relevant_documents(self, query: str) -> str:
+    def get_relevant_documents(self, query: str, use_cache=True) -> str:
         # Method to retrieve an answer from the cache or generate a new one
         start_time = time.time()
-        # try:
-        # First we obtain the embeddings corresponding to the user query
-        embedding = self.encoder.encode([query])
-
-        # Search for the nearest neighbor in the index
-        self.index.nprobe = 8
-        D, I = self.index.search(embedding, 1)
-
-        if D[0] >= 0:
-            if I[0][0] >= 0 and D[0][0] <= self.euclidean_threshold:
-                row_id = int(I[0][0])
-
-                print("Answer recovered from Cache. ")
-                print(f"{D[0][0]:.3f} smaller than {self.euclidean_threshold}")
-                print(f"Found cache in row: {row_id} with score {D[0][0]:.3f}")
-
-                end_time = time.time()
-                elapsed_time = end_time - start_time
-                print(f"Time taken: {elapsed_time:.3f} seconds")
-                return [Document(**doc[k]) for doc in self.cache["answers"][row_id]]
-
-        # Handle the case when there are not enough results
-        # or Euclidean distance is not met, asking to chromaDB.
-        answer = self.query_database(query)
-        # response_text = answer["documents"][0][0]
-
-        self.cache["query"].append(query)
-        self.cache["embeddings"].append(embedding[0].tolist())
-        self.cache["answers"].append([doc.__dict__ for doc in answer])
-        # self.cache["response_text"].append(response_text)
-
-        print("Answer recovered from ChromaDB. ")
-        # print(f"response_text: {response_text}")
-
-        self.index.add(embedding)
-        store_cache(self.json_file, self.cache)
-        end_time = time.time()
-        elapsed_time = end_time - start_time
-        print(f"Time taken: {elapsed_time:.3f} seconds")
-
-        return answer
-        # except Exception as e:
-        #     raise RuntimeError(f"Error during 'get_relevant_documents' method: {e}")
+        try:
+            # First we obtain the embeddings corresponding to the user query
+            embedding = self.encoder.encode([query])
+
+            # Search for the nearest neighbor in the index
+            self.index.nprobe = 8
+            D, I = self.index.search(embedding, 1)
+
+            if use_cache:
+                if D[0] >= 0:
+                    if I[0][0] >= 0 and D[0][0] <= self.euclidean_threshold:
+                        row_id = int(I[0][0])
+
+                        print("Answer recovered from Cache. ")
+                        print(f"{D[0][0]:.3f} smaller than {self.euclidean_threshold}")
+                        print(f"Found cache in row: {row_id} with score {D[0][0]:.3f}")
+
+                        end_time = time.time()
+                        elapsed_time = end_time - start_time
+                        print(f"Time taken: {elapsed_time:.3f} seconds")
+                        return [Document(**doc) for doc in self.cache["answers"][row_id]]
+
+            # Handle the case when there are not enough results
+            # or Euclidean distance is not met, asking to chromaDB.
+            answer = self.query_database(query)
+
+            self.cache["query"].append(query)
+            self.cache["embeddings"].append(embedding[0].tolist())
+            self.cache["answers"].append([doc.__dict__ for doc in answer])
+
+            self.index.add(embedding)
+            store_cache(self.json_file, self.cache)
+            end_time = time.time()
+            elapsed_time = end_time - start_time
+            print(f"Time taken: {elapsed_time:.3f} seconds")
+
+            return answer
+        except Exception as e:
+            raise RuntimeError(f"Error during 'get_relevant_documents' method: {e}")
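The reworked `get_relevant_documents` implements a semantic cache: the query is embedded, the FAISS index is searched for the nearest previously seen query, and if the distance is within `euclidean_threshold` (and `use_cache` is on) the stored answer is returned; otherwise the real retriever is queried and the new query/answer pair is added to the index and the JSON cache. A minimal sketch of the hit/miss decision, assuming a flat L2 FAISS index and a sentence-transformers encoder (the class sets `nprobe`, which suggests an IVF index in the real code; the model name and threshold below are illustrative):

```python
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

encoder = SentenceTransformer('all-MiniLM-L6-v2')  # assumed encoder model
index = faiss.IndexFlatL2(encoder.get_sentence_embedding_dimension())
euclidean_threshold = 0.35  # assumed value; tune per embedding model

def lookup(query: str):
    embedding = np.asarray(encoder.encode([query]), dtype='float32')
    D, I = index.search(embedding, 1)  # distance and row id of nearest cached query
    if I[0][0] >= 0 and D[0][0] <= euclidean_threshold:
        return 'hit', int(I[0][0])  # serve cache["answers"][row_id]
    index.add(embedding)  # remember this query for future lookups
    return 'miss', None  # caller falls through to the real retriever
```

One behavior worth noting in the committed version: the embedding is added to the index and the answer stored even when `use_cache=False`, so running with the cache toggled off still populates it for later hits.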