Spaces:

awinml
/

2-qa-earnings-sentencewise

Build error

App Files Files Community

Upload 17 files

#17

by awinml - opened May 29, 2023

base: refs/heads/main

←

from: refs/pr/17

Discussion Files changed

+49

-7

Files changed (2) hide show

utils/entity_extraction.py +36 -1
utils/retriever.py +13 -6

utils/entity_extraction.py CHANGED Viewed

@@ -21,6 +21,41 @@ def expand_list_of_lists(list_of_lists):
     return expanded_list
 def all_keywords_combs(texts):
     texts = [text.split(" ") for text in texts]
@@ -47,7 +82,7 @@ def extract_keywords(query_text, model):
     prompt = f"###Instruction:Extract the important keywords which describe the context accurately.\n\nInput:{query_text}\n\n###Response:"
     response = model.predict(prompt)
     keywords = response.split(", ")
-    keywords = all_keywords_combs(keywords)
     return keywords

     return expanded_list
+def keywords_no_companies(texts):
+    """Split keyword phrases into lowercase words and drop company names.
+
+    Args:
+        texts: Iterable of keyword phrases (strings).
+
+    Returns:
+        A list of lowercase words taken from ``texts``, excluding every word
+        that appears in the company-name / ticker exclusion set below.
+    """
+    # Company names and tickers to remove from the extracted keywords.
+    company_names = frozenset(
+        {
+            "apple",
+            "amd",
+            "amazon",
+            "cisco",
+            "google",
+            "microsoft",
+            "nvidia",
+            "asml",
+            "intel",
+            "micron",
+            "aapl",
+            "csco",
+            "msft",
+            "nvda",
+            "googl",
+            "mu",
+            "intc",
+            "amzn",
+        }
+    )
+    # split() with no argument discards the empty tokens that split(" ")
+    # yields for repeated, leading or trailing whitespace, so an empty
+    # string is never returned as a keyword.
+    words_per_text = [text.split() for text in texts]
+    # expand_list_of_lists is defined earlier in this module; presumably it
+    # flattens the per-phrase word lists into one list -- confirm there.
+    words = expand_list_of_lists(words_per_text)
+    # Compare case-insensitively by lowercasing every word first.
+    lower_words = (word.lower() for word in words)
+    return [word for word in lower_words if word not in company_names]
 def all_keywords_combs(texts):
     texts = [text.split(" ") for text in texts]
     prompt = f"###Instruction:Extract the important keywords which describe the context accurately.\n\nInput:{query_text}\n\n###Response:"
     response = model.predict(prompt)
     keywords = response.split(", ")
+    keywords = keywords_no_companies(keywords)
     return keywords

utils/retriever.py CHANGED Viewed

@@ -15,6 +15,9 @@ def query_pinecone_sparse(
     else:
         participant = "Question"
     if year == "All":
         if quarter == "All":
             xc = index.query(
@@ -34,7 +37,7 @@ def query_pinecone_sparse(
                     "Quarter": {"$in": ["Q1", "Q2", "Q3", "Q4"]},
                     "Ticker": {"$eq": ticker},
                     "QA_Flag": {"$eq": participant},
-                    "Keywords": {"$in": keywords}
                 },
                 include_metadata=True,
             )
@@ -56,7 +59,7 @@ def query_pinecone_sparse(
                     "Quarter": {"$eq": quarter},
                     "Ticker": {"$eq": ticker},
                     "QA_Flag": {"$eq": participant},
-                    "Keywords": {"$in": keywords}
                 },
                 include_metadata=True,
             )
@@ -71,7 +74,7 @@ def query_pinecone_sparse(
                 "Quarter": {"$eq": quarter},
                 "Ticker": {"$eq": ticker},
                 "QA_Flag": {"$eq": participant},
-                "Keywords": {"$in": keywords}
             },
             include_metadata=True,
         )
@@ -100,6 +103,10 @@ def query_pinecone(
     else:
         participant = "Question"
     if year == "All":
         if quarter == "All":
             xc = index.query(
@@ -118,7 +125,7 @@ def query_pinecone(
                     "Quarter": {"$in": ["Q1", "Q2", "Q3", "Q4"]},
                     "Ticker": {"$eq": ticker},
                     "QA_Flag": {"$eq": participant},
-                    "Keywords": {"$in": keywords}
                 },
                 include_metadata=True,
             )
@@ -139,7 +146,7 @@ def query_pinecone(
                     "Quarter": {"$eq": quarter},
                     "Ticker": {"$eq": ticker},
                     "QA_Flag": {"$eq": participant},
-                    "Keywords": {"$in": keywords}
                 },
                 include_metadata=True,
             )
@@ -153,7 +160,7 @@ def query_pinecone(
                 "Quarter": {"$eq": quarter},
                 "Ticker": {"$eq": ticker},
                 "QA_Flag": {"$eq": participant},
-                "Keywords": {"$in": keywords}
             },
             include_metadata=True,
         )

     else:
         participant = "Question"
+    # Build one {'Keywords': word} filter clause per keyword; the list is passed as '$and' below
+    filter_dict = [{'Keywords': word} for word in keywords]
     if year == "All":
         if quarter == "All":
             xc = index.query(
                     "Quarter": {"$in": ["Q1", "Q2", "Q3", "Q4"]},
                     "Ticker": {"$eq": ticker},
                     "QA_Flag": {"$eq": participant},
+                    '$and': filter_dict
                 },
                 include_metadata=True,
             )
                     "Quarter": {"$eq": quarter},
                     "Ticker": {"$eq": ticker},
                     "QA_Flag": {"$eq": participant},
+                    '$and': filter_dict
                 },
                 include_metadata=True,
             )
                 "Quarter": {"$eq": quarter},
                 "Ticker": {"$eq": ticker},
                 "QA_Flag": {"$eq": participant},
+                '$and': filter_dict
             },
             include_metadata=True,
         )
     else:
         participant = "Question"
+    # Build one {'Keywords': word} filter clause per keyword; the list is passed as '$and' below
+    filter_dict = [{'Keywords': word} for word in keywords]
     if year == "All":
         if quarter == "All":
             xc = index.query(
                     "Quarter": {"$in": ["Q1", "Q2", "Q3", "Q4"]},
                     "Ticker": {"$eq": ticker},
                     "QA_Flag": {"$eq": participant},
+                    '$and': filter_dict
                 },
                 include_metadata=True,
             )
                     "Quarter": {"$eq": quarter},
                     "Ticker": {"$eq": ticker},
                     "QA_Flag": {"$eq": participant},
+                    '$and': filter_dict
                 },
                 include_metadata=True,
             )
                 "Quarter": {"$eq": quarter},
                 "Ticker": {"$eq": ticker},
                 "QA_Flag": {"$eq": participant},
+                '$and': filter_dict
             },
             include_metadata=True,
         )