Spaces:
Build error
Build error
Upload 17 files
#17
by
awinml
- opened
- utils/entity_extraction.py +36 -1
- utils/retriever.py +13 -6
utils/entity_extraction.py
CHANGED
|
@@ -21,6 +21,41 @@ def expand_list_of_lists(list_of_lists):
|
|
| 21 |
return expanded_list
|
| 22 |
|
| 23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
def all_keywords_combs(texts):
|
| 25 |
|
| 26 |
texts = [text.split(" ") for text in texts]
|
|
@@ -47,7 +82,7 @@ def extract_keywords(query_text, model):
|
|
| 47 |
prompt = f"###Instruction:Extract the important keywords which describe the context accurately.\n\nInput:{query_text}\n\n###Response:"
|
| 48 |
response = model.predict(prompt)
|
| 49 |
keywords = response.split(", ")
|
| 50 |
-
keywords =
|
| 51 |
return keywords
|
| 52 |
|
| 53 |
|
|
|
|
| 21 |
return expanded_list
|
| 22 |
|
| 23 |
|
| 24 |
+
def keywords_no_companies(texts):
    """Split keyword phrases into lowercase words and drop company names.

    Each string in ``texts`` is split into words, the per-string word lists
    are combined with ``expand_list_of_lists`` (defined elsewhere in this
    module; presumably flattens a list of lists -- verify), every word is
    lowercased, and words found in the company/ticker blocklist are removed.

    Args:
        texts: Iterable of keyword strings; each may contain several words.

    Returns:
        A list of lowercase words with blocklisted company names and
        tickers removed.
    """
    # Company names and tickers to remove from the extracted entities.
    # A frozenset drops the duplicated entries of the original list and makes
    # each membership test O(1) instead of a linear scan.
    company_terms = frozenset(
        {
            "apple",
            "amd",
            "amazon",
            "cisco",
            "google",
            "microsoft",
            "nvidia",
            "asml",
            "intel",
            "micron",
            "aapl",
            "csco",
            "msft",
            "nvda",
            "googl",
            "mu",
            "intc",
            "amzn",
        }
    )

    # Split on any run of whitespace instead of a single " ": this avoids
    # empty-string tokens (from leading/trailing/repeated spaces) and tokens
    # carrying a trailing newline, which would otherwise be returned as
    # keywords.
    words_per_text = [text.split() for text in texts]
    words = expand_list_of_lists(words_per_text)

    # Compare in lowercase so "Apple" or "AAPL" match the blocklist entries.
    lowered_words = [word.lower() for word in words]
    return [word for word in lowered_words if word not in company_terms]
|
| 57 |
+
|
| 58 |
+
|
| 59 |
def all_keywords_combs(texts):
|
| 60 |
|
| 61 |
texts = [text.split(" ") for text in texts]
|
|
|
|
| 82 |
prompt = f"###Instruction:Extract the important keywords which describe the context accurately.\n\nInput:{query_text}\n\n###Response:"
|
| 83 |
response = model.predict(prompt)
|
| 84 |
keywords = response.split(", ")
|
| 85 |
+
keywords = keywords_no_companies(keywords)
|
| 86 |
return keywords
|
| 87 |
|
| 88 |
|
utils/retriever.py
CHANGED
|
@@ -15,6 +15,9 @@ def query_pinecone_sparse(
|
|
| 15 |
else:
|
| 16 |
participant = "Question"
|
| 17 |
|
|
|
|
|
|
|
|
|
|
| 18 |
if year == "All":
|
| 19 |
if quarter == "All":
|
| 20 |
xc = index.query(
|
|
@@ -34,7 +37,7 @@ def query_pinecone_sparse(
|
|
| 34 |
"Quarter": {"$in": ["Q1", "Q2", "Q3", "Q4"]},
|
| 35 |
"Ticker": {"$eq": ticker},
|
| 36 |
"QA_Flag": {"$eq": participant},
|
| 37 |
-
|
| 38 |
},
|
| 39 |
include_metadata=True,
|
| 40 |
)
|
|
@@ -56,7 +59,7 @@ def query_pinecone_sparse(
|
|
| 56 |
"Quarter": {"$eq": quarter},
|
| 57 |
"Ticker": {"$eq": ticker},
|
| 58 |
"QA_Flag": {"$eq": participant},
|
| 59 |
-
|
| 60 |
},
|
| 61 |
include_metadata=True,
|
| 62 |
)
|
|
@@ -71,7 +74,7 @@ def query_pinecone_sparse(
|
|
| 71 |
"Quarter": {"$eq": quarter},
|
| 72 |
"Ticker": {"$eq": ticker},
|
| 73 |
"QA_Flag": {"$eq": participant},
|
| 74 |
-
|
| 75 |
},
|
| 76 |
include_metadata=True,
|
| 77 |
)
|
|
@@ -100,6 +103,10 @@ def query_pinecone(
|
|
| 100 |
else:
|
| 101 |
participant = "Question"
|
| 102 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
if year == "All":
|
| 104 |
if quarter == "All":
|
| 105 |
xc = index.query(
|
|
@@ -118,7 +125,7 @@ def query_pinecone(
|
|
| 118 |
"Quarter": {"$in": ["Q1", "Q2", "Q3", "Q4"]},
|
| 119 |
"Ticker": {"$eq": ticker},
|
| 120 |
"QA_Flag": {"$eq": participant},
|
| 121 |
-
|
| 122 |
},
|
| 123 |
include_metadata=True,
|
| 124 |
)
|
|
@@ -139,7 +146,7 @@ def query_pinecone(
|
|
| 139 |
"Quarter": {"$eq": quarter},
|
| 140 |
"Ticker": {"$eq": ticker},
|
| 141 |
"QA_Flag": {"$eq": participant},
|
| 142 |
-
|
| 143 |
},
|
| 144 |
include_metadata=True,
|
| 145 |
)
|
|
@@ -153,7 +160,7 @@ def query_pinecone(
|
|
| 153 |
"Quarter": {"$eq": quarter},
|
| 154 |
"Ticker": {"$eq": ticker},
|
| 155 |
"QA_Flag": {"$eq": participant},
|
| 156 |
-
|
| 157 |
},
|
| 158 |
include_metadata=True,
|
| 159 |
)
|
|
|
|
| 15 |
else:
|
| 16 |
participant = "Question"
|
| 17 |
|
| 18 |
+
# Create filter dictionary based on keywords
|
| 19 |
+
filter_dict = [{'Keywords': word} for word in keywords]
|
| 20 |
+
|
| 21 |
if year == "All":
|
| 22 |
if quarter == "All":
|
| 23 |
xc = index.query(
|
|
|
|
| 37 |
"Quarter": {"$in": ["Q1", "Q2", "Q3", "Q4"]},
|
| 38 |
"Ticker": {"$eq": ticker},
|
| 39 |
"QA_Flag": {"$eq": participant},
|
| 40 |
+
'$and': filter_dict
|
| 41 |
},
|
| 42 |
include_metadata=True,
|
| 43 |
)
|
|
|
|
| 59 |
"Quarter": {"$eq": quarter},
|
| 60 |
"Ticker": {"$eq": ticker},
|
| 61 |
"QA_Flag": {"$eq": participant},
|
| 62 |
+
'$and': filter_dict
|
| 63 |
},
|
| 64 |
include_metadata=True,
|
| 65 |
)
|
|
|
|
| 74 |
"Quarter": {"$eq": quarter},
|
| 75 |
"Ticker": {"$eq": ticker},
|
| 76 |
"QA_Flag": {"$eq": participant},
|
| 77 |
+
'$and': filter_dict
|
| 78 |
},
|
| 79 |
include_metadata=True,
|
| 80 |
)
|
|
|
|
| 103 |
else:
|
| 104 |
participant = "Question"
|
| 105 |
|
| 106 |
+
# Create filter dictionary based on keywords
|
| 107 |
+
filter_dict = [{'Keywords': word} for word in keywords]
|
| 108 |
+
|
| 109 |
+
|
| 110 |
if year == "All":
|
| 111 |
if quarter == "All":
|
| 112 |
xc = index.query(
|
|
|
|
| 125 |
"Quarter": {"$in": ["Q1", "Q2", "Q3", "Q4"]},
|
| 126 |
"Ticker": {"$eq": ticker},
|
| 127 |
"QA_Flag": {"$eq": participant},
|
| 128 |
+
'$and': filter_dict
|
| 129 |
},
|
| 130 |
include_metadata=True,
|
| 131 |
)
|
|
|
|
| 146 |
"Quarter": {"$eq": quarter},
|
| 147 |
"Ticker": {"$eq": ticker},
|
| 148 |
"QA_Flag": {"$eq": participant},
|
| 149 |
+
'$and': filter_dict
|
| 150 |
},
|
| 151 |
include_metadata=True,
|
| 152 |
)
|
|
|
|
| 160 |
"Quarter": {"$eq": quarter},
|
| 161 |
"Ticker": {"$eq": ticker},
|
| 162 |
"QA_Flag": {"$eq": participant},
|
| 163 |
+
'$and': filter_dict
|
| 164 |
},
|
| 165 |
include_metadata=True,
|
| 166 |
)
|