Spaces:
Runtime error
Runtime error
Upload 4 files
Browse files

Adding the app and related scripts
- filterminutes.py +71 -0
- prompts.py +23 -0
- public_app.py +83 -0
filterminutes.py
ADDED
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import logging
|
2 |
+
|
# Module-level logger shared by the filtering helpers below.
log = logging.getLogger('filter methods')
# Configure root logging once at import time so the expansion steps are visible.
logging.basicConfig(level=logging.INFO)
|
5 |
+
|
6 |
+
|
7 |
+
def filter_docs_by_meta(docs, filter_dict):
    """
    Filter documents whose metadata matches every key/value pair in filter_dict.

    Parameters
    ----------
    docs : List[langchain.schema.Document]
        Documents to filter; each must expose a ``metadata`` mapping.
    filter_dict : Dict[str, Any]
        Required metadata key/value pairs. An empty dict matches everything.

    Returns
    -------
    List of documents whose metadata contains all the requested pairs.

    Examples
    --------
    docs = [langchain.schema.Document(metadata={'a': 1, 'b': 2}, text='text1'),
            langchain.schema.Document(metadata={'a': 1, 'b': 3}, text='text2')]
    filter_docs_by_meta(docs, {'a': 1})
    [langchain.schema.Document(metadata={'a': 1, 'b': 2}, text='text1'),
     langchain.schema.Document(metadata={'a': 1, 'b': 3}, text='text2')]

    filter_docs_by_meta(docs, {'a': 1, 'b': 2})
    [langchain.schema.Document(metadata={'a': 1, 'b': 2}, text='text1')]
    """
    # .get() treats a missing metadata key as a mismatch instead of raising
    # KeyError (the LLM-extracted filter may contain keys absent from old docs).
    return [
        doc
        for doc in docs
        if all(doc.metadata.get(key) == value for key, value in filter_dict.items())
    ]
|
40 |
+
|
41 |
+
|
42 |
+
def search_with_filter(vector_store, query, filter_dict, target_k=5, init_k=100, step=50):
    """
    Expand search with filter until reaching at least a pre-determined number of documents.
    ----------
    Parameters
    vector_store : langchain.vectorstores.FAISS
        The FAISS vector store.
    query : str
        The query to search for.
    filter_dict : Dict[str, Any]
        The metadata parameters to filter for.
    target_k : int
        The minimum number of documents desired after filtering.
    init_k : int
        The top-k documents to extract for the initial search.
    step : int
        The size of the step when enlarging the search.

    Returns: List of Documents for post-processing. Contains at least target_k
    documents, unless the whole index holds fewer matching documents — in that
    case every matching document found is returned.
    """
    retrieved = vector_store.similarity_search(query, k=init_k)
    context = filter_docs_by_meta(retrieved, filter_dict)
    while len(context) < target_k:
        log.info(f'Context contains {len(context)} documents')
        # FAISS returns at most the index size: once fewer than k documents
        # come back, the store is exhausted and growing k can never reach
        # target_k — break instead of looping forever.
        if len(retrieved) < init_k:
            log.info('Vector store exhausted; returning all matching documents found.')
            break
        init_k += step
        log.info(f'Expanding search with k={init_k}')
        retrieved = vector_store.similarity_search(query, k=init_k)
        context = filter_docs_by_meta(retrieved, filter_dict)
    log.info(f'Done. Context contains {len(context)} Documents matching the filtering criteria')
    return context
|
71 |
+
|
prompts.py
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
# ---Define the two prompts---

# Few-shot prompt that asks the LLM to extract date elements from the user's
# question and emit them as JSON (keys: 'year', 'month', 'day', 'page'),
# or the literal string False when no date is present.
PROMPT_EXTRACT_DATE = """Extract the date elements from the question at the end of the two examples in numeric format.
If there is no date elements found output False. If there is mention of both year and month, use the datetime string format %Y-%m.

Example 1)The meeting took place in October 2022 ->> 2022-10. Then put them in a dictionary like so:
Example a) The meeting took place in October 2022 ->> (year: 2022, month: 10)
Example b) During 1 November 1968 ->> (year: 1968, month: 11, day: 1). Use json format. You are allowed to use the keys 'year', 'month', 'day', and 'page'.

{question}
"""

# System-style prompt for the "stuff" QA chain: answer from the supplied
# context, and warn the user when no date elements were provided.
PROMPT_FED_ANALYST = """You are a research analyst at a federal reserve bank and you are trying to answer questions
or provide answers to queries about meetings of the Federal Open Market Committee. Use the following pieces of
context to answer the question at the end, giving special attention to economic, cultural, financial, or political
developments. If you don't have all the elements to answer the query, say it explicitly. Finally, if you are not
provided with date elements, warn the user that the output is likely to be wrong due to the time sensitivity of questions
related to economic matters.

{context}

Question: {question}
"""
|
public_app.py
ADDED
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import logging
|
2 |
+
|
3 |
+
from langchain import PromptTemplate, LLMChain
|
4 |
+
from langchain.chains.question_answering import load_qa_chain
|
5 |
+
from langchain.vectorstores import FAISS
|
6 |
+
from langchain.embeddings import HuggingFaceEmbeddings
|
7 |
+
from langchain.chat_models import ChatOpenAI
|
8 |
+
import gradio as gr
|
9 |
+
import json
|
10 |
+
|
11 |
+
from prompts import PROMPT_EXTRACT_DATE, PROMPT_FED_ANALYST
|
12 |
+
from filterminutes import search_with_filter
|
13 |
+
|
14 |
+
# --------------------------Load the sentence transformer and the vector store--------------------------#
# CPU-hosted sentence-transformer used to embed queries against the FAISS index.
model_name = 'sentence-transformers/all-mpnet-base-v2'
model_kwargs = {'device': 'cpu'}
# NOTE(review): embeddings are left unnormalized — presumably the index was
# built the same way; confirm against the index-building script.
encode_kwargs = {'normalize_embeddings': False}
embeddings = HuggingFaceEmbeddings(model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs)
# Pre-built local FAISS index of the FOMC meeting minutes.
vs = FAISS.load_local("MINUTES_FOMC_HISTORY", embeddings)

# --------------------------Import the prompts------------------#
PROMPT_DATE = PromptTemplate.from_template(PROMPT_EXTRACT_DATE)
PROMPT_ANALYST = PromptTemplate.from_template(PROMPT_FED_ANALYST)
|
24 |
+
|
25 |
+
|
26 |
+
# --------------------------define the qa chain for answering queries--------------------------#
def load_chains(open_ai_key):
    """Build the two LLM chains used by the app.

    Returns a (date_extractor, fed_chain) pair: the first extracts date
    elements from the query, the second answers it from stuffed context.
    Both use a deterministic (temperature=0) gpt-3.5-turbo authenticated
    with the caller-supplied OpenAI key.
    """
    def _chat_model():
        # Fresh LLM instance per chain, as in the original wiring.
        return ChatOpenAI(temperature=0, model_name='gpt-3.5-turbo', openai_api_key=open_ai_key)

    extract_date_chain = LLMChain(llm=_chat_model(), prompt=PROMPT_DATE)
    analyst_qa_chain = load_qa_chain(llm=_chat_model(), chain_type='stuff', prompt=PROMPT_ANALYST)
    return extract_date_chain, analyst_qa_chain
|
33 |
+
|
34 |
+
|
35 |
+
def get_chain(query, api_key):
    """
    Detects the date, computes similarity, and answers the query using
    only documents corresponding to the date requested.
    The query is first passed to the date extractor to extract the date
    and then to the qa chain to answer the query.
    Parameters
    ----------
    query : str
        Query to be answered.
    api_key : str
        OpenAI API key.

    Returns
    Answer to the query.
    """
    date_extractor, fed_chain = load_chains(api_key)
    logging.info('Extracting the date in numeric format..')
    date_response = date_extractor.run(query)

    # The extractor is an LLM: tolerate stray whitespace around 'False' and
    # malformed JSON instead of crashing the whole app on one bad completion.
    filter_date = None
    if date_response.strip() != 'False':
        try:
            filter_date = json.loads(date_response)
        except json.JSONDecodeError:
            logging.warning(f'Could not parse date response {date_response!r}; '
                            'falling back to unfiltered search.')

    if filter_date is not None:
        logging.info(f'Date parameters retrieved: {filter_date}')
        logging.info('Running the qa with filtered context..')
        filtered_context = search_with_filter(vs, query, init_k=200, step=300, target_k=7, filter_dict=filter_date)

        logging.info(20 * '-' + 'Metadata for the documents to be used' + 20 * '-')
        for doc in filtered_context:
            logging.info(doc.metadata)
    else:
        logging.info('No date elements found. Running the qa without filtering can output incorrect results.')
        filtered_context = vs.similarity_search(query, k=7)
    # Cap at 7 documents so the stuffed prompt stays within the model's context window.
    return fed_chain({'input_documents': filtered_context[:7], 'question': query})['output_text']
|
68 |
+
|
69 |
+
|
70 |
+
if __name__ == '__main__':
    # Gradio UI: a free-text query plus the user's own OpenAI key in,
    # the chain's answer out. Example questions span the 1936-2023 range
    # of the underlying FOMC minutes database.
    app = gr.Interface(fn=get_chain,
                       inputs=[gr.Textbox(lines=2, placeholder="Enter your query", label='Your query'),
                               gr.Textbox(lines=1, placeholder="Your OpenAI API key here", label='OpenAI Key')],
                       description='Query the public database in FRED from 1936-2023',
                       outputs=gr.Textbox(lines=1, label='Answer'),
                       title='Chat with the FOMC meeting minutes',
                       examples=[['What was the economic outlook from the staff presented in the meeting '
                                  'of April 2009 with respect to labour market developments and industrial production?'],
                                 ['Who were the voting members present in the meeting on March 2010?'],
                                 ['How important was the pandemic of Covid-19 in the discussions during 2020?'],
                                 ['What was the impact of the oil crisis for the economic outlook during 1973?']],
                       )
    app.launch()
|