ruisp committed on
Commit
c657ec0
·
1 Parent(s): 3d38978

Upload 4 files

Browse files

Adding the app and related scripts

Files changed (3) hide show
  1. filterminutes.py +71 -0
  2. prompts.py +23 -0
  3. public_app.py +83 -0
filterminutes.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+
3
+ log = logging.getLogger('filter methods')
4
+ logging.basicConfig(level=logging.INFO)
5
+
6
+
7
+ def filter_docs_by_meta(docs, filter_dict):
8
+ """
9
+ Filter documents by multiple parameters
10
+ Parameters:
11
+ docs : List[langchain.schema.Document]
12
+ filter_dict : Dict[str, Any]
13
+
14
+ Returns: List of filtered documents
15
+
16
+ Examples:
17
+ docs = [langchain.schema.Document(metadata={'a': 1, 'b': 2}, text='text1')
18
+ langchain.schema.Document(metadata={'a': 1, 'b': 3}, text='text2')]
19
+ filter_dict = {'a': 1}
20
+ filter_docs_by_meta(docs, filter_dict)
21
+ [langchain.schema.Document(metadata={'a': 1, 'b': 2}, text='text1')]
22
+
23
+ docs = [langchain.schema.Document(metadata={'a': 1, 'b': 2}, text='text1')
24
+ langchain.schema.Document(metadata={'a': 1, 'b': 3}, text='text2')]
25
+ filter_dict = {'a': 1, 'b': 2}
26
+ filter_docs_by_meta(docs, filter_dict)
27
+ [langchain.schema.Document(metadata={'a': 1, 'b': 2}, text='text1')]
28
+
29
+ """
30
+ filtered_docs = []
31
+ for doc in docs:
32
+ append = True
33
+ for key, value in filter_dict.items():
34
+ if doc.metadata[key] != value:
35
+ append = False
36
+ break
37
+ if append:
38
+ filtered_docs.append(doc)
39
+ return filtered_docs
40
+
41
+
42
+ def search_with_filter(vector_store, query, filter_dict, target_k=5, init_k=100, step=50):
43
+ """
44
+ Expand search with filter until reaching at least a pre-determined number of documents.
45
+ ----------
46
+ Parameters
47
+ vector_store : langchain.vectorstores.FAISS
48
+ The FAISS vector store.
49
+ query : str
50
+ The query to search for.
51
+ filter_dict : Dict[str, Any]
52
+ The parameters to filer for
53
+ target_k : int
54
+ The minimum number of documents desired after filtering
55
+ init_k : int
56
+ The top-k documents to extract for the initial search.
57
+ step : int
58
+ The size of the step when enlarging the search.
59
+
60
+ Returns: List of at least target_k Documents for post-processing
61
+
62
+ """
63
+ context = filter_docs_by_meta(vector_store.similarity_search(query, k=init_k), filter_dict)
64
+ while len(context) < target_k:
65
+ log.info(f'Context contains {len(context)} documents')
66
+ log.info(f'Expanding search with k={init_k}')
67
+ init_k += step
68
+ context = filter_docs_by_meta(vector_store.similarity_search(query, k=init_k), filter_dict)
69
+ log.info(f'Done. Context contains {len(context)} Documents matching the filtering criteria')
70
+ return context
71
+
prompts.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
# ---Define the two prompts---

# Few-shot prompt that asks the model to pull date elements out of the user's
# question and emit them as JSON (keys: 'year', 'month', 'day', 'page').
# NOTE: the original Example b mapped "1 November 1968" to (year: 2022,
# month: 10) — a wrong few-shot example, fixed to (year: 1968, month: 11).
PROMPT_EXTRACT_DATE = """Extract the date elements from the question at the end of the two examples in numeric format.
If there are no date elements found output False. If there is mention of both year and month, use the datetime string format %Y-%m.

Example 1) The meeting took place in October 2022 ->> 2022-10. Then put them in a dictionary like so:
Example a) The meeting took place in October 2022 ->> (year: 2022, month: 10)
Example b) During 1 November 1968 ->> (year: 1968, month: 11, day: 1). Use json format. You are allowed to use the keys 'year', 'month', 'day', and 'page'.

{question}
"""

# QA prompt giving the model a research-analyst persona; the retrieved
# documents are injected into {context} and the user's query into {question}.
PROMPT_FED_ANALYST = """You are a research analyst at a federal reserve bank and you are trying to answer questions
or provide answers to queries about meetings of the Federal Open Market Committee. Use the following pieces of
context to answer the question at the end, giving special attention to economic, cultural, financial, or political
developments. If you don't have all the elements to answer the query, say it explicitly. Finally, if you are not
provided with date elements, warn the user that the output is likely to be wrong due to the time sensitivity of questions
related to economic matters.

{context}

Question: {question}
"""
public_app.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+
3
+ from langchain import PromptTemplate, LLMChain
4
+ from langchain.chains.question_answering import load_qa_chain
5
+ from langchain.vectorstores import FAISS
6
+ from langchain.embeddings import HuggingFaceEmbeddings
7
+ from langchain.chat_models import ChatOpenAI
8
+ import gradio as gr
9
+ import json
10
+
11
+ from prompts import PROMPT_EXTRACT_DATE, PROMPT_FED_ANALYST
12
+ from filterminutes import search_with_filter
13
+
14
# --------------------------Load the sentence transformer and the vector store--------------------------#
# Query-embedding model; runs on CPU with raw (non-normalized) embeddings.
# NOTE(review): normalize_embeddings=False presumably matches how the FAISS
# index was built — confirm against the indexing script.
model_name = 'sentence-transformers/all-mpnet-base-v2'
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': False}
embeddings = HuggingFaceEmbeddings(model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs)
# Local FAISS index of the FOMC meeting minutes, loaded once at import time.
vs = FAISS.load_local("MINUTES_FOMC_HISTORY", embeddings)

# --------------------------Import the prompts------------------#
# Templates from prompts.py: one extracts date elements from the query,
# the other answers as a Fed research analyst over the retrieved context.
PROMPT_DATE = PromptTemplate.from_template(PROMPT_EXTRACT_DATE)
PROMPT_ANALYST = PromptTemplate.from_template(PROMPT_FED_ANALYST)
24
+
25
+
26
+ # --------------------------define the qa chain for answering queries--------------------------#
27
# --------------------------define the qa chain for answering queries--------------------------#
def load_chains(open_ai_key):
    """
    Build the two per-request LLM chains from the user-supplied API key.

    Parameters
    ----------
    open_ai_key : str
        OpenAI API key entered by the user in the UI.

    Returns
    -------
    tuple (date_extractor, fed_chain)
        date_extractor : LLMChain that extracts date elements from the query.
        fed_chain : 'stuff' QA chain that answers from the filtered context.
    """
    # temperature=0 on both chains so extraction and answers are deterministic.
    date_extractor = LLMChain(llm=ChatOpenAI(temperature=0, model_name='gpt-3.5-turbo', openai_api_key=open_ai_key),
                              prompt=PROMPT_DATE)
    fed_chain = load_qa_chain(llm=ChatOpenAI(model_name='gpt-3.5-turbo', temperature=0, openai_api_key=open_ai_key),
                              chain_type='stuff', prompt=PROMPT_ANALYST)
    return date_extractor, fed_chain
33
+
34
+
35
def get_chain(query, api_key):
    """
    Detects the date, computes similarity, and answers the query using
    only documents corresponding to the date requested.

    The query is first passed to the date extractor to extract the date
    and then to the qa chain to answer the query.

    Parameters
    ----------
    query : str
        Query to be answered.
    api_key : str
        OpenAI API key.

    Returns
    -------
    str
        Answer to the query.
    """
    date_extractor, fed_chain = load_chains(api_key)
    logging.info('Extracting the date in numeric format..')
    date_response = date_extractor.run(query)

    filter_date = None
    if date_response != 'False':
        # The model is prompted to emit JSON but may return malformed text;
        # previously json.loads would raise and crash the whole request.
        # Fall back to the unfiltered path instead.
        try:
            filter_date = json.loads(date_response)
        except ValueError:
            logging.warning('Could not parse date response %r; skipping the date filter.', date_response)

    if filter_date:
        logging.info(f'Date parameters retrieved: {filter_date}')
        logging.info('Running the qa with filtered context..')
        filtered_context = search_with_filter(vs, query, init_k=200, step=300, target_k=7, filter_dict=filter_date)

        logging.info(20 * '-' + 'Metadata for the documents to be used' + 20 * '-')
        for doc in filtered_context:
            logging.info(doc.metadata)
    else:
        logging.info('No date elements found. Running the qa without filtering can output incorrect results.')
        filtered_context = vs.similarity_search(query, k=7)
    # Cap the context at 7 documents to keep the 'stuff' chain within limits.
    return fed_chain({'input_documents': filtered_context[:7], 'question': query})['output_text']
68
+
69
+
70
if __name__ == '__main__':
    # Gradio UI: the user supplies a query plus their own OpenAI API key,
    # and the answer produced by get_chain is rendered in a single textbox.
    app = gr.Interface(fn=get_chain,
                       inputs=[gr.Textbox(lines=2, placeholder="Enter your query", label='Your query'),
                               gr.Textbox(lines=1, placeholder="Your OpenAI API key here", label='OpenAI Key')],
                       description='Query the public database in FRED from 1936-2023',
                       outputs=gr.Textbox(lines=1, label='Answer'),
                       title='Chat with the FOMC meeting minutes',
                       examples=[['What was the economic outlook from the staff presented in the meeting '
                                  'of April 2009 with respect to labour market developments and industrial production?'],
                                 ['Who were the voting members present in the meeting on March 2010?'],
                                 ['How important was the pandemic of Covid-19 in the discussions during 2020?'],
                                 ['What was the impact of the oil crisis for the economic outlook during 1973?']],
                       )
    app.launch()