Spaces:

ruisp
/

MonPol

Runtime error

App Files Files

ruisp committed on Jul 6, 2023

Commit

032e57d

1 Parent(s): f122dc0

Redesigned the prompts and added tests for date extraction.

Browse files

Files changed (2) hide show

prompts.py +15 -9
test_date.py +30 -0

prompts.py CHANGED Viewed

@@ -1,14 +1,20 @@
-# ---Define the two prompts---
-PROMPT_EXTRACT_DATE = """Extract the date elements from the question at the end of the two examples in numeric format.
- If there is no date elements found output False. If there is mention of both year and month, use the datatime string format %Y-%m.
-Example 1)The meeting took place in October 2022 ->> 2022-10. Then put them in a dictionary like so:
-Example a) The meeting took place in October 2022 ->> (year: 2022, month: 10)
- Example b) During 1 November 1968 ->> (year: 2022, month: 10, day:1). Use json format. You are allowed to use the keys 'year', 'month', 'day', and 'page'.
-{question}
-"""
 PROMPT_FED_ANALYST = """You are a research analyst at a federal reserve bank and you are trying to answer questions
 or provide answers to queries about meetings of the Federal Open Market Committee. Use the following pieces of

+# ---Define the two prompts----
+# PROMPT_EXTRACT_DATE instructs the model to pull the year and/or month out of a statement.
+# The requested output is a JSON object with double-quoted keys and string values; such an
+# object is also a valid Python dict literal, so both json.loads and ast.literal_eval can read it.
+# NOTE: `{question}` must remain the only brace pair in this text, because the string is
+# consumed as a format template (test_date.py feeds it to PromptTemplate.from_template).
+PROMPT_EXTRACT_DATE = """I will present you with a statement and I only require you to
+do one task, that of extracting the date elements of it. You don't summarize, outline, or translate anything.
+The description of the task follows:
+Search the statement for explicit information about the month and the year.
+If any year or month elements are found, proceed with the following steps:
+1. If present in the statement, extract and format the year using a 4-digit numeric format (yyyy).
+2. If present in the statement, extract and format the month using a 2-digit numeric format (mm). For instance 'March' to '03'.
+Create a JSON object with the extracted date elements, using the keys "year" and "month" and string values,
+only including the elements found.
+If there are one or more date elements found, output only the JSON object, using double quotes.
+If no date elements are found, that is neither month nor year information, repeat the steps but focus on the
+mid to last part of the sentence.
+If then no date elements are found, output False.
+The statement follows.
+{question}"""
 PROMPT_FED_ANALYST = """You are a research analyst at a federal reserve bank and you are trying to answer questions
 or provide answers to queries about meetings of the Federal Open Market Committee. Use the following pieces of

test_date.py ADDED Viewed

	@@ -0,0 +1,30 @@

+import json
+import pytest
+from langchain import PromptTemplate, LLMChain
+from langchain.chat_models import ChatOpenAI
+from FOMCTexts.prompts import PROMPT_EXTRACT_DATE
+# Wrap the raw prompt text in a LangChain template so each test statement can be substituted into it.
+PROMPT_DATE = PromptTemplate.from_template(PROMPT_EXTRACT_DATE)
+# Single chain shared by every parametrized case below, built when this module is imported,
+# using the gpt-3.5-turbo chat model with temperature=0.
+# NOTE(review): constructing ChatOpenAI here presumably needs OpenAI credentials at collection time - confirm.
+date_extractor = LLMChain(prompt=PROMPT_DATE, llm=ChatOpenAI(temperature=0, model_name='gpt-3.5-turbo'))
+@pytest.mark.parametrize(
+    "statement, expected_dict_date",
+    [
+        ('Summarize in two paragraphs the monetary policy outlook discussed by the participants during the meeting of June 2023', {"year": "2023", "month": "06"}),
+        ("What were the key takeaways from the conference in September 2022?", {"year": "2022", "month": "09"}),
+        ("Provide an update on the project status in March 2023.", {"year": "2023", "month": "03"}),
+        ("Who attended the meeting on May 2021?", {"year": "2021", "month": "05"}),
+        ("Please summarize the findings of the study conducted in January 2022.", {"year": "2022", "month": "01"}),
+        ("Who were the participants in the event on July 2020?", {"year": "2020", "month": "07"}),
+        ("Provide an overview of the report in August 2019.", {"year": "2019", "month": "08"}),
+        ("Who presented the findings of the research in November 2021?", {"year": "2021", "month": "11"}),
+        ("What were the discussions during the meeting in December 2024?", {"year": "2024", "month": "12"}),
+    ]
+)
+def test_date_extraction(statement, expected_dict_date):
+    """Check that the date-extraction chain returns exactly the expected year/month mapping.
+
+    Args:
+        statement: Natural-language question handed to ``date_extractor``.
+        expected_dict_date: Mapping with string values under the keys ``"year"`` and ``"month"``.
+
+    The model reply is parsed as JSON first; if that fails it is parsed as a Python literal,
+    because the prompt allows a dict rendered as a string (possibly single-quoted) or ``False``.
+    """
+    import ast  # local import keeps this block self-contained; only needed for the fallback parse
+    raw_output = date_extractor.run(statement).strip()
+    try:
+        dict_date = json.loads(raw_output)
+    except json.JSONDecodeError:
+        # Single-quoted dict reprs and the bare word False are valid Python literals but not JSON.
+        dict_date = ast.literal_eval(raw_output)
+    # Compare whole mappings: a per-key loop would pass vacuously on an empty or partial
+    # result and would raise KeyError (not an assertion failure) on an unexpected key.
+    assert dict_date == expected_dict_date