Spaces:
Runtime error
Runtime error
Redesigned the prompts and added tests for date extraction.
Browse files- prompts.py +15 -9
- test_date.py +30 -0
prompts.py
CHANGED
@@ -1,14 +1,20 @@
|
|
1 |
|
2 |
-
# ---Define the two prompts
|
3 |
-
PROMPT_EXTRACT_DATE = """
|
4 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
-
|
7 |
-
Example a) The meeting took place in October 2022 ->> (year: 2022, month: 10)
|
8 |
-
Example b) During 1 November 1968 ->> (year: 2022, month: 10, day:1). Use json format. You are allowed to use the keys 'year', 'month', 'day', and 'page'.
|
9 |
-
|
10 |
-
{question}
|
11 |
-
"""
|
12 |
|
13 |
PROMPT_FED_ANALYST = """You are a research analyst at a federal reserve bank and you are trying to answer questions
|
14 |
or provide answers to queries about meetings of the Federal Open Market Committee. Use the following pieces of
|
|
|
1 |
|
2 |
+
# ---Define the two prompts----
|
3 |
+
# Prompt that asks the model to pull only the year and month out of a statement.
# The single template field is {question}, which receives the statement text.
# NOTE(review): the prompt asks for a "python dictionary" printed as a string and
# for the literal False when nothing is found; callers that parse the reply as
# JSON depend on the model emitting double-quoted keys - confirm this is intended.
PROMPT_EXTRACT_DATE = """ I will present you with a statement and I only require you to
do one task, that of extracting the date elements of it. You don't summarize, outline, or translate anything.
The description of the task follows:
Search the statement for explicit information about the month and the year.
If any year or month elements are found, proceed with the following steps:
1. If present in the statement, extract and format the year using a 4-digit numeric format (yyyy).
2. If present in the statement, extract and format the month using a 2-digit numeric format (mm). For instance 'March' to '03'.
Create a python dictionary with the extracted date elements and the respective keys, only including the elements found.
If there are one or more date elements found, output the dictionary as a string.
If no date elements are found, that is neither month nor year information, repeat the steps but focus on the
mid to last part of the sentence.
If then no date elements are found, output False.
The statement follows.

{question}"""
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
PROMPT_FED_ANALYST = """You are a research analyst at a federal reserve bank and you are trying to answer questions
|
20 |
or provide answers to queries about meetings of the Federal Open Market Committee. Use the following pieces of
|
test_date.py
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
|
3 |
+
import pytest
|
4 |
+
from langchain import PromptTemplate, LLMChain
|
5 |
+
from langchain.chat_models import ChatOpenAI
|
6 |
+
from FOMCTexts.prompts import PROMPT_EXTRACT_DATE
|
7 |
+
|
8 |
+
|
9 |
+
# Build the chain under test once, at import time: the raw prompt text is
# wrapped in a template, then paired with a chat model run at temperature 0.
PROMPT_DATE = PromptTemplate.from_template(PROMPT_EXTRACT_DATE)
date_extractor = LLMChain(
    llm=ChatOpenAI(model_name='gpt-3.5-turbo', temperature=0),
    prompt=PROMPT_DATE,
)
|
11 |
+
|
12 |
+
|
13 |
+
@pytest.mark.parametrize(
    "statement, expected_dict_date",
    [
        ('Summarize in two paragraphs the monetary policy outlook discussed by the participants during the meeting of June 2023', {"year": "2023", "month": "06"}),
        ("What were the key takeaways from the conference in September 2022?", {"year": "2022", "month": "09"}),
        ("Provide an update on the project status in March 2023.", {"year": "2023", "month": "03"}),
        ("Who attended the meeting on May 2021?", {"year": "2021", "month": "05"}),
        ("Please summarize the findings of the study conducted in January 2022.", {"year": "2022", "month": "01"}),
        ("Who were the participants in the event on July 2020?", {"year": "2020", "month": "07"}),
        ("Provide an overview of the report in August 2019.", {"year": "2019", "month": "08"}),
        ("Who presented the findings of the research in November 2021?", {"year": "2021", "month": "11"}),
        ("What were the discussions during the meeting in December 2024?", {"year": "2024", "month": "12"}),
    ],
)
def test_date_extraction(statement, expected_dict_date):
    """Check that the date-extraction chain yields exactly the expected date parts.

    Args:
        statement: Text passed to ``date_extractor.run``.
        expected_dict_date: Mapping of the date keys and string values the
            parsed reply must equal.

    Raises:
        json.JSONDecodeError: If the chain's reply, once newlines and spaces
            are removed, is not valid JSON.
    """
    raw_reply = date_extractor.run(statement)
    # Newlines and spaces are stripped before parsing, as in the original test.
    dict_date = json.loads(raw_reply.replace('\n', '').replace(' ', ''))
    # Compare the whole mapping. Looping only over the keys the model returned
    # would let an empty or partial reply pass, and would turn an unexpected
    # key into a KeyError instead of a readable assertion failure.
    assert dict_date == expected_dict_date
|