File size: 9,782 Bytes
2fb0169
 
 
4792c87
152dfa1
941e6a0
2fb0169
 
 
36aeeec
 
941e6a0
 
152dfa1
2fb0169
 
 
 
1770a97
 
 
2fb0169
 
 
 
 
 
 
 
 
 
 
75c6666
 
2fb0169
3201760
2fb0169
 
c9767eb
941e6a0
c9767eb
 
 
 
 
 
 
 
 
 
 
 
 
 
941e6a0
 
c9767eb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
941e6a0
c9767eb
 
941e6a0
 
 
 
 
 
 
c9767eb
941e6a0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4792c87
 
941e6a0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4792c87
2fb0169
941e6a0
 
 
 
 
 
 
 
 
 
 
2fb0169
71a34b2
2fb0169
 
941e6a0
c9767eb
941e6a0
2fb0169
 
cfe1e0a
a839174
cfe1e0a
2fb0169
4a4855c
2fb0169
4792c87
 
2fb0169
941e6a0
 
2fb0169
941e6a0
2fb0169
 
71a34b2
4792c87
941e6a0
 
2fb0169
941e6a0
2fb0169
 
 
 
 
 
adf13ff
2fb0169
 
b8958bb
75c6666
2fb0169
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
import os
import pandas as pd
import requests
from functools import lru_cache
from pydantic import Field, BaseModel
from typing import Any, Optional

from omegaconf import OmegaConf

from vectara_agentic.agent import Agent
from vectara_agentic.tools import ToolsFactory, VectaraToolFactory
from vectara_agentic.agent_config import AgentConfig
from vectara_agentic.sub_query_workflow import SubQuestionQueryWorkflow

from dotenv import load_dotenv
load_dotenv(override=True)

# Companies the assistant can answer questions about: ticker symbol -> display name.
# Insertion order is meaningful: it is the order companies are listed to the user
# and to the LLM in tool descriptions.
tickers: dict[str, str] = {
    "C": "Citigroup",
    "COF": "Capital One",
    "JPM": "JPMorgan Chase",
    "AAPL": "Apple Computer",
    "GOOG": "Google",
    "AMZN": "Amazon",
    "SNOW": "Snowflake",
    "TEAM": "Atlassian",
    "TSLA": "Tesla",
    "NVDA": "Nvidia",
    "MSFT": "Microsoft",
    "AMD": "Advanced Micro Devices",
    "INTC": "Intel",
    "NFLX": "Netflix",
    "STT": "State Street",
    "BK": "Bank of New York Mellon",
}

# Years accepted by the tools: 2015 through 2024 inclusive (range end is exclusive).
years: range = range(2015, 2025)

# Greeting shown before the user has asked anything.
initial_prompt: str = "How can I help you today?"

# Tool to get the income statement for a given company and year using the FMP API
@lru_cache(maxsize=256)
def fmp_income_statement(
    ticker: str = Field(description="the ticker symbol of the company.", examples=["AAPL", "GOOG", "AMZN"]),
    year: int = Field(description="the year for which to get the income statement.", examples=[2020, 2021, 2022]),
) -> str:
    """
    Get the income statement for a given company and year using the FMP (https://financialmodelingprep.com) API.
    Args:
        ticker (str): the ticker symbol of the company.
        year (int): the year for which to get the income statement.

    Returns:
        A string listing the income statement fields as 'key=value' pairs,
        or a short message explaining why no data could be returned.
        All data is in USD, but you can convert it to more compact form like K, M, B.
    """
    # NOTE: lru_cache memoizes every returned string per (ticker, year), including
    # the error messages below. Exceptions (e.g. network failures) are not cached.
    if ticker not in tickers or year not in years:
        return "Invalid ticker or year. Please call this tool with a valid company ticker and year."

    fmp_api_key = os.environ.get("FMP_API_KEY")
    if fmp_api_key is None:
        return "FMP_API_KEY environment variable not set. This tool does not work."

    url = f"https://financialmodelingprep.com/api/v3/income-statement/{ticker}"
    # A timeout keeps a stalled connection from hanging the agent indefinitely.
    # requests.RequestException (timeouts, connection errors) is left to propagate
    # so that transient failures are not memoized by lru_cache.
    response = requests.get(url, params={"apikey": fmp_api_key}, timeout=30)
    if response.status_code != 200:
        return f"FMP API returned error {response.status_code}. This tool does not work."

    data = response.json()
    # The endpoint is expected to return a list of yearly records; anything else
    # (e.g. an error object or an empty list) means there is nothing to report.
    if not isinstance(data, list) or not data:
        return "No data found for the given ticker symbol."

    income_statement = pd.DataFrame(data)
    if "date" not in income_statement.columns:
        return "No data found for the given ticker symbol."

    income_statement["date"] = pd.to_datetime(income_statement["date"])
    rows_for_year = income_statement[income_statement["date"].dt.year == int(year)]
    # The API may not cover every valid year for every company; without this
    # guard the [0] lookup below would raise IndexError.
    if rows_for_year.empty:
        return f"No income statement found for {ticker} in {year}."

    values_dict = rows_for_year.to_dict(orient="records")[0]
    # Drop identifier/link columns that carry no financial information.
    excluded_keys = ("date", "cik", "link", "finalLink")
    fields = ", ".join(
        f"{key}={value}" for key, value in values_dict.items() if key not in excluded_keys
    )
    return f"Financial results: {fields}"

def get_company_info() -> dict[str, str]:
    """
    Returns a dictionary of companies you can query about. Always check this before using any other tool.
    The output is a dictionary of valid ticker symbols mapped to company names.
    You can use this to identify the companies you can query about, and their ticker information.
    """
    # Return a shallow copy so callers cannot mutate the module-level mapping
    # that the other tools validate against.
    return dict(tickers)

def get_valid_years() -> list[int]:
    """
    Returns a list of the years for which financial reports are available.
    Always check this before using any other tool.
    """
    # Materialize the range so the tool output is an explicit list of years
    # rather than a range object rendered as "range(2015, 2025)".
    return list(years)


class AgentTools:
    """
    Assembles the tools available to the financial assistant agent:
    the plain-function tools defined in this module plus two Vectara
    tools (RAG and search) over the corpus identified by the configuration.
    """

    def __init__(self, _cfg, agent_config):
        """
        Args:
            _cfg: configuration object exposing `api_key` and `corpus_key`.
            agent_config: agent configuration; stored on the instance.
        """
        self.tools_factory = ToolsFactory()
        self.agent_config = agent_config
        self.cfg = _cfg
        self.vec_factory = VectaraToolFactory(vectara_api_key=_cfg.api_key,
                                              vectara_corpus_key=_cfg.corpus_key)

    def get_tools(self):
        """
        Build and return the list of agent tools.

        Returns:
            The function tools (company info, valid years, income statement)
            followed by the `ask_transcripts` and `search_transcripts` tools.
        """
        # Field metadata shared by both argument schemas, defined once so the
        # two schemas cannot drift apart.
        query_description = "The user query, always in the form of a question"
        query_examples = ["what are the risks reported?", "who are the competitors?"]
        year_description = f"The year this query relates to. An integer between {min(years)} and {max(years)} or a string specifying a condition on the year"
        year_examples = [2020, '>2021', '<2023', '>=2021', '<=2023', '[2021, 2023]', '[2021, 2023)']
        ticker_description = f"The company ticker this query relates to. Must be a valid ticker symbol from the list {list(tickers.keys())}."

        class QueryTranscriptsArgs(BaseModel):
            query: str = Field(..., description=query_description, examples=list(query_examples))
            # NOTE(review): default is None although the annotation is `int | str`;
            # kept as-is because the tool factory may rely on this exact annotation.
            year: int | str = Field(
                default=None,
                description=year_description,
                examples=list(year_examples)
            )
            ticker: str = Field(..., description=ticker_description)

        # Reuse the factory created in __init__ rather than building a duplicate.
        vec_factory = self.vec_factory
        summarizer = 'vectara-summary-table-md-query-ext-jan-2025-gpt-4o'
        ask_transcripts = vec_factory.create_rag_tool(
            tool_name = "ask_transcripts",
            tool_description = """
            Given a company name and year, responds to a user question about the company, based on analyst call transcripts about the company's financial reports for that year.
            You can ask this tool any question about the company including risks, opportunities, financial performance, competitors and more.
            """,
            tool_args_schema = QueryTranscriptsArgs,
            reranker = "multilingual_reranker_v1", rerank_k = 100, rerank_cutoff = 0.1,
            n_sentences_before = 2, n_sentences_after = 4, lambda_val = 0.005,
            summary_num_results = 15,
            vectara_summarizer = summarizer,
            include_citations = True,
            verbose=False,
        )

        class SearchTranscriptsArgs(BaseModel):
            query: str = Field(..., description=query_description, examples=list(query_examples))
            top_k: int = Field(..., description="The number of results to return.")
            year: int | str = Field(
                default=None,
                description=year_description,
                examples=list(year_examples)
            )
            ticker: str = Field(..., description=ticker_description)

        search_transcripts = vec_factory.create_search_tool(
            tool_name = "search_transcripts",
            tool_description = """
            Given a company name and year, and a user query, retrieves relevant documents about the company.
            """,
            tool_args_schema = SearchTranscriptsArgs,
            reranker = "multilingual_reranker_v1", rerank_k = 100,
            lambda_val = 0.005,
            verbose=False
        )

        # Wrap the plain Python functions with the factory created in __init__.
        function_tools = [
            self.tools_factory.create_tool(tool)
            for tool in (get_company_info, get_valid_years, fmp_income_statement)
        ]
        return function_tools + [ask_transcripts, search_transcripts]

def initialize_agent(_cfg, agent_progress_callback=None):
    """
    Create the financial assistant agent with its tools and instructions.

    Args:
        _cfg: configuration object passed through to AgentTools.
        agent_progress_callback: optional callback forwarded to the Agent.

    Returns:
        The constructed Agent, after its report() method has been called.
    """
    def log_query(query: str, response: str):
        # Prints every query/response pair to stdout.
        print(f"Logging query={query}, response={response}")

    instructions = """
    - You are a helpful financial assistant, with expertise in financial reporting, in conversation with a user. 
    - Use the 'fmp_income_statement' tool (with the company ticker and year) to obtain financial data.
    - Always check the 'get_company_info' and 'get_valid_years' tools to validate company and year are valid.
    - Use the 'ask_transcripts' tool to answer most questions about the company's financial performance, risks, opportunities, strategy, competitors, and more.
    - Respond in a compact format by using appropriate units of measure (e.g., K for thousands, M for millions, B for billions). 
      Do not report the same number twice (e.g. $100K and 100,000 USD).
    - Do not include URLs unless they are provided in the output of a tool response and are valid URLs.
      Ignore references or citations in the 'ask_transcripts' tool output if they have an empty URL (for example "[2]()").
    - When querying a tool for a numeric value or KPI, use a concise and non-ambiguous description of what you are looking for.
    - If you calculate a metric, make sure you have all the necessary information to complete the calculation. Don't guess.
    - Your response should not be in markdown format.
    """

    agent_config = AgentConfig()
    tools = AgentTools(_cfg, agent_config).get_tools()

    # The SubQuestionQueryWorkflow option (workflow_cls) is intentionally left disabled.
    agent = Agent(
        tools=tools,
        topic="Financial data, annual reports and 10-K filings",
        custom_instructions=instructions,
        agent_progress_callback=agent_progress_callback,
        query_logging_callback=log_query,
        verbose=True,
    )
    agent.report()
    return agent

def get_agent_config() -> OmegaConf:
    """
    Build the demo configuration from environment variables.

    VECTARA_CORPUS_KEY and VECTARA_API_KEY are required (a missing one raises
    KeyError); QUERY_EXAMPLES is optional and is None when unset.

    Returns:
        The configuration object created by OmegaConf.create, including the
        demo name, welcome text and a description listing the company names.
    """
    company_names = ", ".join(tickers.values())
    settings = {
        'corpus_key': str(os.environ['VECTARA_CORPUS_KEY']),
        'api_key': str(os.environ['VECTARA_API_KEY']),
        'examples': os.environ.get('QUERY_EXAMPLES'),
        'demo_name': "finance-chat",
        'demo_welcome': "Financial Assistant demo.",
        'demo_description': f"This assistant can help you with any questions about the financials of several companies:\n\n **{company_names}**.\n",
    }
    return OmegaConf.create(settings)