File size: 3,967 Bytes
d0388f2
b789218
d56e797
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d0388f2
 
d56e797
 
 
 
 
d0388f2
 
d56e797
 
 
 
 
 
 
8fcdb06
d56e797
 
 
 
615a9b7
d56e797
6905d6d
d56e797
d0388f2
d56e797
 
 
d0388f2
 
d56e797
 
 
 
 
 
 
 
 
 
 
d0388f2
d56e797
0a69d83
d56e797
83a84c1
d56e797
 
f1e2207
d56e797
83a84c1
d56e797
 
 
 
 
d0388f2
d56e797
d0388f2
d56e797
ca9bb83
97c6d6a
d56e797
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import os
import shutil
import gradio as gr
from transformers import ReactCodeAgent, HfEngine, Tool
import pandas as pd
import PyPDF2
import io
from openpyxl import Workbook
from gradio import Chatbot
from transformers.agents import stream_to_gradio
from huggingface_hub import login

# Authenticate against the Hugging Face Hub. The access token is read from the
# HUGGINGFACEHUB_API_TOKEN environment variable, which must be set beforehand.
login(token=os.environ.get("HUGGINGFACEHUB_API_TOKEN"))

# Hosted inference engine that drives the agent's reasoning steps.
llm_engine = HfEngine("meta-llama/Meta-Llama-3.1-70B-Instruct")

# No custom tools are registered. `Tool` is an abstract base class that must be
# subclassed (with `name`, `description`, `inputs`, `output_type` and a
# `forward` implementation); instantiating it directly as `Tool("numpy", ...)`
# yields objects without a `name`, which the agent's toolbox cannot register.
# Third-party libraries are made available to the generated code through
# `additional_authorized_imports` below instead.
tools = []

agent = ReactCodeAgent(
    tools=tools,
    llm_engine=llm_engine,
    additional_authorized_imports=[
        "numpy",
        "pandas",
        "matplotlib.pyplot",
        "openpyxl",
        "PyPDF2",
        # The PDF is handed to the agent as raw bytes; PyPDF2 needs a
        # file-like object, so the agent must be able to use `io.BytesIO`.
        "io",
    ],
    max_iterations=15,
)

# Task description sent to the agent ahead of the per-request details.
# Each numbered step ends with a newline; the text itself ends with one too.
base_prompt = (
    "You are an expert financial data analyst. Your task is to analyze the "
    "provided financial PDF document and perform the following:\n"
    "1. Accurately locate the financial statements such as the Balance Sheet, "
    "Income Statement, and Cash Flow Statement within the PDF.\n"
    "2. Extract only the relevant pages containing these financial statements "
    "into a pandas DataFrame using tools that are available in the current "
    "environment.\n"
    "3. Save the DataFrame into an Excel file using the `openpyxl` library, "
    "ensuring that no restricted functions like `open()` are used.\n"
    "4. Provide the path to the saved Excel file and display a preview of the "
    "data extracted by showing the first few rows of the DataFrame with "
    "`df.head()`.\n"
    "Ensure that the code is correctly structured to handle the "
    "identification, extraction, processing, and saving of the data into an "
    "Excel file, while adhering to the execution environment's constraints.\n"
)

def _read_uploaded_bytes(file_input):
    """Return the raw bytes of an uploaded file.

    Depending on the Gradio version and the ``gr.File`` configuration, the
    callback receives either a file-like object or a filesystem path, so both
    shapes are accepted here.
    """
    if hasattr(file_input, "read"):
        return file_input.read()
    with open(os.fspath(file_input), "rb") as handle:
        return handle.read()


def interact_with_agent(file_input):
    """Run the agent on an uploaded PDF and stream its progress to the UI.

    This is a generator wired to two outputs (the chatbot and the download
    file component), so every ``yield`` produces a ``(messages, file_update)``
    pair.

    Args:
        file_input: The value supplied by the ``gr.File`` upload component: a
            path, a file-like object, or ``None`` when nothing was uploaded.

    Yields:
        A tuple of the chat history (list of ``gr.ChatMessage``) and an update
        for the download component. The download stays hidden until the final
        yield, where it is revealed only if the agent reported a saved Excel
        file that exists on disk.

    Raises:
        gr.Error: If no file was uploaded.
    """
    if file_input is None:
        raise gr.Error("Please upload a PDF document before starting the analysis.")

    pdf_content = _read_uploaded_bytes(file_input)

    # Start every run from an empty output directory.
    shutil.rmtree("./output", ignore_errors=True)
    os.makedirs("./output", exist_ok=True)

    prompt = base_prompt + "\n\nThe PDF file has been loaded and is available as 'pdf_content' (a bytes object). Use PyPDF2 to read and process this content."

    # `gr.update` is used instead of the per-component `gr.File.update`, which
    # no longer exists in current Gradio releases.
    hidden_download = gr.update(visible=False)

    messages = [gr.ChatMessage(role="user", content=prompt)]
    yield (
        messages
        + [gr.ChatMessage(role="assistant", content="⏳ _Starting analysis of the financial PDF..._")],
        hidden_download,
    )

    excel_file_path = None
    # Extra keyword arguments are forwarded to the agent run, which exposes
    # `pdf_content` as a variable to the code the agent writes.
    for msg in stream_to_gradio(agent, prompt, pdf_content=pdf_content):
        messages.append(msg)
        # The agent is expected to announce the saved file as
        # "The Excel file has been saved ...: <path>"; keep the trailing path.
        if isinstance(msg.content, str) and msg.content.startswith("The Excel file has been saved"):
            excel_file_path = msg.content.split(": ")[-1].strip()
        yield (
            messages + [gr.ChatMessage(role="assistant", content="⏳ _Still processing..._")],
            hidden_download,
        )

    if excel_file_path and os.path.exists(excel_file_path):
        download_update = gr.update(value=excel_file_path, visible=True)
    else:
        download_update = hidden_download

    yield messages, download_update

# UI layout: upload widget, trigger button, streaming chat view and a download
# slot that stays hidden until an Excel file is available.
with gr.Blocks(theme=gr.themes.Soft(primary_hue=gr.themes.colors.blue)) as demo:
    gr.Markdown("""# Financial Statement Analyzer 📊💼

Upload a financial PDF document (like 10-Q or 10-K), and the AI will extract the financial statements into an Excel file!""")

    file_input = gr.File(label="Upload your financial PDF document")
    submit = gr.Button("Analyze Financial Statements", variant="primary")
    chatbot = gr.Chatbot(
        label="Financial Analyst Agent",
        # The handler streams `gr.ChatMessage` objects, which the chatbot only
        # accepts in the "messages" format (not the default tuple pairs).
        type="messages",
        avatar_images=(None, "https://em-content.zobj.net/source/twitter/53/robot-face_1f916.png"),
    )
    download_output = gr.File(label="Download Excel File", visible=False)

    # The handler is a generator; each yield updates both outputs in order.
    submit.click(interact_with_agent, inputs=[file_input], outputs=[chatbot, download_output])

if __name__ == "__main__":
    demo.launch()