Spaces:
Sleeping
Sleeping
import os | |
import shutil | |
import gradio as gr | |
from transformers import ReactCodeAgent, HfEngine, Tool | |
import pandas as pd | |
import PyPDF2 | |
import io | |
from openpyxl import Workbook | |
from gradio import Chatbot | |
from transformers.agents import stream_to_gradio | |
from huggingface_hub import login | |
# Authenticate against the Hugging Face Hub only when a token is configured.
# Calling login() with None falls back to an interactive prompt, which cannot
# be answered in a headless deployment, so set HUGGINGFACEHUB_API_TOKEN.
_hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
if _hf_token:
    login(_hf_token)

llm_engine = HfEngine("meta-llama/Meta-Llama-3.1-70B-Instruct")

# Python libraries are not agent tools: a Tool must define a name, inputs and a
# forward() implementation, and bare Tool(...) instances provide none of them.
# The libraries are made available to the generated code through
# `additional_authorized_imports` below, so no custom tools are registered.
tools = []

agent = ReactCodeAgent(
    tools=tools,
    llm_engine=llm_engine,
    additional_authorized_imports=[
        "numpy",
        "pandas",
        "matplotlib.pyplot",
        "openpyxl",
        "PyPDF2",
        # The PDF is handed to the agent as bytes; PyPDF2 needs a file-like
        # object, which the generated code builds with io.BytesIO.
        "io",
    ],
    max_iterations=15,
)
# Task description sent to the agent on every run. interact_with_agent()
# appends a note about how the uploaded PDF is exposed to the generated code.
base_prompt = """You are an expert financial data analyst. Your task is to analyze the provided financial PDF document and perform the following:
1. Accurately locate the financial statements such as the Balance Sheet, Income Statement, and Cash Flow Statement within the PDF.
2. Extract only the relevant pages containing these financial statements into a pandas DataFrame using tools that are available in the current environment.
3. Save the DataFrame into an Excel file using the `openpyxl` library, ensuring that no restricted functions like `open()` are used.
4. Provide the path to the saved Excel file and display a preview of the data extracted by showing the first few rows of the DataFrame with `df.head()`.
Ensure that the code is correctly structured to handle the identification, extraction, processing, and saving of the data into an Excel file, while adhering to the execution environment's constraints.
"""
def _read_uploaded_pdf(file_input):
    """Return the raw bytes of the uploaded file.

    Depending on the Gradio version and the component's ``type`` setting, the
    upload arrives either as a path string or as a temp-file wrapper exposing
    the path through ``.name``. Both are read from disk in binary mode; any
    other object is expected to offer ``.read()``.
    """
    if isinstance(file_input, (str, os.PathLike)):
        path = file_input
    else:
        path = getattr(file_input, "name", None)
    if isinstance(path, (str, os.PathLike)) and os.path.isfile(path):
        with open(path, "rb") as handle:
            return handle.read()
    return file_input.read()


def _find_excel_in_output(output_dir):
    """Return the most recently modified ``.xlsx`` file in *output_dir*, or None."""
    if not os.path.isdir(output_dir):
        return None
    candidates = [
        os.path.join(output_dir, name)
        for name in os.listdir(output_dir)
        if name.lower().endswith(".xlsx")
    ]
    return max(candidates, key=os.path.getmtime, default=None)


def interact_with_agent(file_input):
    """Run the agent on an uploaded PDF and stream its progress to the UI.

    This is a generator used as a Gradio event handler with two outputs (the
    chatbot and the download component), so every ``yield`` produces a
    ``(messages, download_update)`` pair.

    Args:
        file_input: Value of the upload component: a file path, a temp-file
            wrapper, or ``None`` when nothing was uploaded.

    Yields:
        A tuple of the chat history (list of ``gr.ChatMessage``) and a
        component update for the download file. The download stays hidden
        until the run has finished and an Excel file was found.
    """
    output_dir = "./output"
    marker = "The Excel file has been saved"
    # gr.update() is used instead of gr.File.update(), which no longer exists
    # in the Gradio releases that provide gr.ChatMessage.
    hidden_download = gr.update(visible=False)

    if file_input is None:
        yield [
            gr.ChatMessage(
                role="assistant",
                content="⚠️ Please upload a PDF document before starting the analysis.",
            )
        ], hidden_download
        return

    # Start every run from an empty output directory so stale results from a
    # previous run are never offered for download.
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir)

    pdf_content = _read_uploaded_pdf(file_input)
    prompt = base_prompt + (
        "\n\nThe PDF file has been loaded and is available as 'pdf_content' (a bytes object). Use PyPDF2 to read and process this content."
        # Tell the agent where to write and how to report the result, so the
        # detection below has something to find.
        "\nSave the Excel file inside the './output' directory and report it with the sentence "
        "'The Excel file has been saved: <path>'."
    )

    messages = [gr.ChatMessage(role="user", content=prompt)]
    yield (
        messages
        + [gr.ChatMessage(role="assistant", content="⏳ _Starting analysis of the financial PDF..._")],
        hidden_download,
    )

    excel_file_path = None
    for msg in stream_to_gradio(agent, prompt, pdf_content=pdf_content):
        messages.append(msg)
        content = msg.content
        # The streamed text may be wrapped in Markdown (e.g. code fences), so
        # look for the marker anywhere and strip fence characters afterwards.
        if isinstance(content, str) and marker in content:
            excel_file_path = content.split(": ")[-1].strip().strip("`").strip()
        yield (
            messages + [gr.ChatMessage(role="assistant", content="⏳ _Still processing..._")],
            hidden_download,
        )

    # Fall back to scanning the output directory when the agent did not report
    # a usable path.
    if not (excel_file_path and os.path.exists(excel_file_path)):
        excel_file_path = _find_excel_in_output(output_dir)

    if excel_file_path:
        download_button = gr.update(value=excel_file_path, visible=True)
    else:
        download_button = hidden_download
    yield messages, download_button
# Build the web UI: a PDF upload, a button that starts the analysis, a chat
# panel that shows the agent's progress, and a download slot for the result.
with gr.Blocks(theme=gr.themes.Soft(primary_hue=gr.themes.colors.blue)) as demo:
    gr.Markdown(
        "# Financial Statement Analyzer 📊💼\n"
        "Upload a financial PDF document (like 10-Q or 10-K), and the AI will extract the financial statements into an Excel file!"
    )
    file_input = gr.File(label="Upload your financial PDF document")
    submit = gr.Button("Analyze Financial Statements", variant="primary")
    chatbot = gr.Chatbot(
        label="Financial Analyst Agent",
        # The handler streams gr.ChatMessage objects, which the chatbot only
        # accepts in "messages" mode.
        type="messages",
        avatar_images=(None, "https://em-content.zobj.net/source/twitter/53/robot-face_1f916.png"),
    )
    # Hidden until the handler supplies an Excel file to download.
    download_output = gr.File(label="Download Excel File", visible=False)
    submit.click(interact_with_agent, inputs=[file_input], outputs=[chatbot, download_output])

if __name__ == "__main__":
    demo.launch()