Spaces:
Running
Running
File size: 3,967 Bytes
d0388f2 b789218 d56e797 d0388f2 d56e797 d0388f2 d56e797 8fcdb06 d56e797 615a9b7 d56e797 6905d6d d56e797 d0388f2 d56e797 d0388f2 d56e797 d0388f2 d56e797 0a69d83 d56e797 83a84c1 d56e797 f1e2207 d56e797 83a84c1 d56e797 d0388f2 d56e797 d0388f2 d56e797 ca9bb83 97c6d6a d56e797 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 |
import os
import shutil
import gradio as gr
from transformers import ReactCodeAgent, HfEngine, Tool
import pandas as pd
import PyPDF2
import io
from openpyxl import Workbook
from gradio import Chatbot
from transformers.agents import stream_to_gradio
from huggingface_hub import login
# Authenticate against the Hugging Face Hub with the token taken from the
# HUGGINGFACEHUB_API_TOKEN environment variable.
# NOTE(review): when the variable is unset this passes None to login();
# confirm how huggingface_hub behaves in a non-interactive deployment.
login(token=os.environ.get("HUGGINGFACEHUB_API_TOKEN"))

# LLM engine handed to the agent below.
llm_engine = HfEngine("meta-llama/Meta-Llama-3.1-70B-Instruct")
# Tool entries handed to the agent, built from (name, description) pairs.
# NOTE(review): these are bare Tool(name, description) instances with no
# visible implementation behind them; confirm the installed transformers
# version accepts them. The same libraries are also whitelisted for import
# through additional_authorized_imports below.
tools = [
    Tool(library, summary)
    for library, summary in (
        ("numpy", "NumPy library for numerical computing"),
        ("pandas", "Pandas library for data manipulation and analysis"),
        ("matplotlib", "Matplotlib library for creating visualizations"),
        ("openpyxl", "OpenPyXL library for working with Excel files"),
        ("PyPDF2", "PyPDF2 library for working with PDF files"),
    )
]

# Code-writing agent: at most 15 iterations, with the listed modules
# authorized for import in the code it generates.
agent = ReactCodeAgent(
    llm_engine=llm_engine,
    tools=tools,
    max_iterations=15,
    additional_authorized_imports=[
        "numpy",
        "pandas",
        "matplotlib.pyplot",
        "openpyxl",
        "PyPDF2",
    ],
)
# Task description sent to the agent on every run. Built from adjacent string
# literals (one group per prompt line); each prompt line ends with "\n".
base_prompt = (
    "You are an expert financial data analyst. Your task is to analyze the "
    "provided financial PDF document and perform the following:\n"
    "1. Accurately locate the financial statements such as the Balance Sheet, "
    "Income Statement, and Cash Flow Statement within the PDF.\n"
    "2. Extract only the relevant pages containing these financial statements "
    "into a pandas DataFrame using tools that are available in the current "
    "environment.\n"
    "3. Save the DataFrame into an Excel file using the `openpyxl` library, "
    "ensuring that no restricted functions like `open()` are used.\n"
    "4. Provide the path to the saved Excel file and display a preview of the "
    "data extracted by showing the first few rows of the DataFrame with "
    "`df.head()`.\n"
    "Ensure that the code is correctly structured to handle the "
    "identification, extraction, processing, and saving of the data into an "
    "Excel file, while adhering to the execution environment's constraints.\n"
)
def _read_pdf_bytes(file_input):
    """Return the raw bytes of the uploaded file.

    The value Gradio hands to the callback differs between versions and
    ``gr.File`` settings (a path string, or a temp-file wrapper exposing the
    path as ``.name`` -- confirm against the installed Gradio version), so
    every shape is accepted and the on-disk path is preferred when there is
    one.
    """
    if isinstance(file_input, (bytes, bytearray)):
        return bytes(file_input)
    if isinstance(file_input, (str, os.PathLike)):
        with open(file_input, "rb") as pdf_file:
            return pdf_file.read()
    candidate_path = getattr(file_input, "name", None)
    if isinstance(candidate_path, str) and os.path.isfile(candidate_path):
        with open(candidate_path, "rb") as pdf_file:
            return pdf_file.read()
    # Last resort: treat the object as an already-open binary stream.
    return file_input.read()


def interact_with_agent(file_input):
    """Run the agent on an uploaded PDF and stream its progress to the UI.

    Args:
        file_input: Value of the upload component; ``None`` when nothing was
            uploaded, otherwise anything ``_read_pdf_bytes`` accepts.

    Yields:
        ``(messages, download_update)`` pairs, one value per output component
        wired to this callback (chat history and the download file). The
        download component stays hidden until the run has finished and the
        path reported by the agent exists on disk.
    """
    if file_input is None:
        # Nothing to analyze: report it in the chat instead of crashing.
        yield (
            [
                gr.ChatMessage(
                    role="assistant",
                    content="⚠️ Please upload a PDF document before starting the analysis.",
                )
            ],
            gr.update(visible=False),
        )
        return

    # Start every run from an empty ./output directory.
    if os.path.exists("./output"):
        shutil.rmtree("./output")
    os.makedirs("./output")

    pdf_content = _read_pdf_bytes(file_input)
    prompt = base_prompt + "\n\nThe PDF file has been loaded and is available as 'pdf_content' (a bytes object). Use PyPDF2 to read and process this content."
    messages = [gr.ChatMessage(role="user", content=prompt)]

    # Every yield carries a value for BOTH outputs; a bare list would be
    # interpreted as the per-output values rather than as the chat history.
    yield (
        messages
        + [gr.ChatMessage(role="assistant", content="⏳ _Starting analysis of the financial PDF..._")],
        gr.update(visible=False),
    )

    excel_file_path = None
    # pdf_content is passed as a keyword argument to stream_to_gradio
    # (presumably forwarded to the agent run -- confirm against transformers).
    for msg in stream_to_gradio(agent, prompt, pdf_content=pdf_content):
        messages.append(msg)
        if isinstance(msg.content, str) and msg.content.startswith("The Excel file has been saved"):
            # The path is taken to be whatever follows the last ": ".
            excel_file_path = msg.content.split(": ")[-1].strip()
        yield (
            messages + [gr.ChatMessage(role="assistant", content="⏳ _Still processing..._")],
            gr.update(visible=False),
        )

    # gr.update() is used instead of the component-level gr.File.update().
    if excel_file_path and os.path.exists(excel_file_path):
        download_button = gr.update(value=excel_file_path, visible=True)
    else:
        download_button = gr.update(visible=False)
    yield messages, download_button
# Gradio UI: upload a PDF, press the button, watch the agent's messages in the
# chat panel, and download the resulting Excel file once it is available.
with gr.Blocks(theme=gr.themes.Soft(primary_hue=gr.themes.colors.blue)) as demo:
    gr.Markdown("""# Financial Statement Analyzer 📊💼
Upload a financial PDF document (like 10-Q or 10-K), and the AI will extract the financial statements into an Excel file!""")
    file_input = gr.File(label="Upload your financial PDF document")
    submit = gr.Button("Analyze Financial Statements", variant="primary")
    chatbot = gr.Chatbot(
        label="Financial Analyst Agent",
        # The callback yields gr.ChatMessage objects, which need the
        # "messages" chat format.
        type="messages",
        avatar_images=(None, "https://em-content.zobj.net/source/twitter/53/robot-face_1f916.png"),
    )
    # Hidden until the callback reports a saved Excel file.
    download_output = gr.File(label="Download Excel File", visible=False)
    submit.click(interact_with_agent, inputs=[file_input], outputs=[chatbot, download_output])
# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()