SearchGPT / app.py
Shreyas094's picture
Update app.py
d56e797 verified
raw
history blame
3.97 kB
import os
import shutil
import gradio as gr
from transformers import ReactCodeAgent, HfEngine, Tool
import pandas as pd
import PyPDF2
import io
from openpyxl import Workbook
from gradio import Chatbot
from transformers.agents import stream_to_gradio
from huggingface_hub import login
# Authenticate against the Hugging Face Hub. The token is taken from the
# HUGGINGFACEHUB_API_TOKEN environment variable (None is passed when it is unset).
login(token=os.environ.get("HUGGINGFACEHUB_API_TOKEN"))

# LLM engine handed to the agent below.
llm_engine = HfEngine("meta-llama/Meta-Llama-3.1-70B-Instruct")
# Define tools for the agent: one Tool per library, built from
# (name, description) pairs.
# NOTE(review): confirm that the installed transformers version accepts
# positional (name, description) arguments to Tool -- TODO verify.
_TOOL_SPECS = (
    ("numpy", "NumPy library for numerical computing"),
    ("pandas", "Pandas library for data manipulation and analysis"),
    ("matplotlib", "Matplotlib library for creating visualizations"),
    ("openpyxl", "OpenPyXL library for working with Excel files"),
    ("PyPDF2", "PyPDF2 library for working with PDF files"),
)
tools = [
    Tool(tool_name, tool_description)
    for tool_name, tool_description in _TOOL_SPECS
]
# Agent that runs on `llm_engine` with the tools declared above. The modules
# listed in `additional_authorized_imports` are passed as extra permitted
# imports, and `max_iterations` is set to 15.
agent = ReactCodeAgent(
    llm_engine=llm_engine,
    tools=tools,
    max_iterations=15,
    additional_authorized_imports=[
        "numpy",
        "pandas",
        "matplotlib.pyplot",
        "openpyxl",
        "PyPDF2",
    ],
)
# Task description sent to the agent. interact_with_agent() appends a note
# about the uploaded PDF bytes to this text before running the agent.
base_prompt = """You are an expert financial data analyst. Your task is to analyze the provided financial PDF document and perform the following:
1. Accurately locate the financial statements such as the Balance Sheet, Income Statement, and Cash Flow Statement within the PDF.
2. Extract only the relevant pages containing these financial statements into a pandas DataFrame using tools that are available in the current environment.
3. Save the DataFrame into an Excel file using the `openpyxl` library, ensuring that no restricted functions like `open()` are used.
4. Provide the path to the saved Excel file and display a preview of the data extracted by showing the first few rows of the DataFrame with `df.head()`.
Ensure that the code is correctly structured to handle the identification, extraction, processing, and saving of the data into an Excel file, while adhering to the execution environment's constraints.
"""
def _read_uploaded_bytes(file_input):
    """Return the raw bytes of an uploaded file.

    Handles the value shapes an upload component may pass to a callback:
    raw ``bytes``, a filesystem path (``str`` / ``os.PathLike``), an object
    exposing its path as ``.name`` (e.g. a temp-file wrapper), or a readable
    file-like object.
    """
    if isinstance(file_input, (bytes, bytearray)):
        return bytes(file_input)
    if isinstance(file_input, (str, os.PathLike)):
        path = file_input
    else:
        path = getattr(file_input, "name", None)
    if isinstance(path, (str, os.PathLike)) and os.path.isfile(path):
        # Always read in binary mode: the PDF must reach the agent as bytes.
        with open(path, "rb") as handle:
            return handle.read()
    # Fall back to the file-like protocol the original implementation used.
    return file_input.read()


def interact_with_agent(file_input):
    """Run the agent on an uploaded PDF and stream its progress to the UI.

    This generator is wired to two output components (chat history and the
    download file), so every yield is a ``(messages, file_update)`` pair.

    Args:
        file_input: Value of the upload component; ``None`` when nothing has
            been uploaded.

    Yields:
        tuple: The chat messages to display and an update for the download
        component. The download stays hidden until the run has finished and
        an Excel file reported by the agent exists on disk.
    """
    if file_input is None:
        # Nothing to analyse: report it in the chat instead of crashing.
        yield (
            [
                gr.ChatMessage(
                    role="assistant",
                    content="Please upload a PDF document before starting the analysis.",
                )
            ],
            gr.update(visible=False),
        )
        return

    # Start every run from an empty output directory.
    if os.path.exists("./output"):
        shutil.rmtree("./output")
    os.makedirs("./output")

    pdf_content = _read_uploaded_bytes(file_input)
    prompt = base_prompt + "\n\nThe PDF file has been loaded and is available as 'pdf_content' (a bytes object). Use PyPDF2 to read and process this content."
    messages = [gr.ChatMessage(role="user", content=prompt)]
    yield (
        messages
        + [gr.ChatMessage(role="assistant", content="⏳ _Starting analysis of the financial PDF..._")],
        gr.update(visible=False),
    )

    excel_file_path = None
    for msg in stream_to_gradio(agent, prompt, pdf_content=pdf_content):
        messages.append(msg)
        # The saved path is taken from the text after the last ": " of a
        # message that starts with the expected sentence.
        if isinstance(msg.content, str) and msg.content.startswith("The Excel file has been saved"):
            excel_file_path = msg.content.split(": ")[-1].strip()
        yield (
            messages + [gr.ChatMessage(role="assistant", content="⏳ _Still processing..._")],
            gr.update(visible=False),
        )

    # Only offer the download when the reported file really exists.
    if excel_file_path and os.path.exists(excel_file_path):
        download_button = gr.update(value=excel_file_path, visible=True)
    else:
        download_button = gr.update(visible=False)
    yield messages, download_button
# UI layout: PDF upload + submit button, a chat panel showing the agent's
# progress, and a download slot that the callback reveals once a file exists.
with gr.Blocks(theme=gr.themes.Soft(primary_hue=gr.themes.colors.blue)) as demo:
    gr.Markdown("""# Financial Statement Analyzer 📊💼
Upload a financial PDF document (like 10-Q or 10-K), and the AI will extract the financial statements into an Excel file!""")
    file_input = gr.File(label="Upload your financial PDF document")
    submit = gr.Button("Analyze Financial Statements", variant="primary")
    chatbot = gr.Chatbot(
        label="Financial Analyst Agent",
        # interact_with_agent streams gr.ChatMessage objects, which the
        # Chatbot only accepts in the "messages" format.
        type="messages",
        avatar_images=(None, "https://em-content.zobj.net/source/twitter/53/robot-face_1f916.png"),
    )
    download_output = gr.File(label="Download Excel File", visible=False)
    # The callback is a generator; each yield updates both outputs.
    submit.click(interact_with_agent, inputs=[file_input], outputs=[chatbot, download_output])

if __name__ == "__main__":
    demo.launch()