Vikrant26 committed on
Commit
0dcbfc5
·
verified ·
1 Parent(s): c7e1db1

Upload 10 files

Browse files
Files changed (10) hide show
  1. .env +6 -0
  2. .gitignore +2 -0
  3. Dockerfile +20 -0
  4. PL_image-removebg-preview.png +0 -0
  5. README.md +148 -12
  6. app.py +108 -0
  7. financial_data.db +0 -0
  8. rag.py +123 -0
  9. requirements.txt +11 -0
  10. task_image-removebg-preview.png +0 -0
.env ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ GOOGLE_API_KEY="your_google_api_key_here"
2
+ PINECONE_API_KEY="your_pinecone_api_key_here"
3
+ PINECONE_ENV="us-west1-gcp-free"
4
+ # Optional: ChromaDB Settings
5
+ CHROMA_DB_IMPL=duckdb+parquet
6
+ PERSIST_DIRECTORY=db
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ myenv
2
+ .env
Dockerfile ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use an official Python runtime as a parent image
FROM python:3.11-slim

# Set the working directory in the container
WORKDIR /app

# Install dependencies first so this layer stays cached until
# requirements.txt itself changes
COPY requirements.txt /app/
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the project into the container at /app
COPY . /app

# Make port 8501 available to the world outside this container
EXPOSE 8501

# GOOGLE_API_KEY is intentionally NOT defined here: a value set with ENV is
# stored in the image and readable by anyone who can pull or inspect it.
# Supply the key when starting the container instead:
#   docker run -p 8501:8501 -e GOOGLE_API_KEY=<your key> finance-buddy

# Run app.py when the container launches
CMD ["streamlit", "run", "app.py"]
PL_image-removebg-preview.png ADDED
README.md CHANGED
@@ -1,12 +1,148 @@
1
- ---
2
- title: Finance Buddy
3
- emoji: 😻
4
- colorFrom: gray
5
- colorTo: gray
6
- sdk: streamlit
7
- sdk_version: 1.41.1
8
- app_file: app.py
9
- pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Finance Buddy
2
+
3
+ Finance Buddy is a sophisticated Streamlit application designed to analyze P&L documents and answer financial queries using Google Generative AI. This tool is perfect for financial analysts, accountants, and anyone dealing with financial statements.
4
+
5
+ ## Table of Contents
6
+
7
+ - [Features](#features)
8
+ - [Prerequisites](#prerequisites)
9
+ - [Setup](#setup)
10
+ - [Using Docker](#using-docker)
11
+ - [Local Setup](#local-setup)
12
+ - [Usage](#usage)
13
+ - [Contributing](#contributing)
14
+ - [License](#license)
15
+
16
+ ## Features
17
+
18
+ - **Document Upload**: Upload multiple P&L documents in PDF format.
19
+ - **Document Processing**: Process uploaded documents to extract and analyze financial data.
20
+ - **Query System**: Ask questions about your financial data and get accurate, professional responses.
21
+ - **Integration with Google Generative AI**: Leverage advanced AI capabilities for accurate and context-aware responses.
22
+
23
+ ## Prerequisites
24
+
25
+ - Docker (for containerized deployment)
26
+ - Python 3.8+
27
+ - Streamlit
28
+ - Google Generative AI API Key
29
+
30
+ ## Setup
31
+
32
+ ### Using Docker
33
+
34
+ 1. **Clone the repository:**
35
+
36
+ ```sh
37
+ git clone <repository_url>
38
+ cd finance-buddy
39
+ ```
40
+
41
+ 2. **Build the Docker image:**
42
+
43
+ ```sh
44
+ docker build -t finance-buddy .
45
+ ```
46
+
47
+ 3. **Run the Docker container:**
48
+
49
+ ```sh
50
+ docker run -p 8501:8501 -e GOOGLE_API_KEY=your_google_api_key_here finance-buddy
51
+ ```
52
+
53
+ 4. **Access the application:**
54
+
55
+ Open your browser and go to `http://localhost:8501`.
56
+
57
+ ### Local Setup
58
+
59
+ 1. **Clone the repository:**
60
+
61
+ ```sh
62
+ git clone <repository_url>
63
+ cd finance-buddy
64
+ ```
65
+
66
+ 2. **Create a virtual environment and activate it:**
67
+
68
+ ```sh
69
+ python -m venv venv
70
+ source venv/bin/activate # On Windows use `venv\Scripts\activate`
71
+ ```
72
+
73
+ 3. **Install the required packages:**
74
+
75
+ ```sh
76
+ pip install -r requirements.txt
77
+ ```
78
+
79
+ 4. **Create a `.env` file and add your Google API Key:**
80
+
81
+ ```sh
82
+ GOOGLE_API_KEY=your_google_api_key_here
83
+ ```
84
+
85
+ 5. **Run the Streamlit application:**
86
+
87
+ ```sh
88
+ streamlit run app.py
89
+ ```
90
+
91
+ 6. **Access the application:**
92
+
93
+ Open your browser and go to `http://localhost:8501`.
94
+
95
+ ## Usage
96
+
97
+ ### Uploading and Processing Documents
98
+
99
+ 1. **Upload P&L Documents:**
100
+
101
+ Use the sidebar to upload your P&L documents in PDF format.
102
+
103
+ 2. **Process Documents:**
104
+
105
+ Click the "Process Documents" button to process the uploaded files.
106
+
107
+ ### Asking Questions
108
+
109
+ 1. **Enter Your Query:**
110
+
111
+ Enter your financial queries in the input box.
112
+
113
+ 2. **Get Responses:**
114
+
115
+ The application will analyze the processed documents and provide accurate responses based on the financial data.
116
+
117
+ ### Example Queries
118
+
119
+ - "What was the total revenue for the last quarter?"
120
+ - "How much did we spend on marketing last year?"
121
+ - "What is the net profit margin for the current fiscal year?"
122
+
123
+ ## Contributing
124
+
125
+ Contributions are welcome! Please follow these steps to contribute:
126
+
127
+ 1. Fork the repository.
128
+ 2. Create a new branch (`git checkout -b feature-branch-name`).
129
+ 3. Commit your changes (`git commit -am 'Add some feature'`).
130
+ 4. Push to the branch (`git push origin feature-branch-name`).
131
+ 5. Create a new Pull Request.
132
+
133
+ ## License
134
+
135
+ This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for more details.
136
+
137
+ ## Acknowledgments
138
+
139
+ - Thanks to the Streamlit and Google Generative AI teams for their excellent tools and documentation.
140
+ - Special thanks to the open-source community for their contributions and support.
141
+
142
+ ## Contact
143
+
144
+ For any questions or support, please open an issue or contact the maintainers directly.
145
+
146
+ ---
147
+
148
+ Made with ❤️ by Vikrant Kumar
app.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from rag import RAGProcessor
3
+ import os
4
+ from dotenv import load_dotenv
5
+ import tempfile
6
+
7
# Read settings from a local .env file into the process environment.
load_dotenv()

# Without a Google API key nothing else can work, so report it and halt.
if os.getenv('GOOGLE_API_KEY') in (None, ''):
    st.error("Please set the GOOGLE_API_KEY in your .env file.")
    st.stop()
14
+
15
def initialize_session_state():
    """Seed the Streamlit session state with the entries the app reads.

    Entries that already exist are left untouched; only missing ones are
    created (a new ``RAGProcessor`` and an empty vector-store slot).
    """
    state = st.session_state

    if "rag_processor" not in state:
        state.rag_processor = RAGProcessor()

    if "vector_store" not in state:
        state.vector_store = None
21
+
22
def save_uploaded_files(uploaded_files):
    """Save uploaded files to a fresh temporary directory and return their paths.

    Args:
        uploaded_files: Iterable of uploaded-file objects, each exposing a
            ``name`` attribute and a ``getbuffer()`` method.

    Returns:
        List of paths of the files written, in upload order. If anything
        fails, the error is shown with ``st.error`` and an empty list is
        returned.
    """
    try:
        temp_dir = tempfile.mkdtemp()
        file_paths = []

        for index, uploaded_file in enumerate(uploaded_files):
            # The file name is supplied by the client. Keep only its final
            # component (treating "\\" as a separator too) so a name such as
            # "../../x.pdf" cannot place the file outside temp_dir.
            safe_name = os.path.basename(uploaded_file.name.replace("\\", "/"))
            if safe_name in ("", ".", ".."):
                # Nothing usable is left of the name; fall back to a
                # positional one.
                safe_name = f"upload_{index}.pdf"

            file_path = os.path.join(temp_dir, safe_name)
            with open(file_path, "wb") as f:
                f.write(uploaded_file.getbuffer())
            file_paths.append(file_path)

        return file_paths
    except Exception as e:
        st.error(f"Error saving uploaded files: {e}")
        return []
38
+
39
def _render_header():
    """Draw the centered page title inside its wrapper div."""
    st.markdown("<div class='main-header'>", unsafe_allow_html=True)
    st.markdown(
        "<h1 style='text-align: center;'>💰 Finance Buddy</h1>",
        unsafe_allow_html=True
    )
    st.markdown("</div>", unsafe_allow_html=True)


def _render_sidebar():
    """Draw the sidebar: logo, PDF uploader and the processing button."""
    with st.sidebar:
        st.image("PL_image-removebg-preview.png", use_column_width=True)
        st.title("📄 Document Analysis")
        uploaded_files = st.file_uploader(
            "Upload P&L Documents (PDF)",
            accept_multiple_files=True,
            type=['pdf']
        )

        # The button is only drawn once at least one file has been uploaded.
        if not (uploaded_files and st.button("Process Documents", key="process_docs")):
            return

        with st.spinner("Processing documents..."):
            try:
                # Persist the uploads to disk, then build the vector store.
                saved_paths = save_uploaded_files(uploaded_files)
                if saved_paths:
                    processor = st.session_state.rag_processor
                    st.session_state.vector_store = processor.process_documents(saved_paths)
                    st.success("✅ Documents processed successfully!")
            except Exception as e:
                st.error(f"Error processing documents: {e}")


def _render_query_section():
    """Draw the question box and, when possible, the generated answer."""
    query = st.text_input("🔍 Ask your question:", key="query")
    if not query:
        return

    if not st.session_state.vector_store:
        st.warning("Please upload and process documents first!")
        return

    with st.spinner("Analyzing..."):
        try:
            response = st.session_state.rag_processor.generate_response(
                query,
                st.session_state.vector_store
            )
            st.markdown("### 📋 Response:")
            st.markdown(f">{response}")
        except Exception as e:
            st.error(f"Error generating response: {e}")


def main():
    """Build the Finance Buddy page: header, sidebar, query area and footer."""
    st.set_page_config(
        page_title="Finance Buddy",
        page_icon="💰",
        layout="wide"
    )

    initialize_session_state()

    _render_header()
    _render_sidebar()

    # Main content
    st.markdown("""
    💡 **Ask questions about your P&L statements and financial data.**
    """)

    _render_query_section()

    # Footer
    st.markdown("---")
    st.markdown(
        "<p style='text-align: center;'>💼 Built with Streamlit & Google Generative AI</p>",
        unsafe_allow_html=True
    )


if __name__ == "__main__":
    main()
financial_data.db ADDED
Binary file (24.6 kB). View file
 
rag.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List
2
+ import google.generativeai as genai
3
+ from langchain.embeddings.base import Embeddings
4
+ from langchain_community.vectorstores import FAISS
5
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
6
+ from PyPDF2 import PdfReader
7
+ import pandas as pd
8
+ import os
9
+
10
class CustomGoogleEmbeddings(Embeddings):
    """Embeddings adapter that delegates to ``genai.embed_content``.

    Args:
        model: Embedding model name passed to ``embed_content``.
        max_chars: Inputs longer than this many characters are truncated
            before being embedded.
        dimension: Length of the all-zero vector substituted when an
            embedding request fails.
    """

    def __init__(self, model='models/embedding-001', max_chars=2048, dimension=768):
        self.client = genai
        self.model = model
        self.max_chars = max_chars
        self.dimension = dimension

    def _embed(self, text: str, task_type: str, error_label: str) -> List[float]:
        """Embed one text; on failure print the error and return a zero vector."""
        # Slicing is a no-op for short strings, so no length check is needed.
        text = text[:self.max_chars]
        try:
            return self.client.embed_content(
                model=self.model,
                content=text,
                task_type=task_type
            )['embedding']
        except Exception as e:
            print(f"{error_label}: {e}")
            # Best-effort placeholder so the caller still receives one vector
            # per input.
            # NOTE(review): a zero vector carries no meaning for similarity
            # search; confirm this fallback is still wanted.
            return [0.0] * self.dimension

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Return one embedding per text, in input order."""
        return [
            self._embed(text, "retrieval_document", "Embedding error")
            for text in texts
        ]

    def embed_query(self, text: str) -> List[float]:
        """Return the embedding of a single query string."""
        return self._embed(text, "retrieval_query", "Query embedding error")
43
+
44
class RAGProcessor:
    """Retrieval-augmented question answering over P&L PDF documents.

    Text is extracted from PDFs, split into overlapping chunks, embedded into
    a FAISS vector store, and the chunks most similar to a question are
    passed as context to a generative model.
    """

    # Substrings, matched case-insensitively, that mark a line as a
    # financial entry.
    FINANCIAL_KEYWORDS = (
        'revenue', 'profit', 'loss', 'expenses', 'income',
        'cost', 'margin', 'ebitda', 'tax',
    )

    def __init__(self):
        self.embeddings = CustomGoogleEmbeddings()
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            separators=["\n\n", "\n", ".", ",", " ", ""]
        )
        genai.configure(api_key=os.getenv('GOOGLE_API_KEY'))
        self.model = genai.GenerativeModel('gemini-pro')

    @classmethod
    def _tag_financial_lines(cls, text: str) -> str:
        """Prefix every line containing a financial keyword with a marker.

        Each input line is emitted followed by a newline; lines containing
        one of ``FINANCIAL_KEYWORDS`` get a ``FINANCIAL_ENTRY: `` prefix.
        """
        tagged = []
        for line in text.split('\n'):
            lowered = line.lower()
            if any(keyword in lowered for keyword in cls.FINANCIAL_KEYWORDS):
                tagged.append(f"FINANCIAL_ENTRY: {line}\n")
            else:
                tagged.append(line + "\n")
        return "".join(tagged)

    def extract_text_from_pdf(self, pdf_file) -> str:
        """Extract text from a PDF and tag lines that look like P&L entries.

        Args:
            pdf_file: Anything ``PdfReader`` accepts (the app passes a path).

        Returns:
            The tagged text, or ``""`` if the PDF could not be read (the
            error is printed rather than raised).
        """
        try:
            pdf_reader = PdfReader(pdf_file)
            # Defensive: treat a falsy extract_text() result as empty text
            # so the concatenation below cannot fail.
            text = "".join(
                (page.extract_text() or "") + "\n\n"
                for page in pdf_reader.pages
            )
            return self._tag_financial_lines(text)

        except Exception as e:
            print(f"Error extracting text from PDF: {e}")
            return ""

    def process_documents(self, pdf_files: List[str]) -> "FAISS":
        """Build a FAISS vector store from the text of several PDFs.

        Args:
            pdf_files: Paths of the PDF documents to index.

        Returns:
            A FAISS vector store holding the embedded text chunks.

        Raises:
            ValueError: If no text could be extracted from any document.
            Exception: Whatever ``FAISS.from_texts`` raises is printed and
                re-raised.
        """
        combined_text = "".join(
            self.extract_text_from_pdf(pdf) for pdf in pdf_files
        )

        # Fail early with a clear message instead of handing an empty chunk
        # list to FAISS.
        if not combined_text.strip():
            raise ValueError(
                "No extractable text found in the provided PDF documents."
            )

        # Create more focused chunks
        text_chunks = self.text_splitter.split_text(combined_text)
        if not text_chunks:
            raise ValueError(
                "No extractable text found in the provided PDF documents."
            )

        # Create vector store
        try:
            return FAISS.from_texts(text_chunks, embedding=self.embeddings)
        except Exception as e:
            print(f"Error creating vector store: {e}")
            raise

    def generate_response(self, question: str, vector_store: "FAISS") -> str:
        """Answer a question using the most similar chunks as context.

        Args:
            question: The user's question.
            vector_store: Store previously built by ``process_documents``.

        Returns:
            The model's answer text, or an ``"Error generating response: ..."``
            string if the generation call fails.
        """
        # Retrieve relevant context
        docs = vector_store.similarity_search(question, k=4)
        context = "\n".join(doc.page_content for doc in docs)

        prompt = f"""
        You are a financial analyst assistant. Using the following financial data context,
        answer the question accurately and professionally. Include specific numbers and
        calculations when relevant.

        Context: {context}

        Question: {question}

        If the context doesn't contain enough information to answer accurately,
        please state that clearly. Focus on P&L related information and financial metrics.
        When providing financial figures, please format them clearly with appropriate units
        (e.g., "$1,234,567" or "1.2M" for millions).
        """

        try:
            response = self.model.generate_content(prompt)
            return response.text
        except Exception as e:
            return f"Error generating response: {e}"
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ python-dotenv
3
+ google-generativeai
4
+ langchain
5
+ langchain-community
6
+ faiss-cpu
7
+ PyPDF2
8
+ tabula-py
9
+ pandas
10
+ numpy
11
+ python-multipart
task_image-removebg-preview.png ADDED