import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
import streamlit as st
from transformers import pipeline


# The zero-shot pipeline ranks labels with an NLI (entailment) head, so it needs
# a checkpoint fine-tuned on NLI data. The plain 'distilbert-base-uncased'
# checkpoint has no trained classification head, which makes its label scores
# meaningless. 'facebook/bart-large-mnli' is a larger alternative.
ZERO_SHOT_MODEL = "typeform/distilbert-base-uncased-mnli"

# Candidate labels offered to the zero-shot classifier.
CATEGORIES = (
    "Groceries",
    "Rent",
    "Utilities",
    "Entertainment",
    "Dining",
    "Transportation",
    "Salary",
)

# Label used for rows whose description is empty and cannot be classified.
UNCATEGORIZED = "Uncategorized"

# Columns the dashboard reads from the uploaded CSV.
REQUIRED_COLUMNS = ("Description", "Amount", "Date")

# Initial slider positions for the per-category monthly budgets.
DEFAULT_BUDGETS = {
    "Groceries": 300,
    "Rent": 1000,
    "Utilities": 150,
    "Entertainment": 100,
    "Dining": 150,
    "Transportation": 120,
}


def load_classifier(model_name=ZERO_SHOT_MODEL):
    """Build the zero-shot classification pipeline for *model_name*."""
    return pipeline("zero-shot-classification", model=model_name)


def _cached_classifier():
    """Return the classifier, reusing one instance across Streamlit reruns.

    Streamlit re-executes the script on every widget interaction; without
    caching, the model would be reloaded each time a slider moves.
    """
    cache_resource = getattr(st, "cache_resource", None)
    if cache_resource is None:
        # Older Streamlit without st.cache_resource: fall back to a plain load.
        return load_classifier()
    return cache_resource(load_classifier)()


def categorize_descriptions(descriptions, classifier,
                            candidate_labels=CATEGORIES, cache=None):
    """Return a Series with the top-scoring label for each description.

    Args:
        descriptions: Series of free-text expense descriptions.
        classifier: Callable invoked as ``classifier(text, candidate_labels=...)``
            that returns a mapping whose ``"labels"`` list is ordered best-first.
        candidate_labels: Labels the classifier chooses between.
        cache: Optional dict mapping description text to label. It is read and
            updated in place so repeated calls skip already-classified text.

    Returns:
        A Series aligned with *descriptions*. Empty or missing descriptions are
        labelled ``UNCATEGORIZED`` instead of being sent to the classifier.
    """
    labels = list(candidate_labels)
    known = {} if cache is None else cache
    cleaned = descriptions.fillna("").astype(str).str.strip()
    # Classify each distinct text once; expense files repeat merchants a lot.
    for text in cleaned.unique():
        if text in known:
            continue
        if not text:
            known[text] = UNCATEGORIZED
        else:
            known[text] = classifier(text, candidate_labels=labels)["labels"][0]
    return cleaned.map(known)


def prepare_expenses(df):
    """Return a cleaned copy of *df* with usable Date, Amount and Month columns.

    Rows whose ``Date`` cannot be parsed or whose ``Amount`` is not numeric are
    dropped. ``Date`` becomes a ``datetime.date``, ``Amount`` becomes numeric
    and ``Month`` is added as a ``YYYY-MM`` string.
    """
    parsed_dates = pd.to_datetime(df["Date"], errors="coerce")
    amounts = pd.to_numeric(df["Amount"], errors="coerce")
    valid = parsed_dates.notna() & amounts.notna()

    cleaned = df.loc[valid].copy()
    cleaned["Amount"] = amounts[valid]
    cleaned["Month"] = parsed_dates[valid].dt.strftime("%Y-%m")
    cleaned["Date"] = parsed_dates[valid].dt.date
    return cleaned


def find_budget_overruns(df, budgets):
    """Return the (Month, Category) totals that exceed the monthly budget.

    Args:
        df: DataFrame with ``Month``, ``Category`` and ``Amount`` columns.
        budgets: Mapping of category name to monthly budget.

    Returns:
        DataFrame with columns ``Month``, ``Category``, ``Amount`` (the monthly
        total) and ``Budget``, holding only the rows where the total is above
        the budget. Categories without a budget are ignored.
    """
    budgeted = df[df["Category"].isin(list(budgets))]
    totals = budgeted.groupby(["Month", "Category"], as_index=False)["Amount"].sum()
    totals["Budget"] = totals["Category"].map(budgets)
    return totals[totals["Amount"] > totals["Budget"]].reset_index(drop=True)


def main():
    """Render the expense dashboard for an uploaded CSV.

    Reads the upload, validates the required columns, categorizes each
    description, then shows a category pie chart, a monthly line chart, budget
    sliders, the budget overruns and a monthly spending-vs-budget bar chart.
    """
    uploaded_file = st.file_uploader("Upload Expense CSV", type="csv")
    if uploaded_file is None:
        return

    try:
        raw = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        st.error(f"Error: The CSV file could not be read ({exc}).")
        return

    st.write("Columns in the uploaded file:", list(raw.columns))

    missing = [column for column in REQUIRED_COLUMNS if column not in raw.columns]
    if missing:
        names = ", ".join(f"'{column}'" for column in missing)
        st.error(f"Error: The CSV file does not contain the required column(s): {names}.")
        return

    df = prepare_expenses(raw)
    skipped = len(raw) - len(df)
    if skipped:
        st.warning(f"Skipped {skipped} row(s) with an unreadable Date or Amount.")
    if df.empty:
        st.warning("No valid expense rows were found in the uploaded file.")
        return

    # Keep labels for the whole session so slider changes do not re-run the model.
    if "category_cache" not in st.session_state:
        st.session_state["category_cache"] = {}
    df["Category"] = categorize_descriptions(
        df["Description"],
        _cached_classifier(),
        cache=st.session_state["category_cache"],
    )
    st.write("Categorized Data:", df.head())

    # Pie chart of spending per category.
    category_expenses = df.groupby("Category")["Amount"].sum()
    # A pie chart cannot draw negative or all-zero wedges.
    positive_expenses = category_expenses[category_expenses > 0]
    if positive_expenses.empty:
        st.info("No positive amounts to show in the category chart.")
    else:
        fig1, ax1 = plt.subplots(figsize=(8, 8))
        positive_expenses.plot(
            kind="pie",
            autopct="%1.1f%%",
            startangle=90,
            colors=plt.cm.Paired.colors,
            ax=ax1,
        )
        ax1.set_title("Expense Distribution by Category")
        ax1.set_ylabel("")
        st.pyplot(fig1)
        # Figures persist in pyplot's registry across reruns unless closed.
        plt.close(fig1)

    # Line chart of total spending per month.
    monthly_expenses = df.groupby("Month")["Amount"].sum()
    fig2 = px.line(
        monthly_expenses.reset_index(),
        x="Month",
        y="Amount",
        title="Monthly Expenses",
        labels={"Amount": "Amount ($)"},
    )
    st.plotly_chart(fig2)

    # Budget sliders, one per budgeted category.
    st.write("Adjust your monthly budget for each category:")
    budgets = {
        category: st.slider(
            f"Budget for {category} ($)",
            min_value=0,
            max_value=2000,
            value=default,
            step=50,
        )
        for category, default in DEFAULT_BUDGETS.items()
    }

    # Budgets are monthly, so compare monthly category totals, not single rows.
    overruns = find_budget_overruns(df, budgets)
    st.write("Categories that exceeded the budget:", overruns)

    # Bar chart of monthly spending against the combined budget.
    # NOTE(review): 'Actual' still includes unbudgeted categories such as
    # Salary; confirm whether those should be excluded from this comparison.
    fig3, ax3 = plt.subplots(figsize=(10, 6))
    comparison = pd.DataFrame({
        "Actual": monthly_expenses,
        "Budget": sum(budgets.values()),
    })
    comparison.plot(kind="bar", ax=ax3)
    ax3.set_title("Monthly Spending vs Budget")
    ax3.set_ylabel("Amount ($)")
    st.pyplot(fig3)
    plt.close(fig3)


# Streamlit executes the script with __name__ set to "__main__".
if __name__ == "__main__":
    main()