File size: 4,663 Bytes
f10ec56 2c359f1 c7d0bb8 f10ec56 c7d0bb8 2c359f1 a6ee9ca c7d0bb8 a6ee9ca 6ca4f9e 11d5829 a6ee9ca 11d5829 a6ee9ca 11d5829 a6ee9ca 11d5829 a6ee9ca 11d5829 a6ee9ca 11d5829 631a831 77dc4ed 631a831 77dc4ed 631a831 161fe1c 631a831 161fe1c 631a831 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 |
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import streamlit as st
from transformers import pipeline
# Zero-shot classification poses every candidate label as an NLI hypothesis,
# so the checkpoint must be fine-tuned on an NLI task. The plain
# 'distilbert-base-uncased' checkpoint has no trained classification head,
# which is why an MNLI-tuned DistilBERT checkpoint is used here instead.
ZERO_SHOT_MODEL = "typeform/distilbert-base-uncased-mnli"

# Candidate expense categories offered to the classifier.
CATEGORIES = (
    "Groceries",
    "Rent",
    "Utilities",
    "Entertainment",
    "Dining",
    "Transportation",
    "Salary",
)

# Label given to rows whose description is missing or blank.
UNCATEGORIZED = "Uncategorized"

# Columns the dashboard reads from the uploaded CSV.
REQUIRED_COLUMNS = ("Description", "Amount", "Date")

# Initial slider positions: monthly budget per category, in dollars.
DEFAULT_BUDGETS = {
    "Groceries": 300,
    "Rent": 1000,
    "Utilities": 150,
    "Entertainment": 100,
    "Dining": 150,
    "Transportation": 120,
}


def _load_classifier():
    """Build the Hugging Face zero-shot classification pipeline."""
    return pipeline("zero-shot-classification", model=ZERO_SHOT_MODEL)


def _top_labels(texts, candidate_labels, _classifier):
    """Return the highest-scoring label for each text, in input order.

    Args:
        texts: Tuple of non-empty strings to classify.
        candidate_labels: Tuple of label names to choose from.
        _classifier: Zero-shot pipeline. The leading underscore keeps
            ``st.cache_data`` from trying to hash the model object.

    Returns:
        A list with one label per entry of ``texts``.
    """
    labels = list(candidate_labels)
    return [
        # The pipeline returns labels sorted by descending score.
        _classifier(text, candidate_labels=labels)["labels"][0]
        for text in texts
    ]


def categorize_descriptions(descriptions: pd.Series, classify_texts) -> pd.Series:
    """Assign a category to every transaction description.

    Each distinct description is classified once; missing or blank
    descriptions are labelled ``UNCATEGORIZED`` without calling the model.

    Args:
        descriptions: Raw description column (may contain NaN/None).
        classify_texts: Callable taking a tuple of distinct, non-empty strings
            and returning one label per string, in the same order.

    Returns:
        A Series of labels aligned with ``descriptions``' index.
    """
    texts = descriptions.map(lambda value: "" if pd.isna(value) else str(value).strip())
    distinct = tuple(text for text in texts.unique() if text)
    labels = classify_texts(distinct) if distinct else []
    label_by_text = dict(zip(distinct, labels))
    return texts.map(lambda text: label_by_text.get(text, UNCATEGORIZED))


def prepare_transactions(df: pd.DataFrame) -> pd.DataFrame:
    """Return a cleaned copy of the uploaded transactions.

    ``Amount`` is coerced to a number, ``Date`` is reduced to a plain date and
    a ``Month`` column ('YYYY-MM' string) is added. Rows whose amount or date
    cannot be parsed are dropped instead of raising.
    """
    amounts = pd.to_numeric(df["Amount"], errors="coerce")
    dates = pd.to_datetime(df["Date"], errors="coerce")
    valid = amounts.notna() & dates.notna()

    cleaned = df.loc[valid].copy()
    cleaned["Amount"] = amounts[valid]
    cleaned["Date"] = dates[valid].dt.date
    # Plain strings (not Period objects) so the values serialise to JSON.
    cleaned["Month"] = dates[valid].dt.strftime("%Y-%m")
    return cleaned


def monthly_totals(df: pd.DataFrame) -> pd.Series:
    """Return the summed ``Amount`` per ``Month``, indexed by month string."""
    return df.groupby("Month")["Amount"].sum()


def find_budget_overruns(df: pd.DataFrame, budgets: dict) -> pd.DataFrame:
    """Return the (Month, Category) totals that exceed their monthly budget.

    Budgets are monthly, so spending is summed per month and category before
    comparing. Categories without an entry in ``budgets`` are never reported.

    Returns:
        A DataFrame with columns ``Month``, ``Category``, ``Amount`` (the
        summed spending) and ``Budget``.
    """
    totals = df.groupby(["Month", "Category"], as_index=False)["Amount"].sum()
    totals["Budget"] = totals["Category"].map(budgets)
    exceeded = totals["Budget"].notna() & (totals["Amount"] > totals["Budget"])
    return totals[exceeded].reset_index(drop=True)


def main():
    """Render the expense dashboard: upload, categorise, chart and budget."""
    uploaded_file = st.file_uploader("Upload Expense CSV", type="csv")
    if uploaded_file is None:
        return

    try:
        raw = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as err:
        st.error(f"Error: The CSV file could not be read: {err}")
        return

    # Debug: display the column names found in the upload.
    st.write("Columns in the uploaded file:", raw.columns)

    missing = [column for column in REQUIRED_COLUMNS if column not in raw.columns]
    if missing:
        st.error(
            "Error: The CSV file is missing required column(s): "
            + ", ".join(f"'{column}'" for column in missing)
        )
        return

    df = prepare_transactions(raw)
    skipped = len(raw) - len(df)
    if skipped:
        st.warning(f"Skipped {skipped} row(s) with an unreadable 'Amount' or 'Date'.")
    if df.empty:
        st.warning("No valid transactions found in the uploaded file.")
        return

    # The script reruns on every widget interaction; cache the model and the
    # labels so moving a slider does not reload or re-run the classifier.
    classifier = st.cache_resource(_load_classifier)()
    cached_top_labels = st.cache_data(_top_labels)
    df["Category"] = categorize_descriptions(
        df["Description"],
        lambda texts: cached_top_labels(texts, CATEGORIES, classifier),
    )
    st.write("Categorized Data:", df.head())

    # Visualization 1: pie chart of spending by category.
    category_expenses = df.groupby("Category")["Amount"].sum()
    if (category_expenses < 0).any():
        # A pie chart cannot represent negative totals.
        st.warning("Pie chart skipped: at least one category has a negative total.")
    else:
        fig1, ax1 = plt.subplots(figsize=(8, 8))
        category_expenses.plot(
            kind="pie",
            autopct="%1.1f%%",
            startangle=90,
            colors=plt.cm.Paired.colors,
            ax=ax1,
        )
        ax1.set_title("Expense Distribution by Category")
        ax1.set_ylabel("")  # Hide the y-axis label
        st.pyplot(fig1)
        plt.close(fig1)  # pyplot keeps figures alive until closed

    # Visualization 2: monthly spending trend (line chart).
    monthly_expenses = monthly_totals(df)
    fig2 = px.line(
        monthly_expenses.reset_index(),
        x="Month",
        y="Amount",
        title="Monthly Expenses",
        labels={"Amount": "Amount ($)"},
    )
    st.plotly_chart(fig2)

    # Sliders for adjusting the monthly budget of each category.
    st.write("Adjust your monthly budget for each category:")
    budgets = {
        category: st.slider(
            f"Budget for {category} ($)",
            min_value=0,
            max_value=2000,
            value=default,
            step=50,
        )
        for category, default in DEFAULT_BUDGETS.items()
    }

    # Show which categories went over budget, per month.
    overruns = find_budget_overruns(df, budgets)
    st.write("Categories that exceeded the budget:", overruns)

    # Visualization 3: monthly spending vs. total budget (bar chart).
    fig3, ax3 = plt.subplots(figsize=(10, 6))
    comparison = pd.DataFrame({
        "Actual": monthly_expenses,
        "Budget": sum(budgets.values()),  # Same total budget for every month
    })
    comparison.plot(kind="bar", ax=ax3)
    ax3.set_title("Monthly Spending vs Budget")
    ax3.set_ylabel("Amount ($)")
    st.pyplot(fig3)
    plt.close(fig3)


# `streamlit run` executes this file as __main__, so the guard keeps the app
# behaviour while letting the helpers above be imported without side effects.
if __name__ == "__main__":
    main()
|