File size: 4,663 Bytes
f10ec56
 
2c359f1
c7d0bb8
 
f10ec56
c7d0bb8
 
2c359f1
a6ee9ca
c7d0bb8
a6ee9ca
6ca4f9e
11d5829
 
a6ee9ca
11d5829
 
 
 
 
 
 
a6ee9ca
11d5829
 
a6ee9ca
11d5829
 
 
 
a6ee9ca
11d5829
 
a6ee9ca
11d5829
 
 
631a831
 
 
 
 
 
 
 
 
 
 
77dc4ed
 
631a831
 
77dc4ed
631a831
 
 
 
 
 
 
 
 
 
 
 
 
 
161fe1c
 
631a831
 
 
 
 
 
 
 
161fe1c
 
 
 
 
 
 
 
 
 
 
631a831
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import streamlit as st
from transformers import pipeline

# Streamlit entry point: ask the user for a CSV file of transactions.
# The rest of the script only runs once a file has been provided
# (the uploader's value is compared against None below).
uploaded_file = st.file_uploader("Upload Expense CSV", type="csv")

if uploaded_file is not None:
    # Parse the uploaded CSV into a DataFrame
    df = pd.read_csv(uploaded_file)

    # Debug aid: list the parsed column names in the UI so a missing or
    # misspelled 'Description' header is easy to spot
    st.write("Columns in the uploaded file:", df.columns)

    # 'Description' is the text that gets categorized further down; without it
    # the app reports the problem and does not run the analysis branch.
    # NOTE(review): later steps also read the 'Amount' and 'Date' columns, which
    # are not checked here — confirm whether they should be validated as well.
    if 'Description' not in df.columns:
        st.error("Error: The CSV file does not contain a 'Description' column.")
    else:
        # Zero-shot classification is built on natural language inference (NLI):
        # the pipeline scores "this text is about <label>" as an entailment.
        # The plain 'distilbert-base-uncased' checkpoint has no trained NLI head,
        # so its scores are effectively random; use the MNLI fine-tuned variant.
        model_name = 'typeform/distilbert-base-uncased-mnli'

        # Streamlit re-executes this script on every widget interaction (e.g. each
        # slider move), so cache the pipeline instead of reloading the model.
        @st.cache_resource
        def load_classifier(name):
            """Build the zero-shot classification pipeline for model `name` once."""
            return pipeline('zero-shot-classification', model=name)

        classifier = load_classifier(model_name)

        # List of possible expense categories
        categories = ["Groceries", "Rent", "Utilities", "Entertainment", "Dining", "Transportation", "Salary"]

        # Results are cached per description so reruns (and repeated descriptions)
        # do not pay for model inference again.
        @st.cache_data(show_spinner=False)
        def categorize_expense(description):
            """Return the category the classifier ranks highest for `description`.

            Missing or blank descriptions (empty CSV cells are read as NaN) cannot
            be classified, so they are labelled "Uncategorized" instead of being
            passed to the pipeline, which rejects empty input.
            """
            if pd.isna(description) or not str(description).strip():
                return "Uncategorized"
            result = classifier(str(description), candidate_labels=categories)
            return result['labels'][0]  # Labels are sorted by descending score

        # Apply the categorization function to the 'Description' column in the dataset
        df['Category'] = df['Description'].apply(categorize_expense)

        # Show the categorized data
        st.write("Categorized Data:", df.head())

        # Visualization 1: Pie Chart of Spending by Category
        # Everything from here on aggregates 'Amount'; report a missing column the
        # same way as a missing 'Description' instead of failing with a KeyError.
        if 'Amount' not in df.columns:
            st.error("Error: The CSV file does not contain an 'Amount' column.")
            st.stop()

        category_expenses = df.groupby('Category')['Amount'].sum()

        # Matplotlib refuses to draw negative wedge sizes, so categories whose
        # total is negative (e.g. refunds outweighing purchases) are left out.
        negative_totals = category_expenses < 0
        if negative_totals.any():
            skipped = ", ".join(map(str, category_expenses.index[negative_totals]))
            st.warning(f"Categories with a negative total are left out of the pie chart: {skipped}")
        pie_data = category_expenses[~negative_totals]

        if pie_data.sum() > 0:
            # Plot pie chart for expense distribution by category
            fig1, ax1 = plt.subplots(figsize=(8, 8))
            pie_data.plot(kind='pie', autopct='%1.1f%%', startangle=90, colors=plt.cm.Paired.colors, ax=ax1)
            ax1.set_title('Expense Distribution by Category')
            ax1.set_ylabel('')  # Hide the y-axis label
            st.pyplot(fig1)
            # pyplot keeps every figure it creates until it is closed, and this
            # script reruns on each interaction; close it to avoid leaking figures.
            plt.close(fig1)
        else:
            st.info("No positive category totals to show in the pie chart.")

        # Visualization 2: Monthly Spending Trends (Line Chart)
        # Report a missing 'Date' column in the UI instead of raising a KeyError.
        if 'Date' not in df.columns:
            st.error("Error: The CSV file does not contain a 'Date' column.")
            st.stop()

        # Parse dates leniently: empty or unparseable cells become NaT rather than
        # aborting the whole run.
        parsed_dates = pd.to_datetime(df['Date'], errors='coerce')
        invalid_dates = int(parsed_dates.isna().sum())
        if invalid_dates:
            st.warning(
                f"{invalid_dates} row(s) have a missing or invalid 'Date' "
                "and are excluded from the monthly totals."
            )

        # Keep only the date part (no time) for display
        df['Date'] = parsed_dates.dt.date

        # Year-month as a plain string so the grouping key is JSON-serializable
        # for Plotly; rows with an invalid date get NaN and drop out of the groupby.
        df['Month'] = parsed_dates.dt.strftime('%Y-%m')

        # Group by month and calculate the total amount spent per month
        monthly_expenses = df.groupby('Month')['Amount'].sum()

        # Plot monthly spending trends as a line chart
        fig2 = px.line(
            monthly_expenses,
            x=monthly_expenses.index,
            y=monthly_expenses.values,
            title="Monthly Expenses",
            labels={"x": "Month", "y": "Amount ($)"}
        )
        st.plotly_chart(fig2)

        # Starting monthly budget (in dollars) offered for each spending category
        default_budgets = dict(
            Groceries=300,
            Rent=1000,
            Utilities=150,
            Entertainment=100,
            Dining=150,
            Transportation=120,
        )

        # Let the user tune each category's monthly budget
        st.write("Adjust your monthly budget for each category:")

        # One slider per category, rendered in the order the defaults are listed;
        # the mapping holds whatever value each slider currently shows.
        budgets = {
            name: st.slider(
                f"Budget for {name} ($)",
                min_value=0,
                max_value=2000,
                value=starting_value,
                step=50,
            )
            for name, starting_value in default_budgets.items()
        }

        # Budgets are set per category and per month, so compare each category's
        # total for a month against its budget rather than single transactions.
        spending = df.groupby(['Month', 'Category'], as_index=False)['Amount'].sum()

        # Categories that have no budget slider (e.g. "Salary") map to NaN here;
        # comparisons against NaN are False, so they are never reported.
        spending['Budget'] = spending['Category'].map(budgets)
        exceeded_budget = spending[spending['Amount'] > spending['Budget']]

        # Keep a per-transaction flag: a row is marked when its category went
        # over budget in that row's month.
        over_budget_keys = set(zip(exceeded_budget['Month'], exceeded_budget['Category']))
        df['Budget_Exceeded'] = [
            key in over_budget_keys for key in zip(df['Month'], df['Category'])
        ]

        # Show which categories exceeded their budgets, and in which month
        st.write(
            "Categories that exceeded the budget:",
            exceeded_budget[['Month', 'Category', 'Amount', 'Budget']],
        )

        # Visualization 3: Monthly Spending vs Budget (Bar Chart)
        # The same overall budget (sum of all category budgets) is shown next to
        # every month's actual total; the scalar is broadcast over the months.
        # NOTE(review): 'Actual' sums every category, including ones that have no
        # budget slider — confirm that this comparison is intended.
        monthly_expenses_df = pd.DataFrame({
            'Actual': monthly_expenses,
            'Budget': sum(budgets.values()),
        })

        if monthly_expenses_df.empty:
            # DataFrame.plot raises when there is nothing to draw
            st.info("No monthly totals available to compare against the budget.")
        else:
            # Create the figure explicitly so it can be handed to Streamlit
            fig3, ax3 = plt.subplots(figsize=(10, 6))
            monthly_expenses_df.plot(kind='bar', ax=ax3)  # Draw on our own axes
            ax3.set_title('Monthly Spending vs Budget')
            ax3.set_ylabel('Amount ($)')

            # Display the plot with Streamlit
            st.pyplot(fig3)
            # pyplot keeps every figure it creates until it is closed, and this
            # script reruns on each interaction; close it to avoid leaking figures.
            plt.close(fig3)