{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"id": "16fd83c7-9f91-40ab-ac15-57b02a63b7f4",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import os\n",
"os.environ['HF_HOME'] = \"/scratch/tar3kh/models/cache\"\n",
"import torch \n",
"\n",
"from datasets import load_dataset #datasets is huggingface's dataset package\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import pandas as pd\n",
"import PIL"
]
},
{
"cell_type": "markdown",
"id": "f748fb12-da99-4702-bfb2-263e091fee14",
"metadata": {},
"source": [
"## Synthetic Dataset (Generating Budgets)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "61502633-b04c-44fd-b39f-7803ef778205",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"size = 3000\n",
"\n",
"np.random.seed(60)\n",
"Income_Randomizer = np.random.randint(29000,251000, size=size).astype(int)\n",
"#print(Income_Randomizer)\n",
"np.random.seed(60)\n",
"\n",
"Rent_Randomizer = np.random.randint(500,2500, size=size).astype(int)\n",
"#print(Rent_Randomizer)\n",
"np.random.seed(60)\n",
"\n",
"Car_Randomizer = np.random.randint(200,1000, size=size).astype(int)\n",
"#print(Car_Randomizer)\n",
"np.random.seed(60)\n",
"\n",
"Other_Randomizer = np.random.randint(200,600, size=size).astype(int)\n",
"#print(Other_Randomizer)\n",
"\n",
"Example_promtps = []\n",
"\n",
"for x in range(len(Income_Randomizer)):\n",
" Example_promtps.append('I have an income of about ' +\n",
" str(Income_Randomizer[x]) +\n",
" ' a year and my monthly expenses include ' +\n",
" str(Rent_Randomizer[x]) +\n",
" ' a month in rent and utilities, a ' +\n",
" str(Car_Randomizer[x]) +\n",
" ' car payment, $300 in food, and about ' +\n",
" str(Other_Randomizer[x]) +\n",
" ' a month in other expenses. Using python, can you create for me a budget spreadsheet and export it to excel?')\n",
"\n",
"#Example_promtps = ['I have an income of about ' + str(Income_Randomizer[0]) + ' a year and my monthly expenses include ' + str(Rent_Randomizer[0]) + ' a month in rent and utilities, a ' + str(Car_Randomizer[0]) + ' car payment, $300 in food, and about ' + str(Other_Randomizer[0]) + ' a month in other expenses. Using python, can you create for me a budget spreadsheet and export it to excel?',]\n",
"Example_outputs = []\n",
"\n",
"for x in range(len(Income_Randomizer)):\n",
" Example_outputs.append(''' import pandas as pd\n",
"import openpyxl\n",
"\n",
"# Define income and expenses\n",
"annual_income = '''+ str(Income_Randomizer[x])+'''\n",
"monthly_income = annual_income / 12\n",
"\n",
"expenses = {\n",
" \"Rent & Utilities\": '''+ str(Rent_Randomizer[x] )+''',\n",
" \"Car Payment\": '''+ str(Car_Randomizer[x]) +''',\n",
" \"Food\": 300,\n",
" \"Other Expenses\": '''+ str(Other_Randomizer[x]) +'''\n",
"}\n",
"\n",
"total_expenses = sum(expenses.values())\n",
"net_savings = monthly_income - total_expenses\n",
"\n",
"# Create DataFrame\n",
"budget_data = {\n",
" \"Category\": [\"Monthly Income\"] + list(expenses.keys()) + [\"Total Expenses\", \"Net Savings\"],\n",
" \"Amount ($)\": [monthly_income] + list(expenses.values()) + [total_expenses, net_savings]\n",
"}\n",
"\n",
"df = pd.DataFrame(budget_data)\n",
"\n",
"# Save to Excel\n",
"file_name = \"budget.xlsx\"\n",
"df.to_excel(file_name, index=False, engine='openpyxl')\n",
"\n",
"print(f\"Budget spreadsheet saved as {file_name}\")''')\n",
"\n",
"df2 = pd.DataFrame({'question':Example_promtps,\n",
" 'response': Example_outputs})\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "79e98786-47d7-4a83-943a-7d5484bd4c2c",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Build the chain-of-thought prompt prefix once, then derive both columns from it:\n",
"# 'question_1' is the bare prompt, 'instruct' is the prompt followed by the answer.\n",
"cot_prompt = \"Q: \" + df2['question'] + \"\\n\\nA: \" + \"Lets think step by step.\"\n",
"df2['instruct'] = cot_prompt + df2['response']\n",
"df2['question_1'] = cot_prompt"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "1199470a-790e-4c60-8bb5-15d7b64aa43a",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" question | \n",
" response | \n",
" instruct | \n",
" question_1 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" I have an income of about 162325 a year and m... | \n",
" import pandas as pd\\nimport openpyxl\\n\\n# Def... | \n",
" Q: I have an income of about 162325 a year an... | \n",
" Q: I have an income of about 162325 a year an... | \n",
"
\n",
" \n",
" 1 | \n",
" I have an income of about 35543 a year and my... | \n",
" import pandas as pd\\nimport openpyxl\\n\\n# Def... | \n",
" Q: I have an income of about 35543 a year and... | \n",
" Q: I have an income of about 35543 a year and... | \n",
"
\n",
" \n",
" 2 | \n",
" I have an income of about 203179 a year and m... | \n",
" import pandas as pd\\nimport openpyxl\\n\\n# Def... | \n",
" Q: I have an income of about 203179 a year an... | \n",
" Q: I have an income of about 203179 a year an... | \n",
"
\n",
" \n",
" 3 | \n",
" I have an income of about 197008 a year and m... | \n",
" import pandas as pd\\nimport openpyxl\\n\\n# Def... | \n",
" Q: I have an income of about 197008 a year an... | \n",
" Q: I have an income of about 197008 a year an... | \n",
"
\n",
" \n",
" 4 | \n",
" I have an income of about 223681 a year and m... | \n",
" import pandas as pd\\nimport openpyxl\\n\\n# Def... | \n",
" Q: I have an income of about 223681 a year an... | \n",
" Q: I have an income of about 223681 a year an... | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" question \\\n",
"0 I have an income of about 162325 a year and m... \n",
"1 I have an income of about 35543 a year and my... \n",
"2 I have an income of about 203179 a year and m... \n",
"3 I have an income of about 197008 a year and m... \n",
"4 I have an income of about 223681 a year and m... \n",
"\n",
" response \\\n",
"0 import pandas as pd\\nimport openpyxl\\n\\n# Def... \n",
"1 import pandas as pd\\nimport openpyxl\\n\\n# Def... \n",
"2 import pandas as pd\\nimport openpyxl\\n\\n# Def... \n",
"3 import pandas as pd\\nimport openpyxl\\n\\n# Def... \n",
"4 import pandas as pd\\nimport openpyxl\\n\\n# Def... \n",
"\n",
" instruct \\\n",
"0 Q: I have an income of about 162325 a year an... \n",
"1 Q: I have an income of about 35543 a year and... \n",
"2 Q: I have an income of about 203179 a year an... \n",
"3 Q: I have an income of about 197008 a year an... \n",
"4 Q: I have an income of about 223681 a year an... \n",
"\n",
" question_1 \n",
"0 Q: I have an income of about 162325 a year an... \n",
"1 Q: I have an income of about 35543 a year and... \n",
"2 Q: I have an income of about 203179 a year an... \n",
"3 Q: I have an income of about 197008 a year an... \n",
"4 Q: I have an income of about 223681 a year an... "
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first five rows of the budget dataset.\n",
"df2.head(n=5)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "1b66310d-41e4-4592-8516-b35b635baead",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Write the budget examples to disk; the row index is left out of the CSV.\n",
"budget_csv_path = 'budget_dataset.csv'\n",
"df2.to_csv(budget_csv_path, index=False)"
]
},
{
"cell_type": "markdown",
"id": "2fac1974-19a0-4853-bbe5-9867b57819ce",
"metadata": {},
"source": [
"## Synthetic Dataset (Financial Goals)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "e4d8b755-25ec-472f-a8e4-52d153ff2f46",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# datset set up\n",
"\n",
"size = 3000\n",
"\n",
"\n",
"np.random.seed(60)\n",
"short_term_goals = np.random.randint(1000,5000, size=size).astype(int)\n",
"\n",
"np.random.seed(60)\n",
"medium_term_goals = np.random.randint(5000,10000, size=size).astype(int)\n",
"\n",
"np.random.seed(60)\n",
"long_term_goals = np.random.randint(75000,200000, size=size).astype(int)\n",
"\n",
"# print(short_term_goals)\n",
"# print(medium_term_goals)\n",
"# print(long_term_goals)\n",
"\n",
"prompts = []\n",
"\n",
"for x in range(len(short_term_goals)):\n",
" prompts.append('My short term goal is to save for a $' +\n",
" str(short_term_goals[x]) +\n",
" ' vacation in the next year, my medium term goal is to save for down payment for a new car, around ' +\n",
" str(medium_term_goals[x]) +\n",
" ' in the next 2 or 3 years, and my long term goal is to save for a down payment for a house around ' +\n",
" str(long_term_goals[x]) +\n",
" ' in the next ten years, can you help me integrate these goals into my budget as well as where I should store these savings?')\n",
"\n",
"outputs = []\n",
"for x in range(len(short_term_goals)):\n",
" outputs.append(''' 1. Short-Term Goal: $'''+ str(short_term_goals[x]) +''' Vacation (1 Year)\n",
"Timeline: 12 months\n",
"Monthly Savings Needed: '''+ str(short_term_goals[x]) + ''' / 12 = '''+ str((short_term_goals[x]/12).round()) +'''\n",
"\n",
"Best Storage Option: High-yield savings account (HYSA)\n",
"Easy access\n",
"Earns some interest\n",
"Safe from market fluctuations,\n",
"\n",
"2. Medium-Term Goal: $'''+ str(medium_term_goals[x]) +''' Car Down Payment (2–3 Years)\n",
"Timeline Options:\n",
"2 years (24 months) → $''' + str((medium_term_goals[x]/24).round()) + '''/month\n",
"3 years (36 months) → $''' + str((medium_term_goals[x]/36).round()) + '''/month\n",
"Best Storage Option: HYSA or conservative investment\n",
"If comfortable with some risk, a mix of HYSA + conservative investments (e.g., CDs, bond ETFs)\n",
"If risk-averse, keep it in an HYSA,\n",
"\n",
"3. Long-Term Goal: $'''+ str(long_term_goals[x]) +''' House Down Payment (10 Years)\n",
"Timeline: 120 months\n",
"Monthly Savings Needed: '''+ str(long_term_goals[x]) + ''' / 120 = '''+ str((long_term_goals[x]/120).round()) +''' \n",
"\n",
"Best Storage Option: Investment account\n",
"Given the long time horizon, investing in a mix of index funds (S&P 500, total stock market) + bonds could provide higher returns.\n",
"Consider Roth IRA (if eligible) or brokerage account to allow tax-efficient growth.\n",
"\n",
"Summary of Total Savings Targets:\n",
"Total Monthly Savings goal = $''' +str(((short_term_goals[x]/12)+(medium_term_goals[x]/36)+(long_term_goals[x]/120)).round()) +''' - $''' +str(((short_term_goals[x]/12)+(medium_term_goals[x]/24)+(long_term_goals[x]/120)).round()) +'''/month'''\n",
" )\n",
" \n",
"df3 = pd.DataFrame({'question':prompts,\n",
" 'response':outputs})"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "f7f484a6-5cc5-4a77-a474-1fc921095dc2",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Build the chain-of-thought prompt prefix once, then derive both columns from it:\n",
"# 'question_1' is the bare prompt, 'instruct' is the prompt followed by the answer.\n",
"goals_cot_prompt = \"Q: \" + df3['question'] + \"\\n\\nA: \" + \"Lets think step by step.\"\n",
"df3['instruct'] = goals_cot_prompt + df3['response']\n",
"df3['question_1'] = goals_cot_prompt"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "63d3c1cd-943b-41b3-ab4f-85f11510eeca",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" question | \n",
" response | \n",
" instruct | \n",
" question_1 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" My short term goal is to save for a $3253 vaca... | \n",
" 1. Short-Term Goal: $3253 Vacation (1 Year)\\n... | \n",
" Q: My short term goal is to save for a $3253 v... | \n",
" Q: My short term goal is to save for a $3253 v... | \n",
"
\n",
" \n",
" 1 | \n",
" My short term goal is to save for a $4137 vaca... | \n",
" 1. Short-Term Goal: $4137 Vacation (1 Year)\\n... | \n",
" Q: My short term goal is to save for a $4137 v... | \n",
" Q: My short term goal is to save for a $4137 v... | \n",
"
\n",
" \n",
" 2 | \n",
" My short term goal is to save for a $4654 vaca... | \n",
" 1. Short-Term Goal: $4654 Vacation (1 Year)\\n... | \n",
" Q: My short term goal is to save for a $4654 v... | \n",
" Q: My short term goal is to save for a $4654 v... | \n",
"
\n",
" \n",
" 3 | \n",
" My short term goal is to save for a $2418 vaca... | \n",
" 1. Short-Term Goal: $2418 Vacation (1 Year)\\n... | \n",
" Q: My short term goal is to save for a $2418 v... | \n",
" Q: My short term goal is to save for a $2418 v... | \n",
"
\n",
" \n",
" 4 | \n",
" My short term goal is to save for a $3447 vaca... | \n",
" 1. Short-Term Goal: $3447 Vacation (1 Year)\\n... | \n",
" Q: My short term goal is to save for a $3447 v... | \n",
" Q: My short term goal is to save for a $3447 v... | \n",
"
\n",
" \n",
" 5 | \n",
" My short term goal is to save for a $3147 vaca... | \n",
" 1. Short-Term Goal: $3147 Vacation (1 Year)\\n... | \n",
" Q: My short term goal is to save for a $3147 v... | \n",
" Q: My short term goal is to save for a $3147 v... | \n",
"
\n",
" \n",
" 6 | \n",
" My short term goal is to save for a $1072 vaca... | \n",
" 1. Short-Term Goal: $1072 Vacation (1 Year)\\n... | \n",
" Q: My short term goal is to save for a $1072 v... | \n",
" Q: My short term goal is to save for a $1072 v... | \n",
"
\n",
" \n",
" 7 | \n",
" My short term goal is to save for a $3169 vaca... | \n",
" 1. Short-Term Goal: $3169 Vacation (1 Year)\\n... | \n",
" Q: My short term goal is to save for a $3169 v... | \n",
" Q: My short term goal is to save for a $3169 v... | \n",
"
\n",
" \n",
" 8 | \n",
" My short term goal is to save for a $4985 vaca... | \n",
" 1. Short-Term Goal: $4985 Vacation (1 Year)\\n... | \n",
" Q: My short term goal is to save for a $4985 v... | \n",
" Q: My short term goal is to save for a $4985 v... | \n",
"
\n",
" \n",
" 9 | \n",
" My short term goal is to save for a $3722 vaca... | \n",
" 1. Short-Term Goal: $3722 Vacation (1 Year)\\n... | \n",
" Q: My short term goal is to save for a $3722 v... | \n",
" Q: My short term goal is to save for a $3722 v... | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" question \\\n",
"0 My short term goal is to save for a $3253 vaca... \n",
"1 My short term goal is to save for a $4137 vaca... \n",
"2 My short term goal is to save for a $4654 vaca... \n",
"3 My short term goal is to save for a $2418 vaca... \n",
"4 My short term goal is to save for a $3447 vaca... \n",
"5 My short term goal is to save for a $3147 vaca... \n",
"6 My short term goal is to save for a $1072 vaca... \n",
"7 My short term goal is to save for a $3169 vaca... \n",
"8 My short term goal is to save for a $4985 vaca... \n",
"9 My short term goal is to save for a $3722 vaca... \n",
"\n",
" response \\\n",
"0 1. Short-Term Goal: $3253 Vacation (1 Year)\\n... \n",
"1 1. Short-Term Goal: $4137 Vacation (1 Year)\\n... \n",
"2 1. Short-Term Goal: $4654 Vacation (1 Year)\\n... \n",
"3 1. Short-Term Goal: $2418 Vacation (1 Year)\\n... \n",
"4 1. Short-Term Goal: $3447 Vacation (1 Year)\\n... \n",
"5 1. Short-Term Goal: $3147 Vacation (1 Year)\\n... \n",
"6 1. Short-Term Goal: $1072 Vacation (1 Year)\\n... \n",
"7 1. Short-Term Goal: $3169 Vacation (1 Year)\\n... \n",
"8 1. Short-Term Goal: $4985 Vacation (1 Year)\\n... \n",
"9 1. Short-Term Goal: $3722 Vacation (1 Year)\\n... \n",
"\n",
" instruct \\\n",
"0 Q: My short term goal is to save for a $3253 v... \n",
"1 Q: My short term goal is to save for a $4137 v... \n",
"2 Q: My short term goal is to save for a $4654 v... \n",
"3 Q: My short term goal is to save for a $2418 v... \n",
"4 Q: My short term goal is to save for a $3447 v... \n",
"5 Q: My short term goal is to save for a $3147 v... \n",
"6 Q: My short term goal is to save for a $1072 v... \n",
"7 Q: My short term goal is to save for a $3169 v... \n",
"8 Q: My short term goal is to save for a $4985 v... \n",
"9 Q: My short term goal is to save for a $3722 v... \n",
"\n",
" question_1 \n",
"0 Q: My short term goal is to save for a $3253 v... \n",
"1 Q: My short term goal is to save for a $4137 v... \n",
"2 Q: My short term goal is to save for a $4654 v... \n",
"3 Q: My short term goal is to save for a $2418 v... \n",
"4 Q: My short term goal is to save for a $3447 v... \n",
"5 Q: My short term goal is to save for a $3147 v... \n",
"6 Q: My short term goal is to save for a $1072 v... \n",
"7 Q: My short term goal is to save for a $3169 v... \n",
"8 Q: My short term goal is to save for a $4985 v... \n",
"9 Q: My short term goal is to save for a $3722 v... "
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first ten rows of the savings-goal dataset.\n",
"df3.head(n=10)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "2a4deb48-724f-46a4-8e55-4e41f648af6e",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Write the savings-goal examples to disk; the row index is left out of the CSV.\n",
"goals_csv_path = 'goals_dataset.csv'\n",
"df3.to_csv(goals_csv_path, index=False)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "313cd80a-c7be-489c-9c70-30885c7e614a",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "20de1e90-1c13-486a-b5db-09c957622a69",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "llm_course_2",
"language": "python",
"name": "llm_course_2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.11"
}
},
"nbformat": 4,
"nbformat_minor": 5
}