{ "cells": [ { "cell_type": "code", "execution_count": 2, "id": "16fd83c7-9f91-40ab-ac15-57b02a63b7f4", "metadata": { "tags": [] }, "outputs": [], "source": [ "import os\n", "os.environ['HF_HOME'] = \"/scratch/tar3kh/models/cache\"\n", "import torch \n", "\n", "from datasets import load_dataset #datasets is huggingface's dataset package\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", "import PIL" ] }, { "cell_type": "markdown", "id": "f748fb12-da99-4702-bfb2-263e091fee14", "metadata": {}, "source": [ "## Synthetic Dataset (generating budgets)" ] }, { "cell_type": "code", "execution_count": 14, "id": "61502633-b04c-44fd-b39f-7803ef778205", "metadata": { "tags": [] }, "outputs": [], "source": [
 "SEED = 60    # single seed shared by every random column in this cell\n",
 "size = 3000  # number of synthetic households (rows of the dataset)\n",
 "\n",
 "\n",
 "def budget_prompt(income, rent, car, other):\n",
 "    \"\"\"Return the user question for one synthetic household.\n",
 "\n",
 "    income is the yearly income; rent, car and other are monthly amounts.\n",
 "    The food expense is fixed at $300 in the wording.\n",
 "    \"\"\"\n",
 "    return (\n",
 "        f'I have an income of about {income} a year and my monthly expenses include '\n",
 "        f'{rent} a month in rent and utilities, a {car} car payment, $300 in food, and about '\n",
 "        f'{other} a month in other expenses. Using python, can you create for me a budget '\n",
 "        'spreadsheet and export it to excel?'\n",
 "    )\n",
 "\n",
 "\n",
 "def budget_response(income, rent, car, other):\n",
 "    \"\"\"Return the reference answer for one synthetic household.\n",
 "\n",
 "    The answer is the source of a small pandas script that tabulates the\n",
 "    monthly income, the four expenses, their total and the net savings, and\n",
 "    writes the table to budget.xlsx.\n",
 "\n",
 "    Literal braces of the generated script are doubled because the template\n",
 "    is an f-string. The text starts with one space: the 'instruct' column\n",
 "    appends it directly after 'Lets think step by step.' with no separator\n",
 "    of its own.\n",
 "    \"\"\"\n",
 "    return f''' import pandas as pd\n",
 "import openpyxl\n",
 "\n",
 "# Define income and expenses\n",
 "annual_income = {income}\n",
 "monthly_income = annual_income / 12\n",
 "\n",
 "expenses = {{\n",
 " \"Rent & Utilities\": {rent},\n",
 " \"Car Payment\": {car},\n",
 " \"Food\": 300,\n",
 " \"Other Expenses\": {other}\n",
 "}}\n",
 "\n",
 "total_expenses = sum(expenses.values())\n",
 "net_savings = monthly_income - total_expenses\n",
 "\n",
 "# Create DataFrame\n",
 "budget_data = {{\n",
 " \"Category\": [\"Monthly Income\"] + list(expenses.keys()) + [\"Total Expenses\", \"Net Savings\"],\n",
 " \"Amount ($)\": [monthly_income] + list(expenses.values()) + [total_expenses, net_savings]\n",
 "}}\n",
 "\n",
 "df = pd.DataFrame(budget_data)\n",
 "\n",
 "# Save to Excel\n",
 "file_name = \"budget.xlsx\"\n",
 "df.to_excel(file_name, index=False, engine='openpyxl')\n",
 "\n",
 "print(f\"Budget spreadsheet saved as {{file_name}}\")'''\n",
 "\n",
 "\n",
 "# Seed once and draw the four columns one after another. Re-seeding with the\n",
 "# same value before each draw restarts the identical bit stream, so every\n",
 "# column would be computed from the same underlying random numbers instead of\n",
 "# being sampled independently.\n",
 "# RandomState(SEED) reproduces the stream that np.random.seed(SEED) gave the\n",
 "# global generator, so the first column drawn (income) keeps its values.\n",
 "rng = np.random.RandomState(SEED)\n",
 "\n",
 "# randint's upper bound is exclusive.\n",
 "Income_Randomizer = rng.randint(29000, 251000, size=size)  # yearly income\n",
 "Rent_Randomizer = rng.randint(500, 2500, size=size)        # monthly rent and utilities\n",
 "Car_Randomizer = rng.randint(200, 1000, size=size)         # monthly car payment\n",
 "Other_Randomizer = rng.randint(200, 600, size=size)        # other monthly expenses\n",
 "\n",
 "# One (income, rent, car, other) tuple per household, shared by both columns\n",
 "# so each question is paired with the answer built from the same numbers.\n",
 "household_rows = list(zip(Income_Randomizer, Rent_Randomizer, Car_Randomizer, Other_Randomizer))\n",
 "Example_promtps = [budget_prompt(*row) for row in household_rows]\n",
 "Example_outputs = [budget_response(*row) for row in household_rows]\n",
 "\n",
 "df2 = pd.DataFrame({'question': Example_promtps,\n",
 "                    'response': Example_outputs})"
 ] }, { "cell_type": "code", "execution_count": 15, "id": "79e98786-47d7-4a83-943a-7d5484bd4c2c", "metadata": { "tags": [] }, "outputs": [], "source": [
 "# Prompt shared by both derived columns: the question wrapped as\n",
 "# 'Q: <question>', a blank line, then 'A: Lets think step by step.'\n",
 "cot_prompt = \"Q: \" + df2['question'] + \"\\n\\nA: \" + \"Lets think step by step.\"\n",
 "\n",
 "# 'instruct' is the prompt followed by the response; 'question_1' is the prompt alone.\n",
 "df2['instruct'] = cot_prompt + df2['response']\n",
 "df2['question_1'] = cot_prompt"
 ] }, { "cell_type": "code", "execution_count": 16, "id": "1199470a-790e-4c60-8bb5-15d7b64aa43a", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
questionresponseinstructquestion_1
0I have an income of about 162325 a year and m...import pandas as pd\\nimport openpyxl\\n\\n# Def...Q: I have an income of about 162325 a year an...Q: I have an income of about 162325 a year an...
1I have an income of about 35543 a year and my...import pandas as pd\\nimport openpyxl\\n\\n# Def...Q: I have an income of about 35543 a year and...Q: I have an income of about 35543 a year and...
2I have an income of about 203179 a year and m...import pandas as pd\\nimport openpyxl\\n\\n# Def...Q: I have an income of about 203179 a year an...Q: I have an income of about 203179 a year an...
3I have an income of about 197008 a year and m...import pandas as pd\\nimport openpyxl\\n\\n# Def...Q: I have an income of about 197008 a year an...Q: I have an income of about 197008 a year an...
4I have an income of about 223681 a year and m...import pandas as pd\\nimport openpyxl\\n\\n# Def...Q: I have an income of about 223681 a year an...Q: I have an income of about 223681 a year an...
\n", "
" ], "text/plain": [ " question \\\n", "0 I have an income of about 162325 a year and m... \n", "1 I have an income of about 35543 a year and my... \n", "2 I have an income of about 203179 a year and m... \n", "3 I have an income of about 197008 a year and m... \n", "4 I have an income of about 223681 a year and m... \n", "\n", " response \\\n", "0 import pandas as pd\\nimport openpyxl\\n\\n# Def... \n", "1 import pandas as pd\\nimport openpyxl\\n\\n# Def... \n", "2 import pandas as pd\\nimport openpyxl\\n\\n# Def... \n", "3 import pandas as pd\\nimport openpyxl\\n\\n# Def... \n", "4 import pandas as pd\\nimport openpyxl\\n\\n# Def... \n", "\n", " instruct \\\n", "0 Q: I have an income of about 162325 a year an... \n", "1 Q: I have an income of about 35543 a year and... \n", "2 Q: I have an income of about 203179 a year an... \n", "3 Q: I have an income of about 197008 a year an... \n", "4 Q: I have an income of about 223681 a year an... \n", "\n", " question_1 \n", "0 Q: I have an income of about 162325 a year an... \n", "1 Q: I have an income of about 35543 a year and... \n", "2 Q: I have an income of about 203179 a year an... \n", "3 Q: I have an income of about 197008 a year an... \n", "4 Q: I have an income of about 223681 a year an... 
" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2.head()" ] }, { "cell_type": "code", "execution_count": 17, "id": "1b66310d-41e4-4592-8516-b35b635baead", "metadata": { "tags": [] }, "outputs": [], "source": [ "df2.to_csv('budget_dataset.csv', index=False)" ] }, { "cell_type": "markdown", "id": "2fac1974-19a0-4853-bbe5-9867b57819ce", "metadata": {}, "source": [ "## Synthetic Dataset (Financial Goals)" ] }, { "cell_type": "code", "execution_count": 18, "id": "e4d8b755-25ec-472f-a8e4-52d153ff2f46", "metadata": { "tags": [] }, "outputs": [], "source": [
 "# Dataset set-up\n",
 "SEED = 60    # single seed shared by every random column in this cell\n",
 "size = 3000  # number of synthetic savers (rows of the dataset)\n",
 "\n",
 "\n",
 "def goals_prompt(short_goal, medium_goal, long_goal):\n",
 "    \"\"\"Return the user question for one synthetic saver.\n",
 "\n",
 "    The arguments are the dollar targets for the vacation (next year), the\n",
 "    car down payment (2 or 3 years) and the house down payment (ten years).\n",
 "    \"\"\"\n",
 "    return (\n",
 "        f'My short term goal is to save for a ${short_goal} vacation in the next year, '\n",
 "        f'my medium term goal is to save for down payment for a new car, around {medium_goal} '\n",
 "        'in the next 2 or 3 years, and my long term goal is to save for a down payment for a '\n",
 "        f'house around {long_goal} in the next ten years, can you help me integrate these goals '\n",
 "        'into my budget as well as where I should store these savings?'\n",
 "    )\n",
 "\n",
 "\n",
 "def goals_response(short_goal, medium_goal, long_goal):\n",
 "    \"\"\"Return the reference savings plan for one synthetic saver.\n",
 "\n",
 "    Monthly amounts are the targets divided by 12 (short term), 24 or 36\n",
 "    (medium term) and 120 (long term), rounded with np.round, so they are\n",
 "    rendered with a trailing '.0'. The summary range goes from the 36-month\n",
 "    car plan (lower bound) to the 24-month car plan (upper bound); each bound\n",
 "    is the rounded sum of the unrounded monthly amounts.\n",
 "\n",
 "    The text starts with one space: the 'instruct' column appends it directly\n",
 "    after 'Lets think step by step.' with no separator of its own.\n",
 "    \"\"\"\n",
 "    short_monthly = short_goal / 12\n",
 "    car_monthly_2y = medium_goal / 24\n",
 "    car_monthly_3y = medium_goal / 36\n",
 "    house_monthly = long_goal / 120\n",
 "    total_low = np.round(short_monthly + car_monthly_3y + house_monthly)\n",
 "    total_high = np.round(short_monthly + car_monthly_2y + house_monthly)\n",
 "    return f''' 1. Short-Term Goal: ${short_goal} Vacation (1 Year)\n",
 "Timeline: 12 months\n",
 "Monthly Savings Needed: {short_goal} / 12 = {np.round(short_monthly)}\n",
 "\n",
 "Best Storage Option: High-yield savings account (HYSA)\n",
 "Easy access\n",
 "Earns some interest\n",
 "Safe from market fluctuations,\n",
 "\n",
 "2. Medium-Term Goal: ${medium_goal} Car Down Payment (2–3 Years)\n",
 "Timeline Options:\n",
 "2 years (24 months) → ${np.round(car_monthly_2y)}/month\n",
 "3 years (36 months) → ${np.round(car_monthly_3y)}/month\n",
 "Best Storage Option: HYSA or conservative investment\n",
 "If comfortable with some risk, a mix of HYSA + conservative investments (e.g., CDs, bond ETFs)\n",
 "If risk-averse, keep it in an HYSA,\n",
 "\n",
 "3. Long-Term Goal: ${long_goal} House Down Payment (10 Years)\n",
 "Timeline: 120 months\n",
 "Monthly Savings Needed: {long_goal} / 120 = {np.round(house_monthly)} \n",
 "\n",
 "Best Storage Option: Investment account\n",
 "Given the long time horizon, investing in a mix of index funds (S&P 500, total stock market) + bonds could provide higher returns.\n",
 "Consider Roth IRA (if eligible) or brokerage account to allow tax-efficient growth.\n",
 "\n",
 "Summary of Total Savings Targets:\n",
 "Total Monthly Savings goal = ${total_low} - ${total_high}/month'''\n",
 "\n",
 "\n",
 "# Seed once and draw the three columns one after another. Re-seeding with the\n",
 "# same value before each draw restarts the identical bit stream, so every\n",
 "# column would be computed from the same underlying random numbers instead of\n",
 "# being sampled independently.\n",
 "# RandomState(SEED) reproduces the stream that np.random.seed(SEED) gave the\n",
 "# global generator, so the first column drawn (short term) keeps its values.\n",
 "rng = np.random.RandomState(SEED)\n",
 "\n",
 "# randint's upper bound is exclusive.\n",
 "short_term_goals = rng.randint(1000, 5000, size=size)      # vacation\n",
 "medium_term_goals = rng.randint(5000, 10000, size=size)    # car down payment\n",
 "long_term_goals = rng.randint(75000, 200000, size=size)    # house down payment\n",
 "\n",
 "# One (short, medium, long) tuple per saver, shared by both columns so each\n",
 "# question is paired with the plan built from the same numbers.\n",
 "goal_rows = list(zip(short_term_goals, medium_term_goals, long_term_goals))\n",
 "prompts = [goals_prompt(*row) for row in goal_rows]\n",
 "outputs = [goals_response(*row) for row in goal_rows]\n",
 "\n",
 "df3 = pd.DataFrame({'question': prompts,\n",
 "                    'response': outputs})"
 ] }, { "cell_type": "code", "execution_count": 19, "id": "f7f484a6-5cc5-4a77-a474-1fc921095dc2", "metadata": { "tags": [] }, "outputs": [], "source": [
 "# Prompt shared by both derived columns: the question wrapped as\n",
 "# 'Q: <question>', a blank line, then 'A: Lets think step by step.'\n",
 "cot_prompt = \"Q: \" + df3['question'] + \"\\n\\nA: \" + \"Lets think step by step.\"\n",
 "\n",
 "# 'instruct' is the prompt followed by the response; 'question_1' is the prompt alone.\n",
 "df3['instruct'] = cot_prompt + df3['response']\n",
 "df3['question_1'] = cot_prompt"
 ] }, { "cell_type": "code", "execution_count": 20, "id": "63d3c1cd-943b-41b3-ab4f-85f11510eeca", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
questionresponseinstructquestion_1
0My short term goal is to save for a $3253 vaca...1. Short-Term Goal: $3253 Vacation (1 Year)\\n...Q: My short term goal is to save for a $3253 v...Q: My short term goal is to save for a $3253 v...
1My short term goal is to save for a $4137 vaca...1. Short-Term Goal: $4137 Vacation (1 Year)\\n...Q: My short term goal is to save for a $4137 v...Q: My short term goal is to save for a $4137 v...
2My short term goal is to save for a $4654 vaca...1. Short-Term Goal: $4654 Vacation (1 Year)\\n...Q: My short term goal is to save for a $4654 v...Q: My short term goal is to save for a $4654 v...
3My short term goal is to save for a $2418 vaca...1. Short-Term Goal: $2418 Vacation (1 Year)\\n...Q: My short term goal is to save for a $2418 v...Q: My short term goal is to save for a $2418 v...
4My short term goal is to save for a $3447 vaca...1. Short-Term Goal: $3447 Vacation (1 Year)\\n...Q: My short term goal is to save for a $3447 v...Q: My short term goal is to save for a $3447 v...
5My short term goal is to save for a $3147 vaca...1. Short-Term Goal: $3147 Vacation (1 Year)\\n...Q: My short term goal is to save for a $3147 v...Q: My short term goal is to save for a $3147 v...
6My short term goal is to save for a $1072 vaca...1. Short-Term Goal: $1072 Vacation (1 Year)\\n...Q: My short term goal is to save for a $1072 v...Q: My short term goal is to save for a $1072 v...
7My short term goal is to save for a $3169 vaca...1. Short-Term Goal: $3169 Vacation (1 Year)\\n...Q: My short term goal is to save for a $3169 v...Q: My short term goal is to save for a $3169 v...
8My short term goal is to save for a $4985 vaca...1. Short-Term Goal: $4985 Vacation (1 Year)\\n...Q: My short term goal is to save for a $4985 v...Q: My short term goal is to save for a $4985 v...
9My short term goal is to save for a $3722 vaca...1. Short-Term Goal: $3722 Vacation (1 Year)\\n...Q: My short term goal is to save for a $3722 v...Q: My short term goal is to save for a $3722 v...
\n", "
" ], "text/plain": [ " question \\\n", "0 My short term goal is to save for a $3253 vaca... \n", "1 My short term goal is to save for a $4137 vaca... \n", "2 My short term goal is to save for a $4654 vaca... \n", "3 My short term goal is to save for a $2418 vaca... \n", "4 My short term goal is to save for a $3447 vaca... \n", "5 My short term goal is to save for a $3147 vaca... \n", "6 My short term goal is to save for a $1072 vaca... \n", "7 My short term goal is to save for a $3169 vaca... \n", "8 My short term goal is to save for a $4985 vaca... \n", "9 My short term goal is to save for a $3722 vaca... \n", "\n", " response \\\n", "0 1. Short-Term Goal: $3253 Vacation (1 Year)\\n... \n", "1 1. Short-Term Goal: $4137 Vacation (1 Year)\\n... \n", "2 1. Short-Term Goal: $4654 Vacation (1 Year)\\n... \n", "3 1. Short-Term Goal: $2418 Vacation (1 Year)\\n... \n", "4 1. Short-Term Goal: $3447 Vacation (1 Year)\\n... \n", "5 1. Short-Term Goal: $3147 Vacation (1 Year)\\n... \n", "6 1. Short-Term Goal: $1072 Vacation (1 Year)\\n... \n", "7 1. Short-Term Goal: $3169 Vacation (1 Year)\\n... \n", "8 1. Short-Term Goal: $4985 Vacation (1 Year)\\n... \n", "9 1. Short-Term Goal: $3722 Vacation (1 Year)\\n... \n", "\n", " instruct \\\n", "0 Q: My short term goal is to save for a $3253 v... \n", "1 Q: My short term goal is to save for a $4137 v... \n", "2 Q: My short term goal is to save for a $4654 v... \n", "3 Q: My short term goal is to save for a $2418 v... \n", "4 Q: My short term goal is to save for a $3447 v... \n", "5 Q: My short term goal is to save for a $3147 v... \n", "6 Q: My short term goal is to save for a $1072 v... \n", "7 Q: My short term goal is to save for a $3169 v... \n", "8 Q: My short term goal is to save for a $4985 v... \n", "9 Q: My short term goal is to save for a $3722 v... \n", "\n", " question_1 \n", "0 Q: My short term goal is to save for a $3253 v... \n", "1 Q: My short term goal is to save for a $4137 v... 
\n", "2 Q: My short term goal is to save for a $4654 v... \n", "3 Q: My short term goal is to save for a $2418 v... \n", "4 Q: My short term goal is to save for a $3447 v... \n", "5 Q: My short term goal is to save for a $3147 v... \n", "6 Q: My short term goal is to save for a $1072 v... \n", "7 Q: My short term goal is to save for a $3169 v... \n", "8 Q: My short term goal is to save for a $4985 v... \n", "9 Q: My short term goal is to save for a $3722 v... " ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df3.head(10)" ] }, { "cell_type": "code", "execution_count": 21, "id": "2a4deb48-724f-46a4-8e55-4e41f648af6e", "metadata": { "tags": [] }, "outputs": [], "source": [
 "# Write all four columns to goals_dataset.csv in the working directory,\n",
 "# without the index column.\n",
 "df3.to_csv('goals_dataset.csv', index=False)"
 ] } ], "metadata": { "kernelspec": { "display_name": "llm_course_2", "language": "python", "name": "llm_course_2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.11" } }, "nbformat": 4, "nbformat_minor": 5 }