{
"cells": [
{
"cell_type": "code",
"execution_count": 7,
"id": "95956dfc-c447-4dcb-a3c4-563ec91e9211",
"metadata": {},
"outputs": [],
"source": [
"from pathlib import Path\n",
"\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "8724a364-ae3a-41a1-9b4a-68d53c452d83",
"metadata": {},
"outputs": [],
"source": [
"# Resolve the inventory export relative to the current user's home directory\n",
"# instead of hardcoding one machine's absolute path.\n",
"infile = Path.home() / \"Dropbox\" / \"Existing2022CardInventoryWithNewBoxes2023.txt\"\n",
"\n",
"# Read the file as tab-separated text decoded as ISO-8859-1 (Latin-1).\n",
"df = pd.read_csv(infile, sep=\"\\t\", encoding=\"ISO-8859-1\")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "660ed1bf-cf66-495e-a829-cfe37861aa3f",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Type | \n",
" Sport | \n",
" Year | \n",
" Company | \n",
" Product | \n",
" Card # | \n",
" Player Name | \n",
" Notes | \n",
" HOF | \n",
" Grader | \n",
" Grade | \n",
" Storage Box | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Card | \n",
" Baseball | \n",
" 1949 | \n",
" Leaf | \n",
" NaN | \n",
" 68 | \n",
" Eddie Miller | \n",
" SP | \n",
" NaN | \n",
" Beckett | \n",
" 3.0 | \n",
" 270.0 | \n",
"
\n",
" \n",
" 1 | \n",
" Card | \n",
" Baseball | \n",
" 1949 | \n",
" Leaf | \n",
" NaN | \n",
" 48 | \n",
" Johnny Schmitz | \n",
" SP | \n",
" NaN | \n",
" Beckett | \n",
" 3.0 | \n",
" 270.0 | \n",
"
\n",
" \n",
" 2 | \n",
" Card | \n",
" Baseball | \n",
" 1961 | \n",
" Topps | \n",
" NaN | \n",
" 575 | \n",
" Ernie Banks | \n",
" AS | \n",
" NaN | \n",
" Beckett | \n",
" 4.5 | \n",
" 269.0 | \n",
"
\n",
" \n",
" 3 | \n",
" Card | \n",
" Baseball | \n",
" 1949 | \n",
" Leaf | \n",
" NaN | \n",
" 129 | \n",
" Kirby Higbe | \n",
" SP | \n",
" NaN | \n",
" Beckett | \n",
" 1.5 | \n",
" 270.0 | \n",
"
\n",
" \n",
" 4 | \n",
" Card | \n",
" Baseball | \n",
" 1999 | \n",
" Topps | \n",
" All-Topps Mystery Finest | \n",
" M3 | \n",
" Mark McGwire | \n",
" NaN | \n",
" NaN | \n",
" Beckett | \n",
" 9.0 | \n",
" 270.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Type Sport Year Company Product Card # \\\n",
"0 Card Baseball 1949 Leaf NaN 68 \n",
"1 Card Baseball 1949 Leaf NaN 48 \n",
"2 Card Baseball 1961 Topps NaN 575 \n",
"3 Card Baseball 1949 Leaf NaN 129 \n",
"4 Card Baseball 1999 Topps All-Topps Mystery Finest M3 \n",
"\n",
" Player Name Notes HOF Grader Grade Storage Box \n",
"0 Eddie Miller SP NaN Beckett 3.0 270.0 \n",
"1 Johnny Schmitz SP NaN Beckett 3.0 270.0 \n",
"2 Ernie Banks AS NaN Beckett 4.5 269.0 \n",
"3 Kirby Higbe SP NaN Beckett 1.5 270.0 \n",
"4 Mark McGwire NaN NaN Beckett 9.0 270.0 "
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first five rows of the loaded frame.\n",
"df.head(n=5)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "d37f49f3-8fca-42db-a148-b0d0c37c9c04",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Type | \n",
" Sport | \n",
" Year | \n",
" Company | \n",
" Product | \n",
" Card # | \n",
" Player Name | \n",
" Notes | \n",
" HOF | \n",
" Grader | \n",
" Grade | \n",
" Storage Box | \n",
"
\n",
" \n",
" \n",
" \n",
" 55 | \n",
" Card | \n",
" Baseball | \n",
" 1952 | \n",
" Bowman | \n",
" NaN | \n",
" 11 | \n",
" Ralph Kiner | \n",
" NaN | \n",
" Y | \n",
" Beckett | \n",
" 3.0 | \n",
" 271.0 | \n",
"
\n",
" \n",
" 403 | \n",
" Card | \n",
" Baseball | \n",
" 1953 | \n",
" Bowman | \n",
" Color | \n",
" 80 | \n",
" Ralph Kiner | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 314 | \n",
" Card | \n",
" Baseball | \n",
" 1949 | \n",
" Leaf | \n",
" NaN | \n",
" 61 | \n",
" Jake Early | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 179 | \n",
" Card | \n",
" Baseball | \n",
" 1985 | \n",
" Topps | \n",
" NaN | \n",
" 401 | \n",
" Mark McGwire | \n",
" Rookie | \n",
" NaN | \n",
" Sportscard Guaranty | \n",
" 86.0 | \n",
" 268.0 | \n",
"
\n",
" \n",
" 492 | \n",
" Card | \n",
" Baseball | \n",
" 1952 | \n",
" Bowman | \n",
" NaN | \n",
" 122 | \n",
" Joe Garagiola | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 315 | \n",
" Card | \n",
" Baseball | \n",
" 1948 | \n",
" Leaf | \n",
" NaN | \n",
" 49 | \n",
" Del Ennis | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 109 | \n",
" Card | \n",
" Baseball | \n",
" 1949 | \n",
" Leaf | \n",
" NaN | \n",
" 62 | \n",
" Eddie Joost | \n",
" SP | \n",
" NaN | \n",
" Beckett | \n",
" 2.5 | \n",
" 270.0 | \n",
"
\n",
" \n",
" 66 | \n",
" Card | \n",
" Baseball | \n",
" 1949 | \n",
" Leaf | \n",
" NaN | \n",
" 113 | \n",
" Dutch Leonard | \n",
" SP | \n",
" NaN | \n",
" Beckett | \n",
" 2.5 | \n",
" 268.0 | \n",
"
\n",
" \n",
" 340 | \n",
" Card | \n",
" Basketball | \n",
" 1994 | \n",
" Competitive Images | \n",
" NaN | \n",
" 8 | \n",
" Michael Jordan | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 104 | \n",
" Card | \n",
" Baseball | \n",
" 1949 | \n",
" Leaf | \n",
" NaN | \n",
" 36 | \n",
" Al Zarilla | \n",
" SP | \n",
" NaN | \n",
" Beckett | \n",
" 2.5 | \n",
" 271.0 | \n",
"
\n",
" \n",
" 469 | \n",
" Card | \n",
" Baseball | \n",
" 1974 | \n",
" Topps | \n",
" NaN | \n",
" 230 | \n",
" Tony Perez | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 420 | \n",
" Card | \n",
" Baseball | \n",
" 1951 | \n",
" Bowman | \n",
" NaN | \n",
" 110 | \n",
" Bobby Brown | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 302 | \n",
" Card | \n",
" Baseball | \n",
" 2017 | \n",
" Topps | \n",
" Now | \n",
" OS-80 | \n",
" Shohei Ohtani | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 197 | \n",
" Card | \n",
" Baseball | \n",
" 1957 | \n",
" Topps | \n",
" NaN | \n",
" 25 | \n",
" Whitey Ford | \n",
" NaN | \n",
" NaN | \n",
" Sportscard Guaranty | \n",
" 86.0 | \n",
" NaN | \n",
"
\n",
" \n",
" 144 | \n",
" Card | \n",
" Baseball | \n",
" 1954 | \n",
" Dan-Dee | \n",
" NaN | \n",
" 3 | \n",
" Walker Cooper | \n",
" NaN | \n",
" NaN | \n",
" Beckett | \n",
" 2.0 | \n",
" 270.0 | \n",
"
\n",
" \n",
" 481 | \n",
" Card | \n",
" Baseball | \n",
" 1952 | \n",
" Bowman | \n",
" NaN | \n",
" 96 | \n",
" Ralph Branca | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 194 | \n",
" Card | \n",
" Baseball | \n",
" 1987 | \n",
" Fleer | \n",
" Update Glossy | \n",
" U-68 | \n",
" Greg Maddux | \n",
" Rookie | \n",
" NaN | \n",
" Sportscard Guaranty | \n",
" 96.0 | \n",
" 268.0 | \n",
"
\n",
" \n",
" 123 | \n",
" Card | \n",
" Baseball | \n",
" 2001 | \n",
" Topps | \n",
" NaN | \n",
" 726 | \n",
" Ichiro Suzuki | \n",
" NaN | \n",
" NaN | \n",
" Beckett | \n",
" 8.0 | \n",
" 267.0 | \n",
"
\n",
" \n",
" 224 | \n",
" Card | \n",
" Baseball | \n",
" 1993 | \n",
" Classic | \n",
" Best | \n",
" PR1 | \n",
" Derek Jeter | \n",
" NaN | \n",
" NaN | \n",
" CSA | \n",
" 9.0 | \n",
" NaN | \n",
"
\n",
" \n",
" 358 | \n",
" Card | \n",
" Baseball | \n",
" 1952 | \n",
" Topps | \n",
" NaN | \n",
" 36 | \n",
" Gil Hodges | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Type Sport Year Company Product Card # \\\n",
"55 Card Baseball 1952 Bowman NaN 11 \n",
"403 Card Baseball 1953 Bowman Color 80 \n",
"314 Card Baseball 1949 Leaf NaN 61 \n",
"179 Card Baseball 1985 Topps NaN 401 \n",
"492 Card Baseball 1952 Bowman NaN 122 \n",
"315 Card Baseball 1948 Leaf NaN 49 \n",
"109 Card Baseball 1949 Leaf NaN 62 \n",
"66 Card Baseball 1949 Leaf NaN 113 \n",
"340 Card Basketball 1994 Competitive Images NaN 8 \n",
"104 Card Baseball 1949 Leaf NaN 36 \n",
"469 Card Baseball 1974 Topps NaN 230 \n",
"420 Card Baseball 1951 Bowman NaN 110 \n",
"302 Card Baseball 2017 Topps Now OS-80 \n",
"197 Card Baseball 1957 Topps NaN 25 \n",
"144 Card Baseball 1954 Dan-Dee NaN 3 \n",
"481 Card Baseball 1952 Bowman NaN 96 \n",
"194 Card Baseball 1987 Fleer Update Glossy U-68 \n",
"123 Card Baseball 2001 Topps NaN 726 \n",
"224 Card Baseball 1993 Classic Best PR1 \n",
"358 Card Baseball 1952 Topps NaN 36 \n",
"\n",
" Player Name Notes HOF Grader Grade Storage Box \n",
"55 Ralph Kiner NaN Y Beckett 3.0 271.0 \n",
"403 Ralph Kiner NaN NaN NaN NaN NaN \n",
"314 Jake Early NaN NaN NaN NaN NaN \n",
"179 Mark McGwire Rookie NaN Sportscard Guaranty 86.0 268.0 \n",
"492 Joe Garagiola NaN NaN NaN NaN NaN \n",
"315 Del Ennis NaN NaN NaN NaN NaN \n",
"109 Eddie Joost SP NaN Beckett 2.5 270.0 \n",
"66 Dutch Leonard SP NaN Beckett 2.5 268.0 \n",
"340 Michael Jordan NaN NaN NaN NaN NaN \n",
"104 Al Zarilla SP NaN Beckett 2.5 271.0 \n",
"469 Tony Perez NaN NaN NaN NaN NaN \n",
"420 Bobby Brown NaN NaN NaN NaN NaN \n",
"302 Shohei Ohtani NaN NaN NaN NaN NaN \n",
"197 Whitey Ford NaN NaN Sportscard Guaranty 86.0 NaN \n",
"144 Walker Cooper NaN NaN Beckett 2.0 270.0 \n",
"481 Ralph Branca NaN NaN NaN NaN NaN \n",
"194 Greg Maddux Rookie NaN Sportscard Guaranty 96.0 268.0 \n",
"123 Ichiro Suzuki NaN NaN Beckett 8.0 267.0 \n",
"224 Derek Jeter NaN NaN CSA 9.0 NaN \n",
"358 Gil Hodges NaN NaN NaN NaN NaN "
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Draw 20 random rows from the first 500 rows of the inventory.\n",
"# A fixed random_state makes the draw repeatable across kernel restarts,\n",
"# so the files derived from this sample can be regenerated exactly.\n",
"sample = df.iloc[:500].sample(n=20, random_state=42)\n",
"sample"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "795ef99b-abd7-4fe8-b56c-1779ebda8d52",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Type | \n",
" Sport | \n",
" Year | \n",
" Company | \n",
" Product | \n",
" Card # | \n",
" Player Name | \n",
" Notes | \n",
" HOF | \n",
" Grader | \n",
" Grade | \n",
" Storage Box | \n",
"
\n",
" \n",
" \n",
" \n",
" 179 | \n",
" Card | \n",
" Baseball | \n",
" 1985 | \n",
" Topps | \n",
" NaN | \n",
" 401 | \n",
" Mark McGwire | \n",
" Rookie | \n",
" NaN | \n",
" Sportscard Guaranty | \n",
" 86.0 | \n",
" 268.0 | \n",
"
\n",
" \n",
" 492 | \n",
" Card | \n",
" Baseball | \n",
" 1952 | \n",
" Bowman | \n",
" NaN | \n",
" 122 | \n",
" Joe Garagiola | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 315 | \n",
" Card | \n",
" Baseball | \n",
" 1948 | \n",
" Leaf | \n",
" NaN | \n",
" 49 | \n",
" Del Ennis | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 109 | \n",
" Card | \n",
" Baseball | \n",
" 1949 | \n",
" Leaf | \n",
" NaN | \n",
" 62 | \n",
" Eddie Joost | \n",
" SP | \n",
" NaN | \n",
" Beckett | \n",
" 2.5 | \n",
" 270.0 | \n",
"
\n",
" \n",
" 66 | \n",
" Card | \n",
" Baseball | \n",
" 1949 | \n",
" Leaf | \n",
" NaN | \n",
" 113 | \n",
" Dutch Leonard | \n",
" SP | \n",
" NaN | \n",
" Beckett | \n",
" 2.5 | \n",
" 268.0 | \n",
"
\n",
" \n",
" 340 | \n",
" Card | \n",
" Basketball | \n",
" 1994 | \n",
" Competitive Images | \n",
" NaN | \n",
" 8 | \n",
" Michael Jordan | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 104 | \n",
" Card | \n",
" Baseball | \n",
" 1949 | \n",
" Leaf | \n",
" NaN | \n",
" 36 | \n",
" Al Zarilla | \n",
" SP | \n",
" NaN | \n",
" Beckett | \n",
" 2.5 | \n",
" 271.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Type Sport Year Company Product Card # \\\n",
"179 Card Baseball 1985 Topps NaN 401 \n",
"492 Card Baseball 1952 Bowman NaN 122 \n",
"315 Card Baseball 1948 Leaf NaN 49 \n",
"109 Card Baseball 1949 Leaf NaN 62 \n",
"66 Card Baseball 1949 Leaf NaN 113 \n",
"340 Card Basketball 1994 Competitive Images NaN 8 \n",
"104 Card Baseball 1949 Leaf NaN 36 \n",
"\n",
" Player Name Notes HOF Grader Grade Storage Box \n",
"179 Mark McGwire Rookie NaN Sportscard Guaranty 86.0 268.0 \n",
"492 Joe Garagiola NaN NaN NaN NaN NaN \n",
"315 Del Ennis NaN NaN NaN NaN NaN \n",
"109 Eddie Joost SP NaN Beckett 2.5 270.0 \n",
"66 Dutch Leonard SP NaN Beckett 2.5 268.0 \n",
"340 Michael Jordan NaN NaN NaN NaN NaN \n",
"104 Al Zarilla SP NaN Beckett 2.5 271.0 "
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the same positional slice (rows 3-12 of the sample) that is\n",
"# written to sample_data/sample_data_1.txt, so the display matches the file.\n",
"sample.iloc[3:13]"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "58a90d8e-0480-4187-a7eb-cd69463c0329",
"metadata": {},
"outputs": [],
"source": [
"outfile = \"sample_data/sample_data_1.txt\"\n",
"# to_csv does not create missing directories, so make sure the target folder exists.\n",
"Path(outfile).parent.mkdir(parents=True, exist_ok=True)\n",
"# Write the sample rows at positions 3-12 as tab-separated UTF-8, without the index column.\n",
"sample.iloc[3:13].to_csv(outfile, sep=\"\\t\", index=False, encoding=\"utf-8\")"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "31083cda-058b-4aee-b0d5-965e48c4bca7",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" animal | \n",
" coat pattern | \n",
" Quantity | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Leopard | \n",
" spots | \n",
" 2 | \n",
"
\n",
" \n",
" 1 | \n",
" Tiger | \n",
" stripes | \n",
" 10 | \n",
"
\n",
" \n",
" 2 | \n",
" Lion | \n",
" solid | \n",
" 1 | \n",
"
\n",
" \n",
" 3 | \n",
" Cheetah | \n",
" spots | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" animal coat pattern Quantity\n",
"0 Leopard spots 2\n",
"1 Tiger stripes 10\n",
"2 Lion solid 1\n",
"3 Cheetah spots 1"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Small hand-built example table, specified column by column.\n",
"df2 = pd.DataFrame(\n",
"    {\n",
"        \"animal\": [\"Leopard\", \"Tiger\", \"Lion\", \"Cheetah\"],\n",
"        \"coat pattern\": [\"spots\", \"stripes\", \"solid\", \"spots\"],\n",
"        \"Quantity\": [2, 10, 1, 1],\n",
"    }\n",
")\n",
"\n",
"df2"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "bc9735c0-e12a-4b1f-8cb2-7501d37a4c19",
"metadata": {},
"outputs": [],
"source": [
"outfile = \"sample_data/sample_data_2.txt\"\n",
"# to_csv does not create missing directories, so make sure the target folder exists.\n",
"Path(outfile).parent.mkdir(parents=True, exist_ok=True)\n",
"# Tab-separated, no index column; the encoding is spelled out to match the other sample file.\n",
"df2.to_csv(outfile, sep=\"\\t\", index=False, encoding=\"utf-8\")"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "ed438bbb-03f4-44a9-8f14-9390f3996cae",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Check that the frame has a column labelled \"animal\".\n",
"\"animal\" in df2.columns"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "36d5bb2e-cc1e-4afa-bff9-730832e9a5ae",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"dtype('int64')"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# sample_data_1.txt is written as UTF-8 earlier in this notebook, so decode it as UTF-8;\n",
"# reading it as ISO-8859-1 would garble any non-ASCII characters.\n",
"df3 = pd.read_csv(\"sample_data/sample_data_1.txt\", sep=\"\\t\", encoding=\"utf-8\")\n",
"# Inspect the dtype pandas inferred for the Year column after the round trip.\n",
"df3[\"Year\"].dtype"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "905c63e9-3d06-4905-ac63-3e66bf94c22e",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:rebalance] *",
"language": "python",
"name": "conda-env-rebalance-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}