carolanderson committed on
Commit
ff2a8fc
·
1 Parent(s): 896c0f7

add baseball card inventory app

Browse files
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ .DS_Store
2
+ .ipynb_checkpoints
app.py ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import math
2
+
3
+ import pandas as pd
4
+ import streamlit as st
5
+
6
+
7
+ @st.cache_data
8
def load_data(file, skiprows=0, encoding="ISO-8859-1"):
    """Read a tab-separated file into a DataFrame.

    Args:
        file: Path or file-like object passed straight to ``pd.read_csv``.
        skiprows: Forwarded to ``pd.read_csv`` as ``skiprows``.
        encoding: Text encoding forwarded to ``pd.read_csv``.

    Returns:
        The parsed ``pandas.DataFrame``.
    """
    read_options = {"sep": "\t", "skiprows": skiprows, "encoding": encoding}
    return pd.read_csv(file, **read_options)
11
+
12
+
13
+ @st.cache_data
14
def convert_df(df):
    """Serialize *df* as tab-separated text without the index, as UTF-8 bytes."""
    tsv_text = df.to_csv(sep="\t", index=False)
    return tsv_text.encode("utf-8")
16
+
17
+
18
+ @st.cache_data
19
def extract_surname(name):
    """Split a card's player name into a ``(first_name, last_name)`` tuple.

    Args:
        name: Value from the "Player Name" column. Non-string values (for
            example the NaN pandas uses for a missing cell) are accepted.

    Returns:
        A 2-tuple:
        * ``(None, None)`` when *name* is not a string or is blank;
        * a hard-coded pair for the special-cased names below;
        * ``('multiple', 'multiple')`` when *name* contains a comma or one of
          the known set/team/multi-player labels;
        * otherwise everything before the last whitespace-separated word as
          the first name and that last word as the last name (a one-word name
          yields an empty first name).

    Raises:
        Exception: If an otherwise unrecognised name contains "-" or "/",
            which is treated as a suspected multi-player card.
    """
    # Labels that identify sets, teams or multi-player cards, not one player.
    non_names = (
        '1992 Hoops set', '2016 Topps Now Highlights', 'Fox-Aparicio',
        '1960 World Series', 'Mantle/Berra', 'Ashburn-Mays', 'Ruth/Aaron/Mays',
        'Mays/Snider', 'New York Yankees', 'Checklist',
    )
    if not isinstance(name, str):
        return None, None

    # Special cases must be tested before the comma rule: "Ken Griffey, Jr."
    # contains a comma but is a single player.
    if name == "G Hill Tribute":
        return "G", "Hill"
    if name in ('Ken Griffey, Jr.', 'Ken Griffey Jr.'):
        return "Ken", "Griffey Jr."
    if 'Vladimir Guerrero Jr.' in name:
        return "Vladimir", 'Guerrero Jr.'
    if "," in name or any(item in name for item in non_names):
        return 'multiple', 'multiple'

    if "-" in name or "/" in name:
        print(name)
        raise Exception("Found suspected multiple-name card!")

    words = name.split()
    if not words:
        # Empty or whitespace-only name: previously crashed with IndexError.
        return None, None
    return " ".join(words[:-1]), words[-1]
41
+
42
+
43
+ @st.cache_data
44
def add_grading_status(grader):
    """Classify one "Grader" cell as graded ("Yes") or ungraded ("No").

    Args:
        grader: A single cell value. Any string counts as a grader name;
            ``None`` or a float NaN counts as a missing grader.

    Returns:
        "Yes" for a string, "No" for ``None`` / NaN.

    Raises:
        Exception: For any other value (for example a real number), after
            printing the offending value.
    """
    if isinstance(grader, str):
        return "Yes"
    try:
        # None is checked first because math.isnan(None) raises TypeError.
        is_missing = grader is None or math.isnan(grader)
    except TypeError:
        # Not a real number at all: report it as an unexpected item below.
        is_missing = False
    if is_missing:
        return "No"
    print(grader)
    raise Exception("Found unexpected item in Grader column!")
53
+
54
+
55
+ @st.cache_data
56
def get_default_sort_order(df):
    """Build the default sort-priority table for the columns of *df*.

    The preferred columns "Type", "Graded", "Sport", "Last Name" and "Year"
    (those present in *df*, in that order) get priorities 0, 1, 2, ...; every
    other column of *df* follows with priority ``None``.

    Args:
        df: DataFrame whose column labels are listed.

    Returns:
        A DataFrame with exactly the columns "column" and "order", one row
        per column of *df*. The two columns exist even when *df* has no
        columns, so code that reads "order" does not fail on an empty table.
    """
    preferred = ("Type", "Graded", "Sport", "Last Name", "Year")
    default_cols = [label for label in preferred if label in df.columns]
    rows = [{"column": label, "order": rank}
            for rank, label in enumerate(default_cols)]
    rows.extend({"column": label, "order": None}
                for label in df.columns if label not in default_cols)
    # Passing columns= pins the schema; pd.DataFrame([]) alone has no columns.
    return pd.DataFrame(rows, columns=["column", "order"])
65
+
66
+
67
+ @st.cache_data
68
def get_sort_order(edited_sort_order):
    """Return the column names to sort by, most significant first.

    Args:
        edited_sort_order: DataFrame with a "column" column (names) and an
            "order" column (priorities; missing means "do not sort by this").

    Returns:
        List of the names whose priority is present, ascending by priority.
    """
    ranked = edited_sort_order.dropna(subset="order")
    pairs = zip(ranked["column"].tolist(), ranked["order"].tolist())
    return [label for label, _ in sorted(pairs, key=lambda pair: pair[1])]
73
+
74
+
75
+ @st.cache_data
76
def add_graded_column(df):
    """Return *df* with a categorical "Graded" column derived from "Grader".

    Each "Grader" cell is classified with ``add_grading_status``. The result
    is a categorical with categories ["Yes", "No"], in that order, so "Yes"
    rows sort before "No" rows.

    The input frame is not modified. This function is decorated with
    ``st.cache_data`` in this file, so its body only runs on a cache miss; an
    in-place change to the argument would therefore happen on some calls and
    not on others.

    Args:
        df: Input DataFrame.

    Returns:
        A copy of *df* with the "Graded" column added, or *df* itself
        (after showing a Streamlit warning) when it has no "Grader" column.
    """
    if "Grader" not in df:
        st.warning('Input data must have a "Grader" column'
                   ' in order to create a "Graded" column', icon="⚠️")
        return df
    graded = df["Grader"].apply(add_grading_status)
    result = df.copy()
    # Category order sets the sort order of the new column.
    result["Graded"] = pd.Categorical(graded, categories=["Yes", "No"])
    return result
84
+
85
+
86
+ @st.cache_data
87
def add_multiple_column(df):
    """Return *df* with "Quantity" replaced by a categorical "Multiple" column.

    "Multiple" is "Yes" where Quantity is greater than 1 and "No" otherwise,
    stored as a categorical with categories ["Yes", "No"] in that order.

    The input frame is not modified. This function is decorated with
    ``st.cache_data`` in this file, so its body only runs on a cache miss; an
    in-place change to the argument would therefore happen on some calls and
    not on others.

    Args:
        df: Input DataFrame.

    Returns:
        A new DataFrame without "Quantity" and with "Multiple", or *df*
        itself (after showing a Streamlit warning) when it has no "Quantity"
        column.
    """
    if "Quantity" not in df:
        st.warning('Input data must have a "Quantity" column'
                   ' in order to do this', icon="⚠️")
        return df
    multiple = df["Quantity"].apply(lambda qty: "Yes" if qty > 1 else "No")
    # drop() returns a new frame, so the assignment below leaves df untouched.
    result = df.drop(columns=["Quantity"])
    result["Multiple"] = pd.Categorical(multiple, categories=["Yes", "No"])
    return result
96
+
97
+
98
+ @st.cache_data
99
def add_first_and_last_name_columns(df):
    """Return *df* with "First Name" and "Last Name" split from "Player Name".

    Each "Player Name" cell is passed through ``extract_surname`` once and the
    resulting pair is spread over the two new columns.

    The input frame is not modified. This function is decorated with
    ``st.cache_data`` in this file, so its body only runs on a cache miss; an
    in-place change to the argument would therefore happen on some calls and
    not on others.

    Args:
        df: Input DataFrame.

    Returns:
        A copy of *df* with the two name columns added, or *df* itself
        (after showing a Streamlit warning) when it has no "Player Name"
        column.
    """
    if "Player Name" not in df:
        st.warning('Input data must have a "Player Name" column'
                   ' in order to extract first and last names', icon="⚠️")
        return df
    st.write("breaking up names")
    # Parse every name once; the pair feeds both new columns.
    name_pairs = [extract_surname(player) for player in df["Player Name"]]
    result = df.copy()
    result['First Name'] = [pair[0] for pair in name_pairs]
    result['Last Name'] = [pair[-1] for pair in name_pairs]
    return result
108
+
109
+
110
if __name__ == "__main__":
    # Streamlit page: pick an input table, optionally derive columns and sort,
    # then show the result and offer it for download.
    st.markdown("# Baseball card data wrangling")
    st.write("Upload a tab-separated spreadsheet. The first row should contain column"
             " names.")
    # Only delimited-text extensions are offered: load_data() parses with
    # pd.read_csv, which cannot read an .xlsx workbook.
    input_file = st.file_uploader("Choose a file", type=['txt', 'csv', 'tsv'])

    # A ticked sample-data box replaces any uploaded file. Because of the
    # elif, the second box is only evaluated while the first is unticked.
    if st.checkbox("Use sample data 1 (baseball cards)"):
        input_file = "sample_data/sample_data_1.txt"
    elif st.checkbox("Use sample data 2 (big cats)"):
        input_file = "sample_data/sample_data_2.txt"

    if input_file is not None:
        df = load_data(input_file)
        st.subheader('Input data')
        st.write(df)

        if st.checkbox('Create first name and last name columns'):
            df = add_first_and_last_name_columns(df)

        if st.checkbox('Add graded column'):
            df = add_graded_column(df)

        if st.checkbox('Create "multiple" column and remove quantity'):
            df = add_multiple_column(df)

        if st.checkbox("Change sort order"):
            st.subheader("Column sort order")
            st.write("Edit the sort priority by changing the numbers in the table below."
                     " Click the sort button below when you're done.")
            default_sort_order = get_default_sort_order(df)
            edited_sort_order = st.data_editor(default_sort_order)

            do_sort = st.button("Sort")

            if do_sort:
                col_order = get_sort_order(edited_sort_order)
                st.subheader('Sorted output data')
                df = df.sort_values(by=col_order).reset_index(drop=True)

        st.subheader('Output data')
        st.write(df)

        output_file = st.text_input("Enter name for the file to be downloaded", value="cards_output.csv")
        # An emptied text box yields "", so test truthiness rather than
        # "is not None": the download is only offered when a name is given.
        if output_file:
            # convert_df() writes tab-separated text regardless of the name.
            csv = convert_df(df)
            st.download_button("Download data as CSV", csv, file_name=output_file)
157
+
158
+
159
+
160
+
161
+
develop_app.ipynb ADDED
@@ -0,0 +1,930 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 7,
6
+ "id": "95956dfc-c447-4dcb-a3c4-563ec91e9211",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "import pandas as pd"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": 8,
16
+ "id": "8724a364-ae3a-41a1-9b4a-68d53c452d83",
17
+ "metadata": {},
18
+ "outputs": [],
19
+ "source": [
20
+ "infile = \"/Users/carolanderson/Dropbox/Existing2022CardInventoryWithNewBoxes2023.txt\"\n",
21
+ "df= pd.read_csv(infile, sep=\"\\t\", encoding=\"ISO-8859-1\")"
22
+ ]
23
+ },
24
+ {
25
+ "cell_type": "code",
26
+ "execution_count": 9,
27
+ "id": "660ed1bf-cf66-495e-a829-cfe37861aa3f",
28
+ "metadata": {},
29
+ "outputs": [
30
+ {
31
+ "data": {
32
+ "text/html": [
33
+ "<div>\n",
34
+ "<style scoped>\n",
35
+ " .dataframe tbody tr th:only-of-type {\n",
36
+ " vertical-align: middle;\n",
37
+ " }\n",
38
+ "\n",
39
+ " .dataframe tbody tr th {\n",
40
+ " vertical-align: top;\n",
41
+ " }\n",
42
+ "\n",
43
+ " .dataframe thead th {\n",
44
+ " text-align: right;\n",
45
+ " }\n",
46
+ "</style>\n",
47
+ "<table border=\"1\" class=\"dataframe\">\n",
48
+ " <thead>\n",
49
+ " <tr style=\"text-align: right;\">\n",
50
+ " <th></th>\n",
51
+ " <th>Type</th>\n",
52
+ " <th>Sport</th>\n",
53
+ " <th>Year</th>\n",
54
+ " <th>Company</th>\n",
55
+ " <th>Product</th>\n",
56
+ " <th>Card #</th>\n",
57
+ " <th>Player Name</th>\n",
58
+ " <th>Notes</th>\n",
59
+ " <th>HOF</th>\n",
60
+ " <th>Grader</th>\n",
61
+ " <th>Grade</th>\n",
62
+ " <th>Storage Box</th>\n",
63
+ " </tr>\n",
64
+ " </thead>\n",
65
+ " <tbody>\n",
66
+ " <tr>\n",
67
+ " <th>0</th>\n",
68
+ " <td>Card</td>\n",
69
+ " <td>Baseball</td>\n",
70
+ " <td>1949</td>\n",
71
+ " <td>Leaf</td>\n",
72
+ " <td>NaN</td>\n",
73
+ " <td>68</td>\n",
74
+ " <td>Eddie Miller</td>\n",
75
+ " <td>SP</td>\n",
76
+ " <td>NaN</td>\n",
77
+ " <td>Beckett</td>\n",
78
+ " <td>3.0</td>\n",
79
+ " <td>270.0</td>\n",
80
+ " </tr>\n",
81
+ " <tr>\n",
82
+ " <th>1</th>\n",
83
+ " <td>Card</td>\n",
84
+ " <td>Baseball</td>\n",
85
+ " <td>1949</td>\n",
86
+ " <td>Leaf</td>\n",
87
+ " <td>NaN</td>\n",
88
+ " <td>48</td>\n",
89
+ " <td>Johnny Schmitz</td>\n",
90
+ " <td>SP</td>\n",
91
+ " <td>NaN</td>\n",
92
+ " <td>Beckett</td>\n",
93
+ " <td>3.0</td>\n",
94
+ " <td>270.0</td>\n",
95
+ " </tr>\n",
96
+ " <tr>\n",
97
+ " <th>2</th>\n",
98
+ " <td>Card</td>\n",
99
+ " <td>Baseball</td>\n",
100
+ " <td>1961</td>\n",
101
+ " <td>Topps</td>\n",
102
+ " <td>NaN</td>\n",
103
+ " <td>575</td>\n",
104
+ " <td>Ernie Banks</td>\n",
105
+ " <td>AS</td>\n",
106
+ " <td>NaN</td>\n",
107
+ " <td>Beckett</td>\n",
108
+ " <td>4.5</td>\n",
109
+ " <td>269.0</td>\n",
110
+ " </tr>\n",
111
+ " <tr>\n",
112
+ " <th>3</th>\n",
113
+ " <td>Card</td>\n",
114
+ " <td>Baseball</td>\n",
115
+ " <td>1949</td>\n",
116
+ " <td>Leaf</td>\n",
117
+ " <td>NaN</td>\n",
118
+ " <td>129</td>\n",
119
+ " <td>Kirby Higbe</td>\n",
120
+ " <td>SP</td>\n",
121
+ " <td>NaN</td>\n",
122
+ " <td>Beckett</td>\n",
123
+ " <td>1.5</td>\n",
124
+ " <td>270.0</td>\n",
125
+ " </tr>\n",
126
+ " <tr>\n",
127
+ " <th>4</th>\n",
128
+ " <td>Card</td>\n",
129
+ " <td>Baseball</td>\n",
130
+ " <td>1999</td>\n",
131
+ " <td>Topps</td>\n",
132
+ " <td>All-Topps Mystery Finest</td>\n",
133
+ " <td>M3</td>\n",
134
+ " <td>Mark McGwire</td>\n",
135
+ " <td>NaN</td>\n",
136
+ " <td>NaN</td>\n",
137
+ " <td>Beckett</td>\n",
138
+ " <td>9.0</td>\n",
139
+ " <td>270.0</td>\n",
140
+ " </tr>\n",
141
+ " </tbody>\n",
142
+ "</table>\n",
143
+ "</div>"
144
+ ],
145
+ "text/plain": [
146
+ " Type Sport Year Company Product Card # \\\n",
147
+ "0 Card Baseball 1949 Leaf NaN 68 \n",
148
+ "1 Card Baseball 1949 Leaf NaN 48 \n",
149
+ "2 Card Baseball 1961 Topps NaN 575 \n",
150
+ "3 Card Baseball 1949 Leaf NaN 129 \n",
151
+ "4 Card Baseball 1999 Topps All-Topps Mystery Finest M3 \n",
152
+ "\n",
153
+ " Player Name Notes HOF Grader Grade Storage Box \n",
154
+ "0 Eddie Miller SP NaN Beckett 3.0 270.0 \n",
155
+ "1 Johnny Schmitz SP NaN Beckett 3.0 270.0 \n",
156
+ "2 Ernie Banks AS NaN Beckett 4.5 269.0 \n",
157
+ "3 Kirby Higbe SP NaN Beckett 1.5 270.0 \n",
158
+ "4 Mark McGwire NaN NaN Beckett 9.0 270.0 "
159
+ ]
160
+ },
161
+ "execution_count": 9,
162
+ "metadata": {},
163
+ "output_type": "execute_result"
164
+ }
165
+ ],
166
+ "source": [
167
+ "df.head()"
168
+ ]
169
+ },
170
+ {
171
+ "cell_type": "code",
172
+ "execution_count": 13,
173
+ "id": "d37f49f3-8fca-42db-a148-b0d0c37c9c04",
174
+ "metadata": {},
175
+ "outputs": [
176
+ {
177
+ "data": {
178
+ "text/html": [
179
+ "<div>\n",
180
+ "<style scoped>\n",
181
+ " .dataframe tbody tr th:only-of-type {\n",
182
+ " vertical-align: middle;\n",
183
+ " }\n",
184
+ "\n",
185
+ " .dataframe tbody tr th {\n",
186
+ " vertical-align: top;\n",
187
+ " }\n",
188
+ "\n",
189
+ " .dataframe thead th {\n",
190
+ " text-align: right;\n",
191
+ " }\n",
192
+ "</style>\n",
193
+ "<table border=\"1\" class=\"dataframe\">\n",
194
+ " <thead>\n",
195
+ " <tr style=\"text-align: right;\">\n",
196
+ " <th></th>\n",
197
+ " <th>Type</th>\n",
198
+ " <th>Sport</th>\n",
199
+ " <th>Year</th>\n",
200
+ " <th>Company</th>\n",
201
+ " <th>Product</th>\n",
202
+ " <th>Card #</th>\n",
203
+ " <th>Player Name</th>\n",
204
+ " <th>Notes</th>\n",
205
+ " <th>HOF</th>\n",
206
+ " <th>Grader</th>\n",
207
+ " <th>Grade</th>\n",
208
+ " <th>Storage Box</th>\n",
209
+ " </tr>\n",
210
+ " </thead>\n",
211
+ " <tbody>\n",
212
+ " <tr>\n",
213
+ " <th>55</th>\n",
214
+ " <td>Card</td>\n",
215
+ " <td>Baseball</td>\n",
216
+ " <td>1952</td>\n",
217
+ " <td>Bowman</td>\n",
218
+ " <td>NaN</td>\n",
219
+ " <td>11</td>\n",
220
+ " <td>Ralph Kiner</td>\n",
221
+ " <td>NaN</td>\n",
222
+ " <td>Y</td>\n",
223
+ " <td>Beckett</td>\n",
224
+ " <td>3.0</td>\n",
225
+ " <td>271.0</td>\n",
226
+ " </tr>\n",
227
+ " <tr>\n",
228
+ " <th>403</th>\n",
229
+ " <td>Card</td>\n",
230
+ " <td>Baseball</td>\n",
231
+ " <td>1953</td>\n",
232
+ " <td>Bowman</td>\n",
233
+ " <td>Color</td>\n",
234
+ " <td>80</td>\n",
235
+ " <td>Ralph Kiner</td>\n",
236
+ " <td>NaN</td>\n",
237
+ " <td>NaN</td>\n",
238
+ " <td>NaN</td>\n",
239
+ " <td>NaN</td>\n",
240
+ " <td>NaN</td>\n",
241
+ " </tr>\n",
242
+ " <tr>\n",
243
+ " <th>314</th>\n",
244
+ " <td>Card</td>\n",
245
+ " <td>Baseball</td>\n",
246
+ " <td>1949</td>\n",
247
+ " <td>Leaf</td>\n",
248
+ " <td>NaN</td>\n",
249
+ " <td>61</td>\n",
250
+ " <td>Jake Early</td>\n",
251
+ " <td>NaN</td>\n",
252
+ " <td>NaN</td>\n",
253
+ " <td>NaN</td>\n",
254
+ " <td>NaN</td>\n",
255
+ " <td>NaN</td>\n",
256
+ " </tr>\n",
257
+ " <tr>\n",
258
+ " <th>179</th>\n",
259
+ " <td>Card</td>\n",
260
+ " <td>Baseball</td>\n",
261
+ " <td>1985</td>\n",
262
+ " <td>Topps</td>\n",
263
+ " <td>NaN</td>\n",
264
+ " <td>401</td>\n",
265
+ " <td>Mark McGwire</td>\n",
266
+ " <td>Rookie</td>\n",
267
+ " <td>NaN</td>\n",
268
+ " <td>Sportscard Guaranty</td>\n",
269
+ " <td>86.0</td>\n",
270
+ " <td>268.0</td>\n",
271
+ " </tr>\n",
272
+ " <tr>\n",
273
+ " <th>492</th>\n",
274
+ " <td>Card</td>\n",
275
+ " <td>Baseball</td>\n",
276
+ " <td>1952</td>\n",
277
+ " <td>Bowman</td>\n",
278
+ " <td>NaN</td>\n",
279
+ " <td>122</td>\n",
280
+ " <td>Joe Garagiola</td>\n",
281
+ " <td>NaN</td>\n",
282
+ " <td>NaN</td>\n",
283
+ " <td>NaN</td>\n",
284
+ " <td>NaN</td>\n",
285
+ " <td>NaN</td>\n",
286
+ " </tr>\n",
287
+ " <tr>\n",
288
+ " <th>315</th>\n",
289
+ " <td>Card</td>\n",
290
+ " <td>Baseball</td>\n",
291
+ " <td>1948</td>\n",
292
+ " <td>Leaf</td>\n",
293
+ " <td>NaN</td>\n",
294
+ " <td>49</td>\n",
295
+ " <td>Del Ennis</td>\n",
296
+ " <td>NaN</td>\n",
297
+ " <td>NaN</td>\n",
298
+ " <td>NaN</td>\n",
299
+ " <td>NaN</td>\n",
300
+ " <td>NaN</td>\n",
301
+ " </tr>\n",
302
+ " <tr>\n",
303
+ " <th>109</th>\n",
304
+ " <td>Card</td>\n",
305
+ " <td>Baseball</td>\n",
306
+ " <td>1949</td>\n",
307
+ " <td>Leaf</td>\n",
308
+ " <td>NaN</td>\n",
309
+ " <td>62</td>\n",
310
+ " <td>Eddie Joost</td>\n",
311
+ " <td>SP</td>\n",
312
+ " <td>NaN</td>\n",
313
+ " <td>Beckett</td>\n",
314
+ " <td>2.5</td>\n",
315
+ " <td>270.0</td>\n",
316
+ " </tr>\n",
317
+ " <tr>\n",
318
+ " <th>66</th>\n",
319
+ " <td>Card</td>\n",
320
+ " <td>Baseball</td>\n",
321
+ " <td>1949</td>\n",
322
+ " <td>Leaf</td>\n",
323
+ " <td>NaN</td>\n",
324
+ " <td>113</td>\n",
325
+ " <td>Dutch Leonard</td>\n",
326
+ " <td>SP</td>\n",
327
+ " <td>NaN</td>\n",
328
+ " <td>Beckett</td>\n",
329
+ " <td>2.5</td>\n",
330
+ " <td>268.0</td>\n",
331
+ " </tr>\n",
332
+ " <tr>\n",
333
+ " <th>340</th>\n",
334
+ " <td>Card</td>\n",
335
+ " <td>Basketball</td>\n",
336
+ " <td>1994</td>\n",
337
+ " <td>Competitive Images</td>\n",
338
+ " <td>NaN</td>\n",
339
+ " <td>8</td>\n",
340
+ " <td>Michael Jordan</td>\n",
341
+ " <td>NaN</td>\n",
342
+ " <td>NaN</td>\n",
343
+ " <td>NaN</td>\n",
344
+ " <td>NaN</td>\n",
345
+ " <td>NaN</td>\n",
346
+ " </tr>\n",
347
+ " <tr>\n",
348
+ " <th>104</th>\n",
349
+ " <td>Card</td>\n",
350
+ " <td>Baseball</td>\n",
351
+ " <td>1949</td>\n",
352
+ " <td>Leaf</td>\n",
353
+ " <td>NaN</td>\n",
354
+ " <td>36</td>\n",
355
+ " <td>Al Zarilla</td>\n",
356
+ " <td>SP</td>\n",
357
+ " <td>NaN</td>\n",
358
+ " <td>Beckett</td>\n",
359
+ " <td>2.5</td>\n",
360
+ " <td>271.0</td>\n",
361
+ " </tr>\n",
362
+ " <tr>\n",
363
+ " <th>469</th>\n",
364
+ " <td>Card</td>\n",
365
+ " <td>Baseball</td>\n",
366
+ " <td>1974</td>\n",
367
+ " <td>Topps</td>\n",
368
+ " <td>NaN</td>\n",
369
+ " <td>230</td>\n",
370
+ " <td>Tony Perez</td>\n",
371
+ " <td>NaN</td>\n",
372
+ " <td>NaN</td>\n",
373
+ " <td>NaN</td>\n",
374
+ " <td>NaN</td>\n",
375
+ " <td>NaN</td>\n",
376
+ " </tr>\n",
377
+ " <tr>\n",
378
+ " <th>420</th>\n",
379
+ " <td>Card</td>\n",
380
+ " <td>Baseball</td>\n",
381
+ " <td>1951</td>\n",
382
+ " <td>Bowman</td>\n",
383
+ " <td>NaN</td>\n",
384
+ " <td>110</td>\n",
385
+ " <td>Bobby Brown</td>\n",
386
+ " <td>NaN</td>\n",
387
+ " <td>NaN</td>\n",
388
+ " <td>NaN</td>\n",
389
+ " <td>NaN</td>\n",
390
+ " <td>NaN</td>\n",
391
+ " </tr>\n",
392
+ " <tr>\n",
393
+ " <th>302</th>\n",
394
+ " <td>Card</td>\n",
395
+ " <td>Baseball</td>\n",
396
+ " <td>2017</td>\n",
397
+ " <td>Topps</td>\n",
398
+ " <td>Now</td>\n",
399
+ " <td>OS-80</td>\n",
400
+ " <td>Shohei Ohtani</td>\n",
401
+ " <td>NaN</td>\n",
402
+ " <td>NaN</td>\n",
403
+ " <td>NaN</td>\n",
404
+ " <td>NaN</td>\n",
405
+ " <td>NaN</td>\n",
406
+ " </tr>\n",
407
+ " <tr>\n",
408
+ " <th>197</th>\n",
409
+ " <td>Card</td>\n",
410
+ " <td>Baseball</td>\n",
411
+ " <td>1957</td>\n",
412
+ " <td>Topps</td>\n",
413
+ " <td>NaN</td>\n",
414
+ " <td>25</td>\n",
415
+ " <td>Whitey Ford</td>\n",
416
+ " <td>NaN</td>\n",
417
+ " <td>NaN</td>\n",
418
+ " <td>Sportscard Guaranty</td>\n",
419
+ " <td>86.0</td>\n",
420
+ " <td>NaN</td>\n",
421
+ " </tr>\n",
422
+ " <tr>\n",
423
+ " <th>144</th>\n",
424
+ " <td>Card</td>\n",
425
+ " <td>Baseball</td>\n",
426
+ " <td>1954</td>\n",
427
+ " <td>Dan-Dee</td>\n",
428
+ " <td>NaN</td>\n",
429
+ " <td>3</td>\n",
430
+ " <td>Walker Cooper</td>\n",
431
+ " <td>NaN</td>\n",
432
+ " <td>NaN</td>\n",
433
+ " <td>Beckett</td>\n",
434
+ " <td>2.0</td>\n",
435
+ " <td>270.0</td>\n",
436
+ " </tr>\n",
437
+ " <tr>\n",
438
+ " <th>481</th>\n",
439
+ " <td>Card</td>\n",
440
+ " <td>Baseball</td>\n",
441
+ " <td>1952</td>\n",
442
+ " <td>Bowman</td>\n",
443
+ " <td>NaN</td>\n",
444
+ " <td>96</td>\n",
445
+ " <td>Ralph Branca</td>\n",
446
+ " <td>NaN</td>\n",
447
+ " <td>NaN</td>\n",
448
+ " <td>NaN</td>\n",
449
+ " <td>NaN</td>\n",
450
+ " <td>NaN</td>\n",
451
+ " </tr>\n",
452
+ " <tr>\n",
453
+ " <th>194</th>\n",
454
+ " <td>Card</td>\n",
455
+ " <td>Baseball</td>\n",
456
+ " <td>1987</td>\n",
457
+ " <td>Fleer</td>\n",
458
+ " <td>Update Glossy</td>\n",
459
+ " <td>U-68</td>\n",
460
+ " <td>Greg Maddux</td>\n",
461
+ " <td>Rookie</td>\n",
462
+ " <td>NaN</td>\n",
463
+ " <td>Sportscard Guaranty</td>\n",
464
+ " <td>96.0</td>\n",
465
+ " <td>268.0</td>\n",
466
+ " </tr>\n",
467
+ " <tr>\n",
468
+ " <th>123</th>\n",
469
+ " <td>Card</td>\n",
470
+ " <td>Baseball</td>\n",
471
+ " <td>2001</td>\n",
472
+ " <td>Topps</td>\n",
473
+ " <td>NaN</td>\n",
474
+ " <td>726</td>\n",
475
+ " <td>Ichiro Suzuki</td>\n",
476
+ " <td>NaN</td>\n",
477
+ " <td>NaN</td>\n",
478
+ " <td>Beckett</td>\n",
479
+ " <td>8.0</td>\n",
480
+ " <td>267.0</td>\n",
481
+ " </tr>\n",
482
+ " <tr>\n",
483
+ " <th>224</th>\n",
484
+ " <td>Card</td>\n",
485
+ " <td>Baseball</td>\n",
486
+ " <td>1993</td>\n",
487
+ " <td>Classic</td>\n",
488
+ " <td>Best</td>\n",
489
+ " <td>PR1</td>\n",
490
+ " <td>Derek Jeter</td>\n",
491
+ " <td>NaN</td>\n",
492
+ " <td>NaN</td>\n",
493
+ " <td>CSA</td>\n",
494
+ " <td>9.0</td>\n",
495
+ " <td>NaN</td>\n",
496
+ " </tr>\n",
497
+ " <tr>\n",
498
+ " <th>358</th>\n",
499
+ " <td>Card</td>\n",
500
+ " <td>Baseball</td>\n",
501
+ " <td>1952</td>\n",
502
+ " <td>Topps</td>\n",
503
+ " <td>NaN</td>\n",
504
+ " <td>36</td>\n",
505
+ " <td>Gil Hodges</td>\n",
506
+ " <td>NaN</td>\n",
507
+ " <td>NaN</td>\n",
508
+ " <td>NaN</td>\n",
509
+ " <td>NaN</td>\n",
510
+ " <td>NaN</td>\n",
511
+ " </tr>\n",
512
+ " </tbody>\n",
513
+ "</table>\n",
514
+ "</div>"
515
+ ],
516
+ "text/plain": [
517
+ " Type Sport Year Company Product Card # \\\n",
518
+ "55 Card Baseball 1952 Bowman NaN 11 \n",
519
+ "403 Card Baseball 1953 Bowman Color 80 \n",
520
+ "314 Card Baseball 1949 Leaf NaN 61 \n",
521
+ "179 Card Baseball 1985 Topps NaN 401 \n",
522
+ "492 Card Baseball 1952 Bowman NaN 122 \n",
523
+ "315 Card Baseball 1948 Leaf NaN 49 \n",
524
+ "109 Card Baseball 1949 Leaf NaN 62 \n",
525
+ "66 Card Baseball 1949 Leaf NaN 113 \n",
526
+ "340 Card Basketball 1994 Competitive Images NaN 8 \n",
527
+ "104 Card Baseball 1949 Leaf NaN 36 \n",
528
+ "469 Card Baseball 1974 Topps NaN 230 \n",
529
+ "420 Card Baseball 1951 Bowman NaN 110 \n",
530
+ "302 Card Baseball 2017 Topps Now OS-80 \n",
531
+ "197 Card Baseball 1957 Topps NaN 25 \n",
532
+ "144 Card Baseball 1954 Dan-Dee NaN 3 \n",
533
+ "481 Card Baseball 1952 Bowman NaN 96 \n",
534
+ "194 Card Baseball 1987 Fleer Update Glossy U-68 \n",
535
+ "123 Card Baseball 2001 Topps NaN 726 \n",
536
+ "224 Card Baseball 1993 Classic Best PR1 \n",
537
+ "358 Card Baseball 1952 Topps NaN 36 \n",
538
+ "\n",
539
+ " Player Name Notes HOF Grader Grade Storage Box \n",
540
+ "55 Ralph Kiner NaN Y Beckett 3.0 271.0 \n",
541
+ "403 Ralph Kiner NaN NaN NaN NaN NaN \n",
542
+ "314 Jake Early NaN NaN NaN NaN NaN \n",
543
+ "179 Mark McGwire Rookie NaN Sportscard Guaranty 86.0 268.0 \n",
544
+ "492 Joe Garagiola NaN NaN NaN NaN NaN \n",
545
+ "315 Del Ennis NaN NaN NaN NaN NaN \n",
546
+ "109 Eddie Joost SP NaN Beckett 2.5 270.0 \n",
547
+ "66 Dutch Leonard SP NaN Beckett 2.5 268.0 \n",
548
+ "340 Michael Jordan NaN NaN NaN NaN NaN \n",
549
+ "104 Al Zarilla SP NaN Beckett 2.5 271.0 \n",
550
+ "469 Tony Perez NaN NaN NaN NaN NaN \n",
551
+ "420 Bobby Brown NaN NaN NaN NaN NaN \n",
552
+ "302 Shohei Ohtani NaN NaN NaN NaN NaN \n",
553
+ "197 Whitey Ford NaN NaN Sportscard Guaranty 86.0 NaN \n",
554
+ "144 Walker Cooper NaN NaN Beckett 2.0 270.0 \n",
555
+ "481 Ralph Branca NaN NaN NaN NaN NaN \n",
556
+ "194 Greg Maddux Rookie NaN Sportscard Guaranty 96.0 268.0 \n",
557
+ "123 Ichiro Suzuki NaN NaN Beckett 8.0 267.0 \n",
558
+ "224 Derek Jeter NaN NaN CSA 9.0 NaN \n",
559
+ "358 Gil Hodges NaN NaN NaN NaN NaN "
560
+ ]
561
+ },
562
+ "execution_count": 13,
563
+ "metadata": {},
564
+ "output_type": "execute_result"
565
+ }
566
+ ],
567
+ "source": [
568
+ "sample = df[:500].sample(20)\n",
569
+ "sample"
570
+ ]
571
+ },
572
+ {
573
+ "cell_type": "code",
574
+ "execution_count": 15,
575
+ "id": "795ef99b-abd7-4fe8-b56c-1779ebda8d52",
576
+ "metadata": {},
577
+ "outputs": [
578
+ {
579
+ "data": {
580
+ "text/html": [
581
+ "<div>\n",
582
+ "<style scoped>\n",
583
+ " .dataframe tbody tr th:only-of-type {\n",
584
+ " vertical-align: middle;\n",
585
+ " }\n",
586
+ "\n",
587
+ " .dataframe tbody tr th {\n",
588
+ " vertical-align: top;\n",
589
+ " }\n",
590
+ "\n",
591
+ " .dataframe thead th {\n",
592
+ " text-align: right;\n",
593
+ " }\n",
594
+ "</style>\n",
595
+ "<table border=\"1\" class=\"dataframe\">\n",
596
+ " <thead>\n",
597
+ " <tr style=\"text-align: right;\">\n",
598
+ " <th></th>\n",
599
+ " <th>Type</th>\n",
600
+ " <th>Sport</th>\n",
601
+ " <th>Year</th>\n",
602
+ " <th>Company</th>\n",
603
+ " <th>Product</th>\n",
604
+ " <th>Card #</th>\n",
605
+ " <th>Player Name</th>\n",
606
+ " <th>Notes</th>\n",
607
+ " <th>HOF</th>\n",
608
+ " <th>Grader</th>\n",
609
+ " <th>Grade</th>\n",
610
+ " <th>Storage Box</th>\n",
611
+ " </tr>\n",
612
+ " </thead>\n",
613
+ " <tbody>\n",
614
+ " <tr>\n",
615
+ " <th>179</th>\n",
616
+ " <td>Card</td>\n",
617
+ " <td>Baseball</td>\n",
618
+ " <td>1985</td>\n",
619
+ " <td>Topps</td>\n",
620
+ " <td>NaN</td>\n",
621
+ " <td>401</td>\n",
622
+ " <td>Mark McGwire</td>\n",
623
+ " <td>Rookie</td>\n",
624
+ " <td>NaN</td>\n",
625
+ " <td>Sportscard Guaranty</td>\n",
626
+ " <td>86.0</td>\n",
627
+ " <td>268.0</td>\n",
628
+ " </tr>\n",
629
+ " <tr>\n",
630
+ " <th>492</th>\n",
631
+ " <td>Card</td>\n",
632
+ " <td>Baseball</td>\n",
633
+ " <td>1952</td>\n",
634
+ " <td>Bowman</td>\n",
635
+ " <td>NaN</td>\n",
636
+ " <td>122</td>\n",
637
+ " <td>Joe Garagiola</td>\n",
638
+ " <td>NaN</td>\n",
639
+ " <td>NaN</td>\n",
640
+ " <td>NaN</td>\n",
641
+ " <td>NaN</td>\n",
642
+ " <td>NaN</td>\n",
643
+ " </tr>\n",
644
+ " <tr>\n",
645
+ " <th>315</th>\n",
646
+ " <td>Card</td>\n",
647
+ " <td>Baseball</td>\n",
648
+ " <td>1948</td>\n",
649
+ " <td>Leaf</td>\n",
650
+ " <td>NaN</td>\n",
651
+ " <td>49</td>\n",
652
+ " <td>Del Ennis</td>\n",
653
+ " <td>NaN</td>\n",
654
+ " <td>NaN</td>\n",
655
+ " <td>NaN</td>\n",
656
+ " <td>NaN</td>\n",
657
+ " <td>NaN</td>\n",
658
+ " </tr>\n",
659
+ " <tr>\n",
660
+ " <th>109</th>\n",
661
+ " <td>Card</td>\n",
662
+ " <td>Baseball</td>\n",
663
+ " <td>1949</td>\n",
664
+ " <td>Leaf</td>\n",
665
+ " <td>NaN</td>\n",
666
+ " <td>62</td>\n",
667
+ " <td>Eddie Joost</td>\n",
668
+ " <td>SP</td>\n",
669
+ " <td>NaN</td>\n",
670
+ " <td>Beckett</td>\n",
671
+ " <td>2.5</td>\n",
672
+ " <td>270.0</td>\n",
673
+ " </tr>\n",
674
+ " <tr>\n",
675
+ " <th>66</th>\n",
676
+ " <td>Card</td>\n",
677
+ " <td>Baseball</td>\n",
678
+ " <td>1949</td>\n",
679
+ " <td>Leaf</td>\n",
680
+ " <td>NaN</td>\n",
681
+ " <td>113</td>\n",
682
+ " <td>Dutch Leonard</td>\n",
683
+ " <td>SP</td>\n",
684
+ " <td>NaN</td>\n",
685
+ " <td>Beckett</td>\n",
686
+ " <td>2.5</td>\n",
687
+ " <td>268.0</td>\n",
688
+ " </tr>\n",
689
+ " <tr>\n",
690
+ " <th>340</th>\n",
691
+ " <td>Card</td>\n",
692
+ " <td>Basketball</td>\n",
693
+ " <td>1994</td>\n",
694
+ " <td>Competitive Images</td>\n",
695
+ " <td>NaN</td>\n",
696
+ " <td>8</td>\n",
697
+ " <td>Michael Jordan</td>\n",
698
+ " <td>NaN</td>\n",
699
+ " <td>NaN</td>\n",
700
+ " <td>NaN</td>\n",
701
+ " <td>NaN</td>\n",
702
+ " <td>NaN</td>\n",
703
+ " </tr>\n",
704
+ " <tr>\n",
705
+ " <th>104</th>\n",
706
+ " <td>Card</td>\n",
707
+ " <td>Baseball</td>\n",
708
+ " <td>1949</td>\n",
709
+ " <td>Leaf</td>\n",
710
+ " <td>NaN</td>\n",
711
+ " <td>36</td>\n",
712
+ " <td>Al Zarilla</td>\n",
713
+ " <td>SP</td>\n",
714
+ " <td>NaN</td>\n",
715
+ " <td>Beckett</td>\n",
716
+ " <td>2.5</td>\n",
717
+ " <td>271.0</td>\n",
718
+ " </tr>\n",
719
+ " </tbody>\n",
720
+ "</table>\n",
721
+ "</div>"
722
+ ],
723
+ "text/plain": [
724
+ " Type Sport Year Company Product Card # \\\n",
725
+ "179 Card Baseball 1985 Topps NaN 401 \n",
726
+ "492 Card Baseball 1952 Bowman NaN 122 \n",
727
+ "315 Card Baseball 1948 Leaf NaN 49 \n",
728
+ "109 Card Baseball 1949 Leaf NaN 62 \n",
729
+ "66 Card Baseball 1949 Leaf NaN 113 \n",
730
+ "340 Card Basketball 1994 Competitive Images NaN 8 \n",
731
+ "104 Card Baseball 1949 Leaf NaN 36 \n",
732
+ "\n",
733
+ " Player Name Notes HOF Grader Grade Storage Box \n",
734
+ "179 Mark McGwire Rookie NaN Sportscard Guaranty 86.0 268.0 \n",
735
+ "492 Joe Garagiola NaN NaN NaN NaN NaN \n",
736
+ "315 Del Ennis NaN NaN NaN NaN NaN \n",
737
+ "109 Eddie Joost SP NaN Beckett 2.5 270.0 \n",
738
+ "66 Dutch Leonard SP NaN Beckett 2.5 268.0 \n",
739
+ "340 Michael Jordan NaN NaN NaN NaN NaN \n",
740
+ "104 Al Zarilla SP NaN Beckett 2.5 271.0 "
741
+ ]
742
+ },
743
+ "execution_count": 15,
744
+ "metadata": {},
745
+ "output_type": "execute_result"
746
+ }
747
+ ],
748
+ "source": [
749
+ "sample[3:10]"
750
+ ]
751
+ },
752
+ {
753
+ "cell_type": "code",
754
+ "execution_count": 23,
755
+ "id": "58a90d8e-0480-4187-a7eb-cd69463c0329",
756
+ "metadata": {},
757
+ "outputs": [],
758
+ "source": [
759
+ "outfile = \"sample_data/sample_data_1.txt\"\n",
760
+ "sample[3:13].to_csv(outfile, sep=\"\\t\", index=False, encoding=\"utf-8\")"
761
+ ]
762
+ },
763
+ {
764
+ "cell_type": "code",
765
+ "execution_count": 21,
766
+ "id": "31083cda-058b-4aee-b0d5-965e48c4bca7",
767
+ "metadata": {},
768
+ "outputs": [
769
+ {
770
+ "data": {
771
+ "text/html": [
772
+ "<div>\n",
773
+ "<style scoped>\n",
774
+ " .dataframe tbody tr th:only-of-type {\n",
775
+ " vertical-align: middle;\n",
776
+ " }\n",
777
+ "\n",
778
+ " .dataframe tbody tr th {\n",
779
+ " vertical-align: top;\n",
780
+ " }\n",
781
+ "\n",
782
+ " .dataframe thead th {\n",
783
+ " text-align: right;\n",
784
+ " }\n",
785
+ "</style>\n",
786
+ "<table border=\"1\" class=\"dataframe\">\n",
787
+ " <thead>\n",
788
+ " <tr style=\"text-align: right;\">\n",
789
+ " <th></th>\n",
790
+ " <th>animal</th>\n",
791
+ " <th>coat pattern</th>\n",
792
+ " <th>Quantity</th>\n",
793
+ " </tr>\n",
794
+ " </thead>\n",
795
+ " <tbody>\n",
796
+ " <tr>\n",
797
+ " <th>0</th>\n",
798
+ " <td>Leopard</td>\n",
799
+ " <td>spots</td>\n",
800
+ " <td>2</td>\n",
801
+ " </tr>\n",
802
+ " <tr>\n",
803
+ " <th>1</th>\n",
804
+ " <td>Tiger</td>\n",
805
+ " <td>stripes</td>\n",
806
+ " <td>10</td>\n",
807
+ " </tr>\n",
808
+ " <tr>\n",
809
+ " <th>2</th>\n",
810
+ " <td>Lion</td>\n",
811
+ " <td>solid</td>\n",
812
+ " <td>1</td>\n",
813
+ " </tr>\n",
814
+ " <tr>\n",
815
+ " <th>3</th>\n",
816
+ " <td>Cheetah</td>\n",
817
+ " <td>spots</td>\n",
818
+ " <td>1</td>\n",
819
+ " </tr>\n",
820
+ " </tbody>\n",
821
+ "</table>\n",
822
+ "</div>"
823
+ ],
824
+ "text/plain": [
825
+ " animal coat pattern Quantity\n",
826
+ "0 Leopard spots 2\n",
827
+ "1 Tiger stripes 10\n",
828
+ "2 Lion solid 1\n",
829
+ "3 Cheetah spots 1"
830
+ ]
831
+ },
832
+ "execution_count": 21,
833
+ "metadata": {},
834
+ "output_type": "execute_result"
835
+ }
836
+ ],
837
+ "source": [
838
+ "df2 = pd.DataFrame([{\"animal\": \"Leopard\", \"coat pattern\": \"spots\", \"Quantity\" : 2},\n",
839
+ " {\"animal\": \"Tiger\", \"coat pattern\": \"stripes\", \"Quantity\" : 10},\n",
840
+ " {\"animal\" : \"Lion\", \"coat pattern\" : \"solid\", \"Quantity\" : 1},\n",
841
+ " {\"animal\" : \"Cheetah\", \"coat pattern\" : \"spots\", \"Quantity\" : 1}])\n",
842
+ "\n",
843
+ "df2"
844
+ ]
845
+ },
846
+ {
847
+ "cell_type": "code",
848
+ "execution_count": 22,
849
+ "id": "bc9735c0-e12a-4b1f-8cb2-7501d37a4c19",
850
+ "metadata": {},
851
+ "outputs": [],
852
+ "source": [
853
+ "outfile = \"sample_data/sample_data_2.txt\"\n",
854
+ "df2.to_csv(outfile, sep=\"\\t\", index=False)"
855
+ ]
856
+ },
857
+ {
858
+ "cell_type": "code",
859
+ "execution_count": 20,
860
+ "id": "ed438bbb-03f4-44a9-8f14-9390f3996cae",
861
+ "metadata": {},
862
+ "outputs": [
863
+ {
864
+ "data": {
865
+ "text/plain": [
866
+ "True"
867
+ ]
868
+ },
869
+ "execution_count": 20,
870
+ "metadata": {},
871
+ "output_type": "execute_result"
872
+ }
873
+ ],
874
+ "source": [
875
+ "\"animal\" in df2"
876
+ ]
877
+ },
878
+ {
879
+ "cell_type": "code",
880
+ "execution_count": 29,
881
+ "id": "36d5bb2e-cc1e-4afa-bff9-730832e9a5ae",
882
+ "metadata": {},
883
+ "outputs": [
884
+ {
885
+ "data": {
886
+ "text/plain": [
887
+ "dtype('int64')"
888
+ ]
889
+ },
890
+ "execution_count": 29,
891
+ "metadata": {},
892
+ "output_type": "execute_result"
893
+ }
894
+ ],
895
+ "source": [
896
+ "df3 = pd.read_csv(\"sample_data/sample_data_1.txt\", sep=\"\\t\", encoding=\"ISO-8859-1\" )\n",
897
+ "df3.Year.dtype"
898
+ ]
899
+ },
900
+ {
901
+ "cell_type": "code",
902
+ "execution_count": null,
903
+ "id": "905c63e9-3d06-4905-ac63-3e66bf94c22e",
904
+ "metadata": {},
905
+ "outputs": [],
906
+ "source": []
907
+ }
908
+ ],
909
+ "metadata": {
910
+ "kernelspec": {
911
+ "display_name": "Python [conda env:rebalance] *",
912
+ "language": "python",
913
+ "name": "conda-env-rebalance-py"
914
+ },
915
+ "language_info": {
916
+ "codemirror_mode": {
917
+ "name": "ipython",
918
+ "version": 3
919
+ },
920
+ "file_extension": ".py",
921
+ "mimetype": "text/x-python",
922
+ "name": "python",
923
+ "nbconvert_exporter": "python",
924
+ "pygments_lexer": "ipython3",
925
+ "version": "3.8.13"
926
+ }
927
+ },
928
+ "nbformat": 4,
929
+ "nbformat_minor": 5
930
+ }
requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ pandas
sample_data/sample_data_1.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Type Sport Year Company Product Card # Player Name Notes HOF Grader Grade Storage Box
2
+ Card Baseball 1985 Topps 401 Mark McGwire Rookie Sportscard Guaranty 86.0 268.0
3
+ Card Baseball 1952 Bowman 122 Joe Garagiola
4
+ Card Baseball 1948 Leaf 49 Del Ennis
5
+ Card Baseball 1949 Leaf 62 Eddie Joost SP Beckett 2.5 270.0
6
+ Card Baseball 1949 Leaf 113 Dutch Leonard SP Beckett 2.5 268.0
7
+ Card Basketball 1994 Competitive Images 8 Michael Jordan
8
+ Card Baseball 1949 Leaf 36 Al Zarilla SP Beckett 2.5 271.0
9
+ Card Baseball 1974 Topps 230 Tony Perez
10
+ Card Baseball 1951 Bowman 110 Bobby Brown
11
+ Card Baseball 2017 Topps Now OS-80 Shohei Ohtani
sample_data/sample_data_2.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ animal coat pattern Quantity
2
+ Leopard spots 2
3
+ Tiger stripes 10
4
+ Lion solid 1
5
+ Cheetah spots 1