|
|
|
import csv

import pandas as pd
|
|
|
|
|
|
|
# Directory holding both the input extract and the Parquet output.
# It is prepended verbatim to the file names below, so it must keep its
# trailing slash.
file_dir = "../"

# Input: comma-delimited text extract of the case notes.
extracted_file_path = f"{file_dir}2022_08_case_notes.txt"

# Output: Parquet copy of the rows that parse cleanly.
parquet_file_path = f"{file_dir}2022_08_case_notes.parquet"
|
|
|
|
|
|
|
# Set the csv parser's maximum field size to 1,000,000 characters.
# NOTE(review): presumably because individual case notes can be very long
# -- confirm against the data.
csv.field_size_limit(1000000)

# Read every record (header row included) into memory as a list of lists.
# newline="" hands line-ending handling to the csv module, which the csv
# documentation requires so that quoted fields containing line breaks are
# parsed correctly.
with open(
    extracted_file_path,
    mode="r",
    encoding="iso-8859-1",
    newline="",
) as file:
    csv_reader = csv.reader(file, delimiter=",")
    data_list = list(csv_reader)
|
|
|
|
|
# The first record supplies the column names; without it nothing can be built.
if not data_list:
    raise ValueError("input file contained no rows; cannot determine the header")

header = data_list[0]
expected_width = len(header)

# Keep only records with exactly as many fields as the header. The header
# itself always matches, so it stays at index 0 of filtered_data.
filtered_data = [row for row in data_list if len(row) == expected_width]

# Report how many records were discarded instead of losing them silently.
dropped_count = len(data_list) - len(filtered_data)
if dropped_count:
    print(
        f"Dropped {dropped_count} row(s) whose field count differs from "
        f"the header's {expected_width}"
    )

# Build the frame from the data rows only (everything after the header).
casenotes = pd.DataFrame(filtered_data[1:], columns=header)
|
|
|
# Print the first rows of the frame as a quick visual check of the parse.
print(casenotes.head())

# Write the frame to parquet_file_path in Parquet format. pandas delegates
# this to an installed Parquet engine (pyarrow or fastparquet).
casenotes.to_parquet(parquet_file_path)
|
|
|
|
|
|