RvanB's picture
Add files from other repo
fbf7e95
raw
history blame
2.8 kB
import os
from collections import OrderedDict

import pymarc
def get_record_values(record, location):
    """Collect the values found at a MARC location into a single string.

    Args:
        record: Object exposing ``get_fields(tag)`` (a pymarc record).
        location: Either a bare tag such as ``"001"`` or a ``"tag$code"``
            pair such as ``"245$a"``.

    Returns:
        All matching values joined by single spaces; an empty string when
        nothing matches.

    Raises:
        ValueError: If ``location`` contains more than one ``$``.
    """
    tag, *rest = location.split("$")
    if len(rest) > 1:
        raise ValueError("Invalid location")
    code = rest[0] if rest else None

    collected = []
    for field in record.get_fields(tag):
        if field is None:
            continue
        if code is not None:
            # A subfield was requested: keep every occurrence of that code.
            collected += field.get_subfields(code)
        elif isinstance(field, pymarc.Field):
            # Bare tag: use the field's whole textual value.
            collected.append(field.value())
    return " ".join(collected)
def record_dict(record):
    """Flatten a MARC record into an ordered mapping of string features.

    Args:
        record: A pymarc record; it must expose ``fields``, ``get_fields``
            and ``publisher``.

    Returns:
        An ``OrderedDict`` whose values are all strings. A location with no
        data yields an empty string (or an empty slice of the 008 data).
    """

    def joined(*locations):
        # Look up each location and join the per-location strings with a space.
        return " ".join(get_record_values(record, loc) for loc in locations)

    d = OrderedDict()

    # Dump every field value into a string
    d["raw"] = " ".join([f.value() for f in record.fields])

    # NOTE(review): "CID" is not a standard numeric MARC tag; presumably a
    # locally defined identifier field. Confirm against the data source.
    d["cid"] = get_record_values(record, "CID")
    d["id"] = get_record_values(record, "001")

    # Fixed-length data elements. MARC 21 puts Date 1 at 008/07-10, place of
    # publication at 008/15-17 and language at 008/35-37.
    fixed_data = get_record_values(record, "008")
    d["pub_date"] = fixed_data[7:11]
    d["pub_place"] = fixed_data[15:18]
    d["language"] = fixed_data[35:38]

    d["title_a"] = get_record_values(record, "245$a")
    d["title_b"] = get_record_values(record, "245$b")
    d["title_c"] = get_record_values(record, "245$c")
    d["title_p"] = get_record_values(record, "245$p")
    # 245$c is deliberately left out of the combined title.
    d["title"] = " ".join([d["title_a"], d["title_b"], d["title_p"]])

    d["title_variation_a"] = get_record_values(record, "246$a")
    d["title_variation_b"] = get_record_values(record, "246$b")

    # Join the two strings as list items. Concatenating them first and then
    # joining would iterate characters and put a space between every letter.
    d["subject_headings"] = joined("650$a", "650$x")

    d["author_names"] = joined("100$a", "700$a")
    d["corporate_names"] = joined("110$a", "710$a")
    d["meeting_names"] = joined("111$a", "711$a")

    d["publisher"] = record.publisher or ""

    d["pagination"] = get_record_values(record, "300$a")
    d["dimensions"] = get_record_values(record, "300$c")

    return d
def load_records(path):
    """Load all MARC records stored in the file at *path*.

    The format is chosen from the file extension, compared
    case-insensitively:

    * ``.mrc`` / ``.marc`` -- binary MARC, read with ``pymarc.MARCReader``.
    * ``.json`` -- one MARC-in-JSON document per line; blank lines are
      skipped and the first record parsed from each line is kept.

    Args:
        path: File location as ``str``, ``bytes`` or ``os.PathLike``.

    Returns:
        A list of the records read from the file.

    Raises:
        ValueError: If the extension is missing or not supported.
    """
    filename = os.fsdecode(path)
    # splitext only looks at the final path component, so dots in directory
    # names and extension-less files are handled correctly.
    extension = os.path.splitext(filename)[1][1:]
    kind = extension.lower()

    if kind in ("mrc", "marc"):
        with open(filename, "rb") as marcfile:
            # NOTE(review): pymarc readers may yield None for records they
            # cannot parse; such entries are passed through unchanged --
            # confirm callers handle them.
            return list(pymarc.MARCReader(marcfile))

    if kind == "json":
        records = []
        with open(filename, "r", encoding="utf-8") as jsonfile:
            for line in jsonfile:
                if not line.strip():
                    # A blank or trailing empty line is not a JSON document.
                    continue
                records.append(pymarc.parse_json_to_array(line)[0])
        return records

    raise ValueError(f"Unsupported file extension: {extension}")