# NOTE: the text "Spaces: No application file" was web-page residue captured
# during extraction; it is not part of this module.
# Copyright 1999 by Jeffrey Chang.  All rights reserved.
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""Code to work with Medline from the NCBI.

Classes:
 - Record           A dictionary holding Medline data.

Functions:
 - read             Reads one Medline record.
 - parse            Allows you to iterate over a bunch of Medline records.

"""
class Record(dict):
    """Dictionary subclass that stores the fields of one Medline record.

    Each value is filed under the mnemonic used for it in the Medline file.
    The mnemonics are interpreted as follows:

    ========= ==============================
    Mnemonic  Description
    --------- ------------------------------
    AB        Abstract
    CI        Copyright Information
    AD        Affiliation
    IRAD      Investigator Affiliation
    AID       Article Identifier
    AU        Author
    FAU       Full Author
    CN        Corporate Author
    DCOM      Date Completed
    DA        Date Created
    LR        Date Last Revised
    DEP       Date of Electronic Publication
    DP        Date of Publication
    EDAT      Entrez Date
    GS        Gene Symbol
    GN        General Note
    GR        Grant Number
    IR        Investigator Name
    FIR       Full Investigator Name
    IS        ISSN
    IP        Issue
    TA        Journal Title Abbreviation
    JT        Journal Title
    LA        Language
    LID       Location Identifier
    MID       Manuscript Identifier
    MHDA      MeSH Date
    MH        MeSH Terms
    JID       NLM Unique ID
    RF        Number of References
    OAB       Other Abstract
    OCI       Other Copyright Information
    OID       Other ID
    OT        Other Term
    OTO       Other Term Owner
    OWN       Owner
    PG        Pagination
    PS        Personal Name as Subject
    FPS       Full Personal Name as Subject
    PL        Place of Publication
    PHST      Publication History Status
    PST       Publication Status
    PT        Publication Type
    PUBM      Publishing Model
    PMC       PubMed Central Identifier
    PMID      PubMed Unique Identifier
    RN        Registry Number/EC Number
    NM        Substance Name
    SI        Secondary Source ID
    SO        Source
    SFM       Space Flight Mission
    STAT      Status
    SB        Subset
    TI        Title
    TT        Transliterated Title
    VI        Volume
    CON       Comment on
    CIN       Comment in
    EIN       Erratum in
    EFR       Erratum for
    CRI       Corrected and Republished in
    CRF       Corrected and Republished from
    PRIN      Partial retraction in
    PROF      Partial retraction of
    RPI       Republished in
    RPF       Republished from
    RIN       Retraction in
    ROF       Retraction of
    UIN       Update in
    UOF       Update of
    SPIN      Summary for patients in
    ORI       Original report in
    ========= ==============================

    """
# Mnemonics whose collected lines are joined into one space-separated string
# when a record is completed; every other mnemonic keeps its list of values.
_TEXT_KEYS = frozenset(
    (
        "ID",
        "PMID",
        "SO",
        "RF",
        "NI",
        "JC",
        "TA",
        "IS",
        "CY",
        "TT",
        "CA",
        "IP",
        "VI",
        "DP",
        "YR",
        "PG",
        "LID",
        "DA",
        "LR",
        "OWN",
        "STAT",
        "DCOM",
        "PUBM",
        "DEP",
        "PL",
        "JID",
        "SB",
        "PMC",
        "EDAT",
        "MHDA",
        "PST",
        "AB",
        "EA",
        "TI",
        "JT",
    )
)

# A continuation line carries six spaces where a field line has its mnemonic
# and separator.  Spelled as a product so the width cannot be lost by
# whitespace-collapsing tools.
_CONTINUATION_PREFIX = " " * 6


def _join_text_fields(record):
    """Join the list under each text mnemonic of ``record`` into one string.

    The record is modified in place and also returned.  Only existing keys
    are reassigned, so the dictionary does not change size while iterating.
    """
    for mnemonic in record:
        if mnemonic in _TEXT_KEYS:
            record[mnemonic] = " ".join(record[mnemonic])
    return record


def parse(handle):
    """Read Medline records one by one from the handle.

    The handle is either a Medline file, a file-like object, or a list
    of lines describing one or more Medline records.

    This is a generator: each completed record is yielded as a ``Record``.
    A blank line ends the current record, and a final record that is not
    followed by a blank line is yielded as well.  Values of the text
    mnemonics (``TI``, ``AB``, ``PMID``, ...) are single strings; values of
    every other mnemonic are lists of strings.

    A ``KeyError`` is raised if a continuation line (one starting with six
    spaces) appears before any field line of the current record.

    Typical usage::

        >>> from Bio import Medline
        >>> with open("Medline/pubmed_result2.txt") as handle:
        ...     records = Medline.parse(handle)
        ...     for record in records:
        ...         print(record['TI'])
        ...
        A high level interface to SCOP and ASTRAL ...
        GenomeDiagram: a python package for the visualization of ...
        Open source clustering software.
        PDB file parser and structure class implemented in Python.

    """
    key = ""
    record = Record()
    for line in handle:
        line = line.rstrip()
        if line[:6] == _CONTINUATION_PREFIX:  # continuation line
            if key in ("MH", "AD"):
                # Multi-line MeSH term or affiliation: extend the last entry
                # of the list.  Slicing from 5 keeps one separating space.
                record[key][-1] += line[5:]
            else:
                record[key].append(line[6:])
        elif line:
            # Field line: mnemonic in the first four columns, value from
            # column six onwards.
            key = line[:4].rstrip()
            if key not in record:
                record[key] = []
            record[key].append(line[6:])
        elif record:
            # Blank line after some content: the record is complete.
            yield _join_text_fields(record)
            record = Record()
    if record:  # catch last one, which may lack a trailing blank line
        yield _join_text_fields(record)
def read(handle):
    """Return the first Medline record found in the handle.

    The handle is either a Medline file, a file-like object, or a list
    of lines describing a Medline record.  Only the first record produced
    by ``parse`` is returned; anything after it is not consumed.  If the
    handle holds no record at all, the ``StopIteration`` raised by ``next``
    propagates to the caller.

    Typical usage:

        >>> from Bio import Medline
        >>> with open("Medline/pubmed_result1.txt") as handle:
        ...     record = Medline.read(handle)
        ...     print(record['TI'])
        ...
        The Bio* toolkits--a brief overview.

    """
    first_record = next(parse(handle))
    return first_record
if __name__ == "__main__":
    # Script entry point.  The helper is imported here rather than at module
    # level, so importing this module never pulls in Bio._utils.
    from Bio._utils import run_doctest

    run_doctest()