Spaces:
No application file
No application file
#!/usr/bin/env python | |
# Copyright 2003, by Kristian Rother. All rights reserved. | |
# | |
# This file is part of the Biopython distribution and governed by your | |
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". | |
# Please see the LICENSE file that should have been included as part of this | |
# package. | |
# | |
# PDBList.py | |
# | |
# A tool for tracking changes in the PDB Protein Structure Database. | |
# | |
# (c) 2003 Kristian Rother | |
# This work was supported by the German Ministry of Education | |
# and Research (BMBF). Project http://www.bcbio.de | |
# | |
# Contact the author | |
# homepage : http://www.rubor.de/bioinf | |
# email : [email protected] | |
# | |
# | |
# (c) 2016 Wiktoria Karwicka & Jacek Smietanski | |
# - updated and Python 3.x compatible code | |
# - new options to enable download PDBx/mmCif, PDBML and mmtf formatted | |
# files as well as large PDB bundles | |
# - unit tests for the module | |
# | |
# Contact the corresponding author | |
# homepage : http://jaceksmietanski.net | |
# email : [email protected] | |
# | |
# It may be distributed freely with respect to the original authors. | |
# Any maintainer of the Biopython code may change this notice | |
# when appropriate. | |
"""Access the PDB over the internet (e.g. to download structures).""" | |
import contextlib | |
import ftplib | |
import gzip | |
import os | |
import re | |
import shutil | |
import sys | |
from urllib.request import urlopen | |
from urllib.request import urlretrieve | |
from urllib.request import urlcleanup | |
class PDBList: | |
"""Quick access to the structure lists on the PDB or its mirrors. | |
This class provides quick access to the structure lists on the | |
PDB server or its mirrors. The structure lists contain | |
four-letter PDB codes, indicating that structures are | |
new, have been modified or are obsolete. The lists are released | |
on a weekly basis. | |
It also provides a function to retrieve PDB files from the server. | |
To use it properly, prepare a directory /pdb or the like, | |
where PDB files are stored. | |
All available file formats (PDB, PDBx/mmCif, PDBML, mmtf) are supported. | |
Please note that large structures (containing >62 chains | |
and/or 99999 ATOM lines) are no longer stored as a single PDB file | |
and by default (when PDB format selected) are not downloaded. | |
Large structures can be downloaded in other formats, including PDBx/mmCif | |
or as a .tar file (a collection of PDB-like formatted files for a given | |
structure). | |
If you want to use this module from inside a proxy, add | |
the proxy variable to your environment, e.g. in Unix: | |
export HTTP_PROXY='http://realproxy.charite.de:888' | |
(This can also be added to ~/.bashrc) | |
""" | |
PDB_REF = """ | |
The Protein Data Bank: a computer-based archival file for macromolecular structures. | |
F.C.Bernstein, T.F.Koetzle, G.J.B.Williams, E.F.Meyer Jr, M.D.Brice, J.R.Rodgers, O.Kennard, T.Shimanouchi, M.Tasumi | |
J. Mol. Biol. 112 pp. 535-542 (1977) | |
http://www.pdb.org/. | |
""" | |
def __init__( | |
self, server="ftp://ftp.wwpdb.org", pdb=None, obsolete_pdb=None, verbose=True | |
): | |
"""Initialize the class with the default server or a custom one. | |
Argument pdb is the local path to use, defaulting to the current | |
directory at the moment of initialisation. | |
""" | |
self.pdb_server = server # remote pdb server | |
if pdb: | |
self.local_pdb = pdb # local pdb file tree | |
else: | |
self.local_pdb = os.getcwd() | |
# enable or disable verbose | |
self._verbose = verbose | |
# local file tree for obsolete pdb files | |
if obsolete_pdb: | |
self.obsolete_pdb = obsolete_pdb | |
else: | |
self.obsolete_pdb = os.path.join(self.local_pdb, "obsolete") | |
if not os.access(self.obsolete_pdb, os.F_OK): | |
os.makedirs(self.obsolete_pdb) | |
# variable for command-line option | |
self.flat_tree = False | |
def _print_default_format_warning(file_format): | |
"""Print a warning to stdout (PRIVATE). | |
Temporary warning (similar to a deprecation warning) that files | |
are being downloaded in mmCIF. | |
""" | |
if file_format is None: | |
sys.stderr.write( | |
"WARNING: The default download format has changed from PDB to PDBx/mmCif\n" | |
) | |
return "mmCif" | |
return file_format | |
def get_status_list(url): | |
"""Retrieve a list of pdb codes in the weekly pdb status file from given URL. | |
Used by get_recent_changes. Typical contents of the list files parsed | |
by this method is now very simply - one PDB name per line. | |
""" | |
with contextlib.closing(urlopen(url)) as handle: | |
answer = [] | |
for line in handle: | |
pdb = line.strip() | |
assert len(pdb) == 4 | |
answer.append(pdb.decode()) | |
return answer | |
def get_recent_changes(self): | |
"""Return three lists of the newest weekly files (added,mod,obsolete). | |
Reads the directories with changed entries from the PDB server and | |
returns a tuple of three URL's to the files of new, modified and | |
obsolete entries from the most recent list. The directory with the | |
largest numerical name is used. | |
Returns None if something goes wrong. | |
Contents of the data/status dir (20031013 would be used);: | |
drwxrwxr-x 2 1002 sysadmin 512 Oct 6 18:28 20031006 | |
drwxrwxr-x 2 1002 sysadmin 512 Oct 14 02:14 20031013 | |
-rw-r--r-- 1 1002 sysadmin 1327 Mar 12 2001 README | |
""" | |
path = self.pdb_server + "/pub/pdb/data/status/latest/" | |
# Retrieve the lists | |
added = self.get_status_list(path + "added.pdb") | |
modified = self.get_status_list(path + "modified.pdb") | |
obsolete = self.get_status_list(path + "obsolete.pdb") | |
return [added, modified, obsolete] | |
def get_all_entries(self): | |
"""Retrieve the big file containing all the PDB entries and some annotation. | |
Returns a list of PDB codes in the index file. | |
""" | |
url = self.pdb_server + "/pub/pdb/derived_data/index/entries.idx" | |
if self._verbose: | |
print("Retrieving index file. Takes about 27 MB.") | |
with contextlib.closing(urlopen(url)) as handle: | |
all_entries = [ | |
line[:4].decode() for line in handle.readlines()[2:] if len(line) > 4 | |
] | |
return all_entries | |
def get_all_obsolete(self): | |
"""Return a list of all obsolete entries ever in the PDB. | |
Returns a list of all obsolete pdb codes that have ever been | |
in the PDB. | |
Gets and parses the file from the PDB server in the format | |
(the first pdb_code column is the one used). The file looks | |
like this:: | |
LIST OF OBSOLETE COORDINATE ENTRIES AND SUCCESSORS | |
OBSLTE 31-JUL-94 116L 216L | |
... | |
OBSLTE 29-JAN-96 1HFT 2HFT | |
OBSLTE 21-SEP-06 1HFV 2J5X | |
OBSLTE 21-NOV-03 1HG6 | |
OBSLTE 18-JUL-84 1HHB 2HHB 3HHB | |
OBSLTE 08-NOV-96 1HID 2HID | |
OBSLTE 01-APR-97 1HIU 2HIU | |
OBSLTE 14-JAN-04 1HKE 1UUZ | |
... | |
""" | |
url = self.pdb_server + "/pub/pdb/data/status/obsolete.dat" | |
with contextlib.closing(urlopen(url)) as handle: | |
# Extract pdb codes. Could use a list comprehension, but I want | |
# to include an assert to check for mis-reading the data. | |
obsolete = [] | |
for line in handle: | |
if not line.startswith(b"OBSLTE "): | |
continue | |
pdb = line.split()[2] | |
assert len(pdb) == 4 | |
obsolete.append(pdb.decode()) | |
return obsolete | |
def retrieve_pdb_file( | |
self, pdb_code, obsolete=False, pdir=None, file_format=None, overwrite=False | |
): | |
"""Fetch PDB structure file from PDB server, and store it locally. | |
The PDB structure's file name is returned as a single string. | |
If obsolete ``==`` True, the file will be saved in a special file tree. | |
NOTE. The default download format has changed from PDB to PDBx/mmCif | |
:param pdb_code: 4-symbols structure Id from PDB (e.g. 3J92). | |
:type pdb_code: string | |
:param file_format: | |
File format. Available options: | |
* "mmCif" (default, PDBx/mmCif file), | |
* "pdb" (format PDB), | |
* "xml" (PDBML/XML format), | |
* "mmtf" (highly compressed), | |
* "bundle" (PDB formatted archive for large structure) | |
:type file_format: string | |
:param overwrite: if set to True, existing structure files will be overwritten. Default: False | |
:type overwrite: bool | |
:param obsolete: | |
Has a meaning only for obsolete structures. If True, download the obsolete structure | |
to 'obsolete' folder, otherwise download won't be performed. | |
This option doesn't work for mmtf format as obsoleted structures aren't stored in mmtf. | |
Also doesn't have meaning when parameter pdir is specified. | |
Note: make sure that you are about to download the really obsolete structure. | |
Trying to download non-obsolete structure into obsolete folder will not work | |
and you face the "structure doesn't exists" error. | |
Default: False | |
:type obsolete: bool | |
:param pdir: put the file in this directory (default: create a PDB-style directory tree) | |
:type pdir: string | |
:return: filename | |
:rtype: string | |
""" | |
# Deprecation warning | |
file_format = self._print_default_format_warning(file_format) | |
# Get the compressed PDB structure | |
pdb_code = pdb_code.lower() | |
archive = { | |
"pdb": f"pdb{pdb_code}.ent.gz", | |
"mmCif": f"{pdb_code}.cif.gz", | |
"xml": f"{pdb_code}.xml.gz", | |
"mmtf": f"{pdb_code}", | |
"bundle": f"{pdb_code}-pdb-bundle.tar.gz", | |
} | |
archive_fn = archive[file_format] | |
if file_format not in archive.keys(): | |
raise ( | |
f"Specified file_format {file_format} doesn't exists or is not supported. Maybe a " | |
"typo. Please, use one of the following: mmCif, pdb, xml, mmtf, bundle" | |
) | |
if file_format in ("pdb", "mmCif", "xml"): | |
pdb_dir = "divided" if not obsolete else "obsolete" | |
file_type = ( | |
"pdb" | |
if file_format == "pdb" | |
else "mmCIF" | |
if file_format == "mmCif" | |
else "XML" | |
) | |
url = ( | |
self.pdb_server | |
+ f"/pub/pdb/data/structures/{pdb_dir}/{file_type}/{pdb_code[1:3]}/{archive_fn}" | |
) | |
elif file_format == "bundle": | |
url = ( | |
self.pdb_server | |
+ f"/pub/pdb/compatible/pdb_bundle/{pdb_code[1:3]}/{pdb_code}/{archive_fn}" | |
) | |
else: | |
url = f"http://mmtf.rcsb.org/v1.0/full/{pdb_code}" | |
# Where does the final PDB file get saved? | |
if pdir is None: | |
path = self.local_pdb if not obsolete else self.obsolete_pdb | |
if not self.flat_tree: # Put in PDB-style directory tree | |
path = os.path.join(path, pdb_code[1:3]) | |
else: # Put in specified directory | |
path = pdir | |
if not os.access(path, os.F_OK): | |
os.makedirs(path) | |
filename = os.path.join(path, archive_fn) | |
final = { | |
"pdb": f"pdb{pdb_code}.ent", | |
"mmCif": f"{pdb_code}.cif", | |
"xml": f"{pdb_code}.xml", | |
"mmtf": f"{pdb_code}.mmtf", | |
"bundle": f"{pdb_code}-pdb-bundle.tar", | |
} | |
final_file = os.path.join(path, final[file_format]) | |
# Skip download if the file already exists | |
if not overwrite: | |
if os.path.exists(final_file): | |
if self._verbose: | |
print(f"Structure exists: '{final_file}' ") | |
return final_file | |
# Retrieve the file(s) | |
if self._verbose: | |
print(f"Downloading PDB structure '{pdb_code}'...") | |
try: | |
urlcleanup() | |
urlretrieve(url, filename) | |
except OSError: | |
print("Desired structure doesn't exist") | |
else: | |
with gzip.open(filename, "rb") as gz: | |
with open(final_file, "wb") as out: | |
out.writelines(gz) | |
os.remove(filename) | |
return final_file | |
def update_pdb(self, file_format=None, with_assemblies=False): | |
"""Update your local copy of the PDB files. | |
I guess this is the 'most wanted' function from this module. | |
It gets the weekly lists of new and modified pdb entries and | |
automatically downloads the according PDB files. | |
You can call this module as a weekly cron job. | |
""" | |
assert os.path.isdir(self.local_pdb) | |
assert os.path.isdir(self.obsolete_pdb) | |
# Deprecation warning | |
file_format = self._print_default_format_warning(file_format) | |
new, modified, obsolete = self.get_recent_changes() | |
for pdb_code in new + modified: | |
try: | |
self.retrieve_pdb_file(pdb_code, file_format=file_format) | |
if with_assemblies: | |
assemblies = self.get_all_assemblies(file_format) | |
for a_pdb_code, assembly_num in assemblies: | |
if a_pdb_code == pdb_code: | |
pl.retrieve_assembly_file( | |
pdb_code, | |
assembly_num, | |
file_format=file_format, | |
overwrite=True, | |
) | |
except Exception as err: | |
print(f"error {pdb_code}: {err}\n") | |
# you can insert here some more log notes that | |
# something has gone wrong. | |
# Move the obsolete files to a special folder | |
# NOTE: This should be updated to handle multiple file types and | |
# assemblies. As of now, it only looks for PDB-formatted files. | |
# Using pathlib will be probably the best approach here, to build | |
# and index of which files we have efficiently (or glob them). | |
for pdb_code in obsolete: | |
if self.flat_tree: | |
old_file = os.path.join(self.local_pdb, f"pdb{pdb_code}.ent") | |
new_dir = self.obsolete_pdb | |
else: | |
old_file = os.path.join( | |
self.local_pdb, pdb_code[1:3], f"pdb{pdb_code}.ent" | |
) | |
new_dir = os.path.join(self.obsolete_pdb, pdb_code[1:3]) | |
new_file = os.path.join(new_dir, f"pdb{pdb_code}.ent") | |
if os.path.isfile(old_file): | |
if not os.path.isdir(new_dir): | |
os.mkdir(new_dir) | |
try: | |
shutil.move(old_file, new_file) | |
except Exception: | |
print(f"Could not move {old_file} to obsolete folder") | |
elif os.path.isfile(new_file): | |
if self._verbose: | |
print(f"Obsolete file {old_file} already moved") | |
else: | |
if self._verbose: | |
print(f"Obsolete file {old_file} is missing") | |
def download_pdb_files( | |
self, pdb_codes, obsolete=False, pdir=None, file_format=None, overwrite=False | |
): | |
"""Fetch set of PDB structure files from the PDB server and stores them locally. | |
The PDB structure's file name is returned as a single string. | |
If obsolete ``==`` True, the files will be saved in a special file tree. | |
:param pdb_codes: a list of 4-symbols structure Ids from PDB | |
:type pdb_codes: list of strings | |
:param file_format: | |
File format. Available options: | |
* "mmCif" (default, PDBx/mmCif file), | |
* "pdb" (format PDB), | |
* "xml" (PMDML/XML format), | |
* "mmtf" (highly compressed), | |
* "bundle" (PDB formatted archive for large structure) | |
:param overwrite: if set to True, existing structure files will be overwritten. Default: False | |
:type overwrite: bool | |
:param obsolete: | |
Has a meaning only for obsolete structures. | |
If True, download the obsolete structure | |
to 'obsolete' folder, otherwise download won't be performed. | |
This option doesn't work for mmtf format as obsoleted structures are not available as mmtf. | |
(default: False) | |
:type obsolete: bool | |
:param pdir: put the file in this directory (default: create a PDB-style directory tree) | |
:type pdir: string | |
:return: filenames | |
:rtype: string | |
""" | |
# Deprecation warning | |
file_format = self._print_default_format_warning(file_format) | |
for pdb_code in pdb_codes: | |
self.retrieve_pdb_file( | |
pdb_code, | |
obsolete=obsolete, | |
pdir=pdir, | |
file_format=file_format, | |
overwrite=overwrite, | |
) | |
def get_all_assemblies(self, file_format="mmCif"): | |
"""Retrieve the list of PDB entries with an associated bio assembly. | |
The requested list will be cached to avoid multiple calls to the FTP | |
server. | |
:type file_format: str, optional | |
:param file_format: format in which to download the entries. Available | |
options are "mmCif" or "pdb". Defaults to mmCif. | |
""" | |
if hasattr(self, "assemblies") and self.assemblies: | |
if self._verbose: | |
print("Retrieving cached list of assemblies.") | |
return self.assemblies # cache | |
if self._verbose: | |
print("Retrieving list of assemblies. This might take a while.") | |
# FTPLib is much faster than urlopen | |
idx = self.pdb_server.find("://") | |
if idx >= 0: | |
ftp = ftplib.FTP(self.pdb_server[idx + 3 :]) | |
else: | |
ftp = ftplib.FTP(self.pdb_server) | |
ftp.login() # anonymous | |
if file_format.lower() == "mmcif": | |
ftp.cwd("/pub/pdb/data/assemblies/mmCIF/all/") | |
re_name = re.compile(r"(\d[0-9a-z]{3})-assembly(\d+).cif.gz") | |
elif file_format.lower() == "pdb": | |
ftp.cwd("/pub/pdb/data/biounit/PDB/all/") | |
re_name = re.compile(r"(\d[0-9a-z]{3}).pdb(\d+).gz") | |
else: | |
msg = "file_format for assemblies must be 'pdb' or 'mmCif'" | |
raise ValueError(msg) | |
response = [] | |
ftp.retrlines("NLST", callback=response.append) | |
# PDB entries can have more than one assembly: | |
# e.g. | |
# 104l-assembly1.cif.gz 2022-01-15 19:38 36K | |
# 104l-assembly2.cif.gz 2022-01-15 19:38 36K | |
# ... | |
# 104l.pdb1.gz 2010-01-15 08:00 28K | |
# 104l.pdb2.gz 2010-01-15 08:00 28K | |
all_assemblies = [] | |
for line in response: | |
if line.endswith(".gz"): | |
match = re_name.findall(line) | |
try: | |
if len(match): | |
entry, assembly = match[0] | |
except ValueError: | |
pass | |
else: | |
all_assemblies.append((entry, assembly)) | |
self.assemblies = all_assemblies # cache | |
return all_assemblies | |
def retrieve_assembly_file( | |
self, pdb_code, assembly_num, pdir=None, file_format=None, overwrite=False | |
): | |
"""Fetch one or more assembly structures associated with a PDB entry. | |
Unless noted below, parameters are described in ``retrieve_pdb_file``. | |
:type assembly_num: int | |
:param assembly_num: assembly number to download. | |
:rtype : str | |
:return: file name of the downloaded assembly file. | |
""" | |
pdb_code = pdb_code.lower() | |
assembly_num = int(assembly_num) | |
archive = { | |
"pdb": f"{pdb_code}.pdb{assembly_num}.gz", | |
"mmcif": f"{pdb_code}-assembly{assembly_num}.cif.gz", | |
} | |
file_format = self._print_default_format_warning(file_format) | |
file_format = file_format.lower() # we should standardize this. | |
if file_format not in archive: | |
raise ( | |
f"Specified file_format '{file_format}' is not supported. Use one of the " | |
"following: 'mmcif' or 'pdb'." | |
) | |
# Get the compressed assembly structure name | |
archive_fn = archive[file_format] | |
if file_format == "mmcif": | |
url = self.pdb_server + f"/pub/pdb/data/assemblies/mmCIF/all/{archive_fn}" | |
elif file_format == "pdb": | |
url = self.pdb_server + f"/pub/pdb/data/biounit/PDB/all/{archive_fn}" | |
else: # better safe than sorry | |
raise ValueError(f"file_format '{file_format}' not supported") | |
# Where will the file be saved? | |
if pdir is None: | |
path = self.local_pdb | |
if not self.flat_tree: # Put in PDB-style directory tree | |
path = os.path.join(path, pdb_code[1:3]) | |
else: # Put in specified directory | |
path = pdir | |
if not os.access(path, os.F_OK): | |
os.makedirs(path) | |
assembly_gz_file = os.path.join(path, archive_fn) | |
assembly_final_file = os.path.join(path, archive_fn[:-3]) # no .gz | |
# Skip download if the file already exists | |
if not overwrite: | |
if os.path.exists(assembly_final_file): | |
if self._verbose: | |
print(f"Structure exists: '{assembly_final_file}' ") | |
return assembly_final_file | |
# Otherwise,retrieve the file(s) | |
if self._verbose: | |
print( | |
f"Downloading assembly ({assembly_num}) for PDB entry " | |
f"'{pdb_code}'..." | |
) | |
try: | |
urlcleanup() | |
urlretrieve(url, assembly_gz_file) | |
except OSError as err: | |
print(f"Download failed! Maybe the desired assembly does not exist: {err}") | |
else: | |
with gzip.open(assembly_gz_file, "rb") as gz: | |
with open(assembly_final_file, "wb") as out: | |
out.writelines(gz) | |
os.remove(assembly_gz_file) | |
return assembly_final_file | |
def download_all_assemblies(self, listfile=None, file_format=None): | |
"""Retrieve all biological assemblies not in the local PDB copy. | |
:type listfile: str, optional | |
:param listfile: file name to which all assembly codes will be written | |
:type file_format: str, optional | |
:param file_format: format in which to download the entries. Available | |
options are "mmCif" or "pdb". Defaults to mmCif. | |
""" | |
# Deprecation warning | |
file_format = self._print_default_format_warning(file_format) | |
assemblies = self.get_all_assemblies(file_format) | |
for pdb_code, assembly_num in assemblies: | |
self.retrieve_assembly_file(pdb_code, assembly_num, file_format=file_format) | |
# Write the list | |
if listfile: | |
with open(listfile, "w") as outfile: | |
outfile.writelines(f"{pdb_code}.{assembly_num}\n" for x in assemblies) | |
def download_entire_pdb(self, listfile=None, file_format=None): | |
"""Retrieve all PDB entries not present in the local PDB copy. | |
:param listfile: filename to which all PDB codes will be written (optional) | |
:param file_format: | |
File format. Available options: | |
* "mmCif" (default, PDBx/mmCif file), | |
* "pdb" (format PDB), | |
* "xml" (PMDML/XML format), | |
* "mmtf" (highly compressed), | |
* "bundle" (PDB formatted archive for large structure) | |
NOTE. The default download format has changed from PDB to PDBx/mmCif | |
""" | |
# Deprecation warning | |
file_format = self._print_default_format_warning(file_format) | |
entries = self.get_all_entries() | |
for pdb_code in entries: | |
self.retrieve_pdb_file(pdb_code, file_format=file_format) | |
# Write the list | |
if listfile: | |
with open(listfile, "w") as outfile: | |
outfile.writelines(x + "\n" for x in entries) | |
def download_obsolete_entries(self, listfile=None, file_format=None): | |
"""Retrieve all obsolete PDB entries not present in local obsolete PDB copy. | |
:param listfile: filename to which all PDB codes will be written (optional) | |
:param file_format: file format. Available options: | |
"mmCif" (default, PDBx/mmCif file), | |
"pdb" (format PDB), | |
"xml" (PMDML/XML format), | |
NOTE. The default download format has changed from PDB to PDBx/mmCif | |
""" | |
# Deprecation warning | |
file_format = self._print_default_format_warning(file_format) | |
entries = self.get_all_obsolete() | |
for pdb_code in entries: | |
self.retrieve_pdb_file(pdb_code, obsolete=True, file_format=file_format) | |
# Write the list | |
if listfile: | |
with open(listfile, "w") as outfile: | |
outfile.writelines(x + "\n" for x in entries) | |
def get_seqres_file(self, savefile="pdb_seqres.txt"): | |
"""Retrieve and save a (big) file containing all the sequences of PDB entries.""" | |
if self._verbose: | |
print("Retrieving sequence file (takes over 110 MB).") | |
url = self.pdb_server + "/pub/pdb/derived_data/pdb_seqres.txt" | |
urlretrieve(url, savefile) | |
if __name__ == "__main__": | |
doc = """PDBList.py | |
(c) Kristian Rother 2003, Wiktoria Karwicka & Jacek Smietanski 2016 | |
Contributed to Biopython | |
Usage:: | |
PDBList.py update <pdb_path> [options] - write weekly PDB updates to | |
local pdb tree. | |
PDBList.py all <pdb_path> [options] - write all PDB entries to | |
local pdb tree. | |
PDBList.py obsol <pdb_path> [options] - write all obsolete PDB | |
entries to local pdb tree. | |
PDBList.py assemb <pdb_path> [options] - write all assemblies for each | |
PDB entry to local pdb tree. | |
PDBList.py <PDB-ID> <pdb_path> [options] - retrieve single structure | |
PDBList.py (<PDB-ID1>,<PDB-ID2>,...) <pdb_path> [options] - retrieve a set | |
of structures | |
Options: | |
-d A single directory will be used as <pdb_path>, not a tree. | |
-o Overwrite existing structure files. | |
-pdb Downloads structures in PDB format | |
-xml Downloads structures in PDBML (XML) format | |
-mmtf Downloads structures in mmtf format | |
-with-assemblies Downloads assemblies along with regular entries. | |
Maximum one format can be specified simultaneously (if more selected, only | |
the last will be considered). By default (no format specified) structures are | |
downloaded as PDBx/mmCif files. | |
""" | |
print(doc) | |
file_format = "mmCif" | |
overwrite = False | |
with_assemblies = False | |
if len(sys.argv) > 2: | |
pdb_path = sys.argv[2] | |
pl = PDBList(pdb=pdb_path) | |
if len(sys.argv) > 3: | |
for option in sys.argv[3:]: | |
if option == "-d": | |
pl.flat_tree = True | |
elif option == "-o": | |
overwrite = True | |
elif option in ("-pdb", "-xml", "-mmtf"): | |
file_format = option[1:] | |
# Allow for download of assemblies alongside ASU | |
elif option == "-with-assemblies": | |
with_assemblies = True | |
else: | |
pdb_path = os.getcwd() | |
pl = PDBList() | |
pl.flat_tree = True | |
if len(sys.argv) > 1: | |
if sys.argv[1] == "update": | |
# update PDB | |
print("updating local PDB at " + pdb_path) | |
pl.update_pdb(file_format=file_format, with_assemblies=with_assemblies) | |
elif sys.argv[1] == "all": | |
# get the entire PDB | |
pl.download_entire_pdb(file_format=file_format) | |
if with_assemblies: | |
# get all assembly structures | |
pl.download_all_assemblies(file_format=file_format) | |
elif sys.argv[1] == "obsol": | |
# get all obsolete entries | |
pl.download_obsolete_entries(pdb_path, file_format=file_format) | |
elif sys.argv[1] == "assemb": | |
# get all assembly structures | |
pl.download_all_assemblies(file_format=file_format) | |
elif len(sys.argv[1]) == 4 and sys.argv[1][0].isdigit(): | |
pdb_code = sys.argv[1] | |
# get single PDB entry | |
pl.retrieve_pdb_file( | |
pdb_code, pdir=pdb_path, file_format=file_format, overwrite=overwrite | |
) | |
if with_assemblies: | |
# PDB Code might have more than one assembly. | |
assemblies = pl.get_all_assemblies(file_format) | |
for a_pdb_code, assembly_num in assemblies: | |
if a_pdb_code == pdb_code: | |
pl.retrieve_assembly_file( | |
pdb_code, | |
assembly_num, | |
pdir=pdb_path, | |
file_format=file_format, | |
overwrite=overwrite, | |
) | |
elif sys.argv[1][0] == "(": | |
# get a set of PDB entries | |
pdb_ids = re.findall("[0-9A-Za-z]{4}", sys.argv[1]) | |
for pdb_id in pdb_ids: | |
pl.retrieve_pdb_file( | |
pdb_id, pdir=pdb_path, file_format=file_format, overwrite=overwrite | |
) | |
if with_assemblies: | |
# PDB Code might have more than one assembly. | |
assemblies = pl.get_all_assemblies(file_format) | |
for a_pdb_code, assembly_num in assemblies: | |
if a_pdb_code == pdb_id: | |
pl.retrieve_assembly_file( | |
pdb_id, | |
assembly_num, | |
pdir=pdb_path, | |
file_format=file_format, | |
overwrite=overwrite, | |
) | |