File size: 4,157 Bytes
b8175a8 02a9726 5b50998 98950ac 5b50998 02a9726 5b50998 02a9726 5b50998 98950ac b8175a8 5b50998 b8175a8 5b50998 b8175a8 5b50998 b8175a8 5b50998 a5b0df3 98950ac b8175a8 98950ac a5b0df3 2c8b144 98950ac 02a9726 7fccc04 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 |
import uuid
import gradio as gr
from pathlib import Path
from Bio.PDB import MMCIFParser, PDBIO
from folding_studio.commands.predict import af2 as af2_predict
from folding_studio.commands.predict import boltz as boltz_predict
from folding_studio.config import FOLDING_PROJECT_CODE
import logging
from molecule import molecule
# Root logging setup: INFO and above, timestamped, emitted through a single
# stream handler (StreamHandler with no argument writes to stderr).
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
    handlers=[logging.StreamHandler()],
)

# Module-level logger used by the functions below.
logger = logging.getLogger(__name__)
def convert_cif_to_pdb(cif_path, pdb_path):
    """Read an mmCIF structure file and re-save it in PDB format with Biopython.

    Args:
        cif_path (str): Location of the .cif file to read.
        pdb_path (str): Destination of the .pdb file to write.
    """
    # "structure" is only the identifier given to the parsed object.
    parsed = MMCIFParser().get_structure("structure", cif_path)

    # Hand the parsed structure to the PDB writer and serialize it.
    writer = PDBIO()
    writer.set_structure(parsed)
    writer.save(pdb_path)
def predict(sequence: str) -> str:
    """Predict protein structure from amino acid sequence using Boltz model.

    The sequence is written to a FASTA file inside a per-request temporary
    directory, submitted through ``boltz_predict`` with a unique output
    directory, and the first ``*_model_0.cif`` file found under that output
    directory is converted to PDB and turned into an HTML visualization.

    Args:
        sequence (str): Amino acid sequence to predict structure for.
            Leading and trailing whitespace is ignored.

    Returns:
        str: HTML iframe containing 3D molecular visualization.

    Raises:
        ValueError: If ``sequence`` is empty or contains only whitespace.
        FileNotFoundError: If no ``*_model_0.cif`` file is found in the
            output directory after the prediction call returns.
    """
    # Local import keeps this block self-contained; tempfile is stdlib.
    import tempfile

    # Reject blank input before spending time on a prediction request.
    sequence = sequence.strip() if sequence else ""
    if not sequence:
        raise ValueError("Sequence is empty; please provide an amino acid sequence.")

    # Set up unique output directory
    seq_id = str(uuid.uuid4())
    output_dir = Path(f"sequence_{seq_id}")
    output_dir.mkdir(parents=True, exist_ok=True)

    # Write the FASTA input into a private temporary directory so that
    # concurrent requests never overwrite each other's input file. The file
    # name (and therefore its stem) stays "sequence.fasta".
    with tempfile.TemporaryDirectory() as tmp_dir:
        seq_file = Path(tmp_dir) / "sequence.fasta"
        _write_fasta_file(seq_file, sequence)

        # Run Boltz prediction
        logger.info(
            "Predicting %s with project code %s",
            seq_file.stem,
            FOLDING_PROJECT_CODE,
        )
        boltz_predict(
            source=seq_file,
            project_code=FOLDING_PROJECT_CODE,
            output=output_dir,
            unzip=True,
        )
    logger.info("Prediction done. Output directory: %s", output_dir)

    # Locate the predicted model; fail with a clear message if it is missing.
    pred_cif = next(output_dir.rglob("*_model_0.cif"), None)
    if pred_cif is None:
        raise FileNotFoundError(
            f"No '*_model_0.cif' file found under {output_dir}"
        )
    logger.info("Output file: %s", pred_cif)

    # Convert output CIF to PDB
    converted_pdb_path = output_dir / "pred.pdb"
    convert_cif_to_pdb(str(pred_cif), str(converted_pdb_path))
    logger.info("Converted PDB file: %s", converted_pdb_path)

    # Generate molecular visualization
    mol = _create_molecule_visualization(
        converted_pdb_path,
        sequence,
        output_dir,
    )
    return _wrap_in_iframe(mol)
def _write_fasta_file(filepath: Path, sequence: str) -> None:
    """Create (or overwrite) a single-record FASTA file for ``sequence``.

    The record header is the fixed line ``>A|protein``; the sequence follows
    on the next line with no trailing newline.
    """
    fasta_text = f">A|protein\n{sequence}"
    Path(filepath).write_text(fasta_text)
def _create_molecule_visualization(pdb_path: Path, sequence: str, output_dir: Path) -> str:
    """Build the visualization for a predicted structure via ``molecule``.

    Args:
        pdb_path: PDB file to display; passed (as a string) for both
            positional arguments of ``molecule``.
        sequence: The single sequence being displayed; every residue
            (1-based, 1..len(sequence)) is marked as selected.
        output_dir: Forwarded unchanged as ``random_dir``.

    Returns:
        Whatever ``molecule`` returns (annotated here as ``str``).
    """
    pdb_file = str(pdb_path)
    n_residues = len(sequence)

    # Only one sequence is shown; its metric fields are all set to 0.
    entry = {
        "Score": 0,
        "RMSD": 0,
        "Recovery": 0,
        "Mean pLDDT": 0,
        "seq": sequence,
    }

    return molecule(
        pdb_file,
        pdb_file,
        lenSeqs=1,
        num_res=n_residues,
        selectedResidues=list(range(1, n_residues + 1)),
        allSeqs=[sequence],
        sequences=[entry],
        random_dir=output_dir,
    )
def _wrap_in_iframe(content: str) -> str:
    """Wrap content in an HTML iframe with appropriate styling and permissions.

    Args:
        content: HTML document to embed through the iframe's ``srcdoc``
            attribute.

    Returns:
        The ``<iframe ...></iframe>`` markup as a string.
    """
    # The srcdoc attribute below is delimited by single quotes, so a raw "'"
    # inside the content would end the attribute early and truncate the
    # embedded document. Character references are decoded in attribute
    # values, so "&#39;" reaches the embedded document as "'" again.
    # Content without single quotes is left byte-for-byte unchanged.
    safe_content = content.replace("'", "&#39;")
    return f"""<iframe
name="result"
allow="midi; geolocation; microphone; camera; display-capture; encrypted-media;"
sandbox="allow-modals allow-forms allow-scripts allow-same-origin allow-popups allow-top-navigation-by-user-activation allow-downloads"
allowfullscreen=""
allowpaymentrequest=""
frameborder="0"
srcdoc='{safe_content}'
></iframe>"""
# Gradio UI: a sequence text box, a "Predict" button, and an HTML pane that
# receives the iframe returned by predict().
with gr.Blocks(title="Folding Studio: structure prediction with Boltz-1") as demo:
    gr.Markdown("# Input")
    with gr.Row():
        with gr.Column():
            sequence = gr.Textbox(label="Sequence", value="")

    gr.Markdown("# Output")
    with gr.Row():
        predict_btn = gr.Button("Predict")
    with gr.Row():
        mol_output = gr.HTML()

    # Clicking the button runs predict() on the text box content and puts
    # the returned HTML into the output pane.
    predict_btn.click(fn=predict, inputs=[sequence], outputs=[mol_output])

# Start the app server as soon as the module is executed.
demo.launch()
|