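# NOTE(review): the six lines below appear to be page-header text captured
# together with the file (not program code); kept as comments so the module parses.
# jfaustin's picture
# Add caching to the instance
# e557ff0
# raw
# history blame
# 4.52 kB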
import hashlib
import gradio as gr
from pathlib import Path
from Bio.PDB import MMCIFParser, PDBIO
from folding_studio.commands.predict import af2 as af2_predict
from folding_studio.commands.predict import boltz as boltz_predict
from folding_studio.config import FOLDING_PROJECT_CODE
import logging
from molecule import molecule
# Logging setup: records at INFO level and above are formatted as
# "<timestamp> - <level name> - <message>" and sent to a StreamHandler.
logging.basicConfig(
    handlers=[logging.StreamHandler()],
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Module-level logger named after this module.
logger = logging.getLogger(__name__)
def convert_cif_to_pdb(cif_path, pdb_path):
    """Read an mmCIF structure file and write it back out in PDB format.

    The input is parsed with Biopython's ``MMCIFParser`` (under the structure
    id ``"structure"``) and written with ``PDBIO``.

    Args:
        cif_path (str): Path to input .cif file
        pdb_path (str): Path to output .pdb file
    """
    # Load the structure from the CIF file.
    parsed_structure = MMCIFParser().get_structure("structure", cif_path)

    # Write the same structure to the PDB destination.
    pdb_writer = PDBIO()
    pdb_writer.set_structure(parsed_structure)
    pdb_writer.save(pdb_path)
def predict(sequence: str) -> str:
    """Predict protein structure from amino acid sequence using Boltz model.

    Results are cached on disk: each sequence gets a ``sequence_<sha1>``
    directory (relative to the current working directory), and a prediction
    is only requested when that directory holds no ``*_model_0.cif`` file yet.
    The CIF file is converted to ``pred.pdb`` inside that directory on every
    call and then rendered.

    Args:
        sequence (str): Amino acid sequence to predict structure for.
            Leading and trailing whitespace is ignored.

    Returns:
        str: HTML iframe containing 3D molecular visualization

    Raises:
        ValueError: If ``sequence`` is empty or contains only whitespace.
        RuntimeError: If no ``*_model_0.cif`` file exists in the output
            directory after the prediction call has returned.
    """
    # Normalise first so that stray whitespace neither changes the cache key
    # nor ends up in the FASTA file / residue count.
    sequence = (sequence or "").strip()
    if not sequence:
        raise ValueError("Please provide a non-empty amino acid sequence.")

    # Set up unique output directory based on sequence hash (cache key only,
    # not used for anything security related).
    seq_id = hashlib.sha1(sequence.encode()).hexdigest()
    output_dir = Path(f"sequence_{seq_id}")
    output_dir.mkdir(parents=True, exist_ok=True)

    cif_pattern = "*_model_0.cif"
    # Sorted so the chosen file is deterministic if several ever match.
    cif_files = sorted(output_dir.rglob(cif_pattern))

    if cif_files:
        logger.info("Prediction already exists. Output directory: %s", output_dir)
    else:
        # One FASTA file per sequence: a single shared "sequence.fasta" could
        # be overwritten by a concurrent request before it is read.
        # NOTE(review): assumes boltz_predict does not depend on the input
        # file being named "sequence.fasta" -- confirm.
        seq_file = Path(f"sequence_{seq_id}.fasta")
        _write_fasta_file(seq_file, sequence)

        # Run Boltz prediction
        logger.info(
            "Predicting %s with project code %s",
            seq_file.stem,
            FOLDING_PROJECT_CODE,
        )
        boltz_predict(
            source=seq_file,
            project_code=FOLDING_PROJECT_CODE,
            output=output_dir,
            unzip=True,
        )
        logger.info("Prediction done. Output directory: %s", output_dir)

        cif_files = sorted(output_dir.rglob(cif_pattern))
        if not cif_files:
            # Fail with a readable message instead of an IndexError below.
            raise RuntimeError(
                f"Prediction finished but no '{cif_pattern}' file was found in {output_dir}"
            )

    # Convert output CIF to PDB
    pred_cif = cif_files[0]
    logger.info("Output file: %s", pred_cif)
    converted_pdb_path = str(output_dir / "pred.pdb")
    convert_cif_to_pdb(str(pred_cif), converted_pdb_path)
    logger.info("Converted PDB file: %s", converted_pdb_path)

    # Generate molecular visualization
    mol = _create_molecule_visualization(
        converted_pdb_path,
        sequence,
        output_dir,
    )
    return _wrap_in_iframe(mol)
def _write_fasta_file(filepath: Path, sequence: str) -> None:
    """Create (or overwrite) ``filepath`` with a one-record FASTA entry.

    The record consists of the header line ``>A|protein`` followed by
    ``sequence``; no trailing newline is added.
    """
    fasta_record = f">A|protein\n{sequence}"
    Path(filepath).write_text(fasta_record)
def _create_molecule_visualization(pdb_path: Path, sequence: str, output_dir: Path) -> str:
    """Build the viewer content for one predicted structure via ``molecule``.

    Args:
        pdb_path: Location of the PDB file; passed on as a string.
        sequence: The single sequence being displayed. Every residue
            position from 1 to ``len(sequence)`` is marked as selected.
        output_dir: Accepted for interface compatibility; not used here.

    Returns:
        Whatever ``molecule`` returns (annotated as ``str``).
    """
    residue_count = len(sequence)
    # Single table row for this sequence with all metrics set to zero.
    sequence_row = {
        "Score": 0,
        "RMSD": 0,
        "Recovery": 0,
        "Mean pLDDT": 0,
        "seq": sequence,
    }
    all_positions = list(range(1, residue_count + 1))
    return molecule(
        str(pdb_path),
        lenSeqs=1,
        num_res=residue_count,
        selectedResidues=all_positions,
        allSeqs=[sequence],
        sequences=[sequence_row],
    )
def _wrap_in_iframe(content: str) -> str:
    """Wrap content in an HTML iframe with appropriate styling and permissions.

    ``content`` becomes the value of the iframe's single-quoted ``srcdoc``
    attribute. Single quotes inside ``content`` are replaced by the character
    reference ``&#x27;`` so they cannot terminate the attribute early; the
    browser decodes the reference back to ``'`` when it reads the attribute.

    Args:
        content: HTML document source to display inside the iframe.

    Returns:
        The ``<iframe ...></iframe>`` markup as a string.
    """
    # Only the attribute delimiter is escaped, so the output is byte-identical
    # to the previous behaviour for any content that has no single quote.
    safe_content = content.replace("'", "&#x27;")
    return f"""<iframe
name="result"
style="width: 100%; height: 100vh;"
allow="midi; geolocation; microphone; camera; display-capture; encrypted-media;"
sandbox="allow-modals allow-forms allow-scripts allow-same-origin allow-popups allow-top-navigation-by-user-activation allow-downloads"
allowfullscreen=""
allowpaymentrequest=""
frameborder="0"
srcdoc='{safe_content}'
></iframe>"""
# Gradio UI: a sequence text box, a "Predict" button and an HTML area that
# displays whatever predict() returns for the entered sequence.
with gr.Blocks(title="Folding Studio: structure prediction with Boltz-1") as demo:
    gr.Markdown("# Input")
    with gr.Row(), gr.Column():
        sequence = gr.Textbox(label="Sequence", value="")

    gr.Markdown("# Output")
    with gr.Row():
        predict_btn = gr.Button("Predict")
    with gr.Row():
        mol_output = gr.HTML()

    # Clicking the button runs predict() on the text box value and puts the
    # returned HTML into the output component.
    predict_btn.click(fn=predict, inputs=[sequence], outputs=[mol_output])

# Start the app when the module is executed.
demo.launch()