File size: 4,157 Bytes
b8175a8
02a9726
5b50998
98950ac
5b50998
 
 
 
02a9726
5b50998
02a9726
5b50998
 
 
 
 
 
 
 
 
 
 
98950ac
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b8175a8
 
 
 
 
 
 
 
 
 
5b50998
b8175a8
 
 
 
 
5b50998
b8175a8
 
 
 
 
 
 
 
 
5b50998
b8175a8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5b50998
a5b0df3
98950ac
 
 
 
 
b8175a8
98950ac
 
 
a5b0df3
2c8b144
98950ac
 
 
 
 
 
 
02a9726
7fccc04
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import uuid
import gradio as gr
from pathlib import Path
from Bio.PDB import MMCIFParser, PDBIO
from folding_studio.commands.predict import af2 as af2_predict
from folding_studio.commands.predict import boltz as boltz_predict
from folding_studio.config import FOLDING_PROJECT_CODE
import logging

from molecule import molecule

# Root logging setup: INFO and above, timestamped, emitted through a stream handler.
logging.basicConfig(
    handlers=[logging.StreamHandler()],
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
# Module-level logger used by the functions below.
logger = logging.getLogger(__name__)


def convert_cif_to_pdb(cif_path, pdb_path):
    """Read an mmCIF structure with Biopython and write it back out as PDB.

    Args:
        cif_path (str): Path to input .cif file
        pdb_path (str): Path to output .pdb file
    """
    # Load the structure; "structure" is the identifier handed to the parser.
    parsed = MMCIFParser().get_structure("structure", cif_path)

    # Hand the parsed structure to the PDB writer and save it to disk.
    writer = PDBIO()
    writer.set_structure(parsed)
    writer.save(pdb_path)
    


def predict(sequence: str) -> str:
    """Predict protein structure from amino acid sequence using Boltz model.

    Each call writes its FASTA input into a private temporary directory and
    its results into a uniquely named output directory, so concurrent
    requests cannot overwrite each other's files.

    Args:
        sequence (str): Amino acid sequence to predict structure for.
            All whitespace (including line breaks) is removed before use.

    Returns:
        str: HTML iframe containing 3D molecular visualization

    Raises:
        ValueError: If the sequence is empty once whitespace is removed.
        FileNotFoundError: If no ``*_model_0.cif`` file is found in the
            prediction output directory.
    """
    import tempfile

    # Pasted sequences often carry spaces/newlines; they must not end up in
    # the FASTA record or be counted as residues by len(sequence) later on.
    sequence = "".join((sequence or "").split())
    if not sequence:
        raise ValueError("Please provide a non-empty amino acid sequence.")

    # Set up unique output directory
    seq_id = str(uuid.uuid4())
    output_dir = Path(f"sequence_{seq_id}")
    output_dir.mkdir(parents=True, exist_ok=True)

    # The input file lives in a per-call temporary directory (instead of a
    # shared "sequence.fasta" in the working directory) and is removed once
    # the prediction call has returned.
    with tempfile.TemporaryDirectory(prefix="boltz_input_") as input_dir:
        seq_file = Path(input_dir) / "sequence.fasta"
        _write_fasta_file(seq_file, sequence)

        # Run Boltz prediction
        logger.info(
            "Predicting %s with project code %s",
            seq_file.stem,
            FOLDING_PROJECT_CODE,
        )
        boltz_predict(
            source=seq_file,
            project_code=FOLDING_PROJECT_CODE,
            output=output_dir,
            unzip=True
        )
    logger.info("Prediction done. Output directory: %s", output_dir)

    # Convert output CIF to PDB; fail with a descriptive error rather than a
    # bare IndexError when the prediction produced no model file.
    pred_cif = next(output_dir.rglob("*_model_0.cif"), None)
    if pred_cif is None:
        raise FileNotFoundError(
            f"No '*_model_0.cif' file found under {output_dir}"
        )
    logger.info("Output file: %s", pred_cif)

    converted_pdb_path = output_dir / "pred.pdb"
    convert_cif_to_pdb(str(pred_cif), str(converted_pdb_path))
    logger.info("Converted PDB file: %s", converted_pdb_path)

    # Generate molecular visualization
    mol = _create_molecule_visualization(
        converted_pdb_path,
        sequence,
        output_dir
    )

    return _wrap_in_iframe(mol)


def _write_fasta_file(filepath: Path, sequence: str) -> None:
    """Write sequence to FASTA file."""
    with open(filepath, "w") as f:
        f.write(f">A|protein\n{sequence}")


def _create_molecule_visualization(pdb_path: Path, sequence: str, output_dir: Path) -> str:
    """Call ``molecule`` for a single predicted sequence and return its result.

    The same PDB path is passed for both positional arguments, every residue
    index from 1 to ``len(sequence)`` is selected, and the per-sequence
    metrics are all passed as 0.
    """
    pdb_file = str(pdb_path)
    residue_count = len(sequence)

    # Single entry describing the sequence; metric values are fixed at 0 here.
    entry = {
        "Score": 0,
        "RMSD": 0,
        "Recovery": 0,
        "Mean pLDDT": 0,
        "seq": sequence,
    }

    return molecule(
        pdb_file,
        pdb_file,
        lenSeqs=1,
        num_res=residue_count,
        selectedResidues=list(range(1, residue_count + 1)),
        allSeqs=[sequence],
        sequences=[entry],
        random_dir=output_dir,
    )


def _wrap_in_iframe(content: str) -> str:
    """Wrap content in an HTML iframe with appropriate styling and permissions."""
    return f"""<iframe 
        name="result" 
        allow="midi; geolocation; microphone; camera; display-capture; encrypted-media;"
        sandbox="allow-modals allow-forms allow-scripts allow-same-origin allow-popups allow-top-navigation-by-user-activation allow-downloads"
        allowfullscreen=""
        allowpaymentrequest=""
        frameborder="0"
        srcdoc='{content}'
    ></iframe>"""

# Gradio UI: a sequence textbox as input, a "Predict" button, and an HTML
# component that displays whatever `predict` returns.
with gr.Blocks(title="Folding Studio: structure prediction with Boltz-1") as demo:
    gr.Markdown("# Input")
    with gr.Row(), gr.Column():
        sequence = gr.Textbox(label="Sequence", value="")

    gr.Markdown("# Output")
    with gr.Row():
        predict_btn = gr.Button("Predict")
    with gr.Row():
        mol_output = gr.HTML()

    # Clicking the button passes the textbox value to `predict` and puts the
    # returned HTML into the output component.
    predict_btn.click(fn=predict, inputs=[sequence], outputs=[mol_output])

demo.launch()