refactor
Browse files
app.py
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
import gradio as gr
|
2 |
from pathlib import Path
|
3 |
from Bio.PDB import MMCIFParser, PDBIO
|
@@ -35,38 +36,92 @@ def convert_cif_to_pdb(cif_path, pdb_path):
|
|
35 |
io.set_structure(structure)
|
36 |
io.save(pdb_path)
|
37 |
|
38 |
-
return pdb_path
|
39 |
|
40 |
|
41 |
-
def predict(sequence: str):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
seq_file = Path("sequence.fasta")
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
|
|
47 |
output_dir.mkdir(parents=True, exist_ok=True)
|
48 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
logger.info("Prediction done. Output directory: %s", output_dir)
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
70 |
|
71 |
demo = gr.Blocks()
|
72 |
|
@@ -74,7 +129,7 @@ with demo:
|
|
74 |
gr.Markdown("# Input")
|
75 |
with gr.Row():
|
76 |
with gr.Column():
|
77 |
-
sequence = gr.Textbox(label="Sequence", value="
|
78 |
gr.Markdown("# Output")
|
79 |
with gr.Row():
|
80 |
predict_btn = gr.Button("Predict")
|
|
|
1 |
+
import uuid
|
2 |
import gradio as gr
|
3 |
from pathlib import Path
|
4 |
from Bio.PDB import MMCIFParser, PDBIO
|
|
|
36 |
io.set_structure(structure)
|
37 |
io.save(pdb_path)
|
38 |
|
|
|
39 |
|
40 |
|
41 |
+
def predict(sequence: str) -> str:
    """Predict protein structure from amino acid sequence using Boltz model.

    The sequence is written to a FASTA file, submitted through
    ``boltz_predict``, and the first ``*_model_0.cif`` file found in the
    output directory is converted to PDB and rendered.

    Args:
        sequence (str): Amino acid sequence to predict structure for.
            All whitespace (including line breaks) is removed before use.

    Returns:
        str: HTML iframe containing 3D molecular visualization

    Raises:
        ValueError: If the sequence is empty once whitespace is removed.
        FileNotFoundError: If no ``*_model_0.cif`` file exists in the output
            directory after the prediction call returns.
    """
    # Text pasted into the UI may contain spaces or line breaks; they would
    # corrupt the single-record FASTA file and skew the residue count.
    sequence = "".join(sequence.split())
    if not sequence:
        raise ValueError("Sequence must not be empty.")

    # Set up unique output directory
    seq_id = str(uuid.uuid4())
    output_dir = Path(f"sequence_{seq_id}")
    output_dir.mkdir(parents=True, exist_ok=True)

    # Create the FASTA file inside the per-request directory: a fixed path in
    # the working directory would be overwritten by concurrent requests.
    # NOTE(review): assumes boltz_predict accepts a source file located inside
    # its output directory - confirm against the client documentation.
    seq_file = output_dir / "sequence.fasta"
    _write_fasta_file(seq_file, sequence)

    # Run Boltz prediction
    logger.info(
        "Predicting %s with project code %s",
        seq_file.stem,
        FOLDING_PROJECT_CODE,
    )
    boltz_predict(
        source=seq_file,
        project_code=FOLDING_PROJECT_CODE,
        output=output_dir,
        unzip=True,
    )
    logger.info("Prediction done. Output directory: %s", output_dir)

    # Convert output CIF to PDB
    pred_cif = next(output_dir.rglob("*_model_0.cif"), None)
    if pred_cif is None:
        raise FileNotFoundError(
            f"No '*_model_0.cif' file found under {output_dir}"
        )
    logger.info("Output file: %s", pred_cif)

    converted_pdb_path = output_dir / "pred.pdb"
    convert_cif_to_pdb(str(pred_cif), str(converted_pdb_path))
    logger.info("Converted PDB file: %s", converted_pdb_path)

    # Generate molecular visualization
    mol = _create_molecule_visualization(
        converted_pdb_path,
        sequence,
        output_dir,
    )

    return _wrap_in_iframe(mol)
|
85 |
+
|
86 |
+
|
87 |
+
def _write_fasta_file(filepath: Path, sequence: str) -> None:
    """Write sequence to FASTA file.

    The file holds a single record with the header ``>A|protein`` followed by
    the sequence on one line.

    Args:
        filepath: Destination path; an existing file is overwritten.
        sequence: Sequence text; leading and trailing whitespace is dropped.
    """
    # End the record with a newline so the file is a well-formed text file,
    # and strip first so a pasted trailing newline does not add a blank line.
    record = f">A|protein\n{sequence.strip()}\n"
    # Explicit encoding keeps the output independent of the host locale.
    with open(filepath, "w", encoding="utf-8") as f:
        f.write(record)
|
91 |
+
|
92 |
+
|
93 |
+
def _create_molecule_visualization(pdb_path: Path, sequence: str, output_dir: Path) -> str:
    """Build the viewer markup for a predicted structure via ``molecule``.

    Args:
        pdb_path: PDB file to display; passed to ``molecule`` as both of its
            positional arguments.
        sequence: Sequence of the structure; determines the residue count and
            the selected residue range (1 through ``len(sequence)``).
        output_dir: Forwarded to ``molecule`` as ``random_dir``.

    Returns:
        Whatever ``molecule`` returns (annotated as ``str``).
    """
    pdb_file = str(pdb_path)
    residue_count = len(sequence)

    # Single entry for the one sequence; every metric is passed as zero.
    sequence_entry = {
        "Score": 0,
        "RMSD": 0,
        "Recovery": 0,
        "Mean pLDDT": 0,
        "seq": sequence,
    }

    return molecule(
        pdb_file,
        pdb_file,
        lenSeqs=1,
        num_res=residue_count,
        selectedResidues=list(range(1, residue_count + 1)),
        allSeqs=[sequence],
        sequences=[sequence_entry],
        random_dir=output_dir,
    )
|
111 |
+
|
112 |
+
|
113 |
+
def _wrap_in_iframe(content: str) -> str:
    """Wrap content in an HTML iframe with appropriate styling and permissions.

    Args:
        content: HTML document to show inside the iframe.

    Returns:
        An ``<iframe>`` element whose ``srcdoc`` attribute carries ``content``.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import section.
    from html import escape

    # The browser entity-decodes the attribute value before parsing it as the
    # iframe document, so escaping round-trips the content exactly while
    # preventing a quote in it from terminating the srcdoc attribute.
    escaped_content = escape(content, quote=True)

    # NOTE(review): the sandbox grants both allow-scripts and
    # allow-same-origin; confirm the embedded viewer needs both, since the
    # combination weakens the sandbox.
    return f"""<iframe
    style="width: 800px; height: 1300px"
    name="result"
    allow="midi; geolocation; microphone; camera; display-capture; encrypted-media;"
    sandbox="allow-modals allow-forms allow-scripts allow-same-origin allow-popups allow-top-navigation-by-user-activation allow-downloads"
    allowfullscreen=""
    allowpaymentrequest=""
    frameborder="0"
    srcdoc='{escaped_content}'
></iframe>"""
|
125 |
|
126 |
demo = gr.Blocks()
|
127 |
|
|
|
129 |
gr.Markdown("# Input")
|
130 |
with gr.Row():
|
131 |
with gr.Column():
|
132 |
+
sequence = gr.Textbox(label="Sequence", value="")
|
133 |
gr.Markdown("# Output")
|
134 |
with gr.Row():
|
135 |
predict_btn = gr.Button("Predict")
|