"""Gradio app that iteratively narrows a peptide library around an uploaded ligand.

Each round computes a surface point cloud for every structure in a working
folder, clusters the flattened point clouds with K-Means, and keeps only the
structures that fall into the same cluster as the ligand.
"""

import os
import pathlib
import random
import shutil
import zipfile
from collections import OrderedDict

import gradio as gr
import numpy as np
import pandas as pd
import torch
from sklearn.cluster import KMeans

from pyuul_kmeans import VolumeMaker
from pyuul_kmeans import utils

# Name under which the uploaded ligand is stored in the working folder and
# under which it is looked up among the clustering results.
LIGAND_FILENAME = "ligand.pdb"

# Folder containing this script; the peptide library and the working folder
# are resolved relative to it.
BASE_DIR = pathlib.Path(__file__).parent


def setup_seed(seed):
    """Seed the torch, NumPy and stdlib random number generators with *seed*."""
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True


# Set the random seed.
setup_seed(100)
device = "cpu"


def add_file_to_folder(new_file_path, folder):
    """Copy every regular file directly inside *new_file_path* into *folder*.

    Sub-directories of *new_file_path* are skipped (no recursion).
    """
    for entry in os.listdir(new_file_path):
        source = os.path.join(new_file_path, entry)
        if os.path.isfile(source):
            shutil.copy(source, folder)


def copy(files, folder):
    """Delete every entry of *folder* whose name is not listed in *files*.

    Despite its name, this function copies nothing: it prunes *folder* so
    that only the entries named in *files* remain.
    """
    keep = set(files)
    for entry in os.listdir(folder):
        if entry not in keep:
            os.remove(os.path.join(folder, entry))


def pyuul(folder, n_clusters):
    """Cluster the structures in *folder* and return the ligand's cluster mates.

    Args:
        folder: Directory handed to ``utils.parsePDB``.
        n_clusters: Requested number of K-Means clusters. It is clamped to the
            number of parsed structures, because K-Means rejects more clusters
            than samples.

    Returns:
        The names (as reported by ``utils.parsePDB``) of all structures that
        received the same cluster label as ``ligand.pdb``, ligand included.

    Raises:
        KeyError: If ``ligand.pdb`` is not among the parsed structure names.
    """
    coords, atname, pdbname, pdb_num = utils.parsePDB(str(folder))
    radius = utils.atomlistToRadius(atname)

    point_cloud_volume = VolumeMaker.PointCloudVolume(device=device)
    surface_point_cloud = point_cloud_volume(coords.to(device), radius.to(device))
    # One flattened feature row per structure.
    feature = surface_point_cloud.view(pdb_num, -1).cpu()

    # Repeated pruning can leave fewer structures than requested clusters.
    effective_clusters = min(n_clusters, feature.shape[0])
    model = KMeans(
        n_clusters=effective_clusters,
        n_init=10,
        init="k-means++",
        random_state=100,
    )
    labels = model.fit_predict(feature)

    cluster_of = dict(zip(pdbname, labels))
    try:
        ligand_class = cluster_of[LIGAND_FILENAME]
    except KeyError:
        raise KeyError(
            f"{LIGAND_FILENAME!r} was not found among the structures parsed from {folder}"
        ) from None

    return [name for name, label in cluster_of.items() if label == ligand_class]


def kmeans(ligand, n_clusters, n_num):
    """Run *n_num* rounds of ligand-centred clustering and write the survivors.

    Args:
        ligand: Uploaded file object; its ``name`` attribute is the path of
            the ligand PDB file on disk.
        n_clusters: Number of K-Means clusters per round (int or numeric string).
        n_num: Number of clustering rounds (int or numeric string), at least 1.

    Returns:
        The path ``'outputs.csv'``, a CSV file with a single ``Name`` column
        listing the structures that remained after the last round.

    Raises:
        ValueError: If no ligand was supplied, or if *n_clusters* or *n_num*
            is not an integer greater than or equal to 1.
    """
    # Validate before touching the file system so a bad request leaves no trace.
    if ligand is None:
        raise ValueError("A ligand PDB file must be uploaded")
    n_num = int(n_num)
    n_clusters = int(n_clusters)
    if n_num < 1:
        raise ValueError("Times must be at least 1")
    if n_clusters < 1:
        raise ValueError("n_clusters must be at least 1")

    # NOTE(review): the archive is opened and extracted relative to the current
    # working directory, while the extracted folder is read relative to this
    # script; this assumes the app is started from the script's folder.
    with zipfile.ZipFile("peptides.zip") as archive:
        archive.extractall()

    peptide_folder_path = BASE_DIR.joinpath("peptides")
    pdb_folder = BASE_DIR.joinpath("temp")
    pdb_folder.mkdir(exist_ok=True)

    try:
        # Store the upload under the fixed name that pyuul() looks up, whatever
        # the uploaded file happened to be called.
        shutil.copy(ligand.name, pdb_folder / LIGAND_FILENAME)
        add_file_to_folder(peptide_folder_path, pdb_folder)

        for _ in range(n_num):
            output = pyuul(pdb_folder, n_clusters)
            # Keep only the ligand's cluster for the next round.
            copy(output, pdb_folder)

        data = OrderedDict()
        data['Name'] = output
        pd.DataFrame(data).to_csv('outputs.csv', index=False)
    finally:
        # Always leave an empty working folder behind, even after a failure,
        # so stale structures cannot leak into the next request.
        shutil.rmtree(pdb_folder, ignore_errors=True)
        pdb_folder.mkdir(exist_ok=True)

    return 'outputs.csv'


with open("pyuul_kmeans.md", "r", encoding="utf-8") as f:
    description = f.read()

iface = gr.Interface(
    fn=kmeans,
    title="Pyuul_Kmeans",
    inputs=[
        "file",
        gr.Textbox(label="n_clusters", placeholder="2", lines=1),
        gr.Textbox(label="Times", placeholder="2", lines=1),
    ],
    outputs="file",
    description=description,
)
iface.launch()