pyuul_kmeans / app.py
oucgc1996's picture
Update app.py
504f017
raw
history blame
2.85 kB
from pyuul import VolumeMaker
from pyuul import utils
import os
from sklearn.cluster import KMeans
from collections import OrderedDict
import numpy as np
import pandas as pd
import random
import torch
import os
import shutil
import gradio as gr
# Set the random seed
def setup_seed(seed):
    """Seed the torch, NumPy and ``random`` generators with the same value.

    After seeding, ``torch.backends.cudnn.deterministic`` is set to ``True``.

    Args:
        seed: Value passed unchanged to every seeding function.
    """
    seeders = (
        torch.manual_seed,
        torch.cuda.manual_seed_all,
        np.random.seed,
        random.seed,
    )
    for apply_seed in seeders:
        apply_seed(seed)
    torch.backends.cudnn.deterministic = True
# Seed every random number generator once, at import time.
setup_seed(100)
# Prefer the GPU when one is present, otherwise fall back to the CPU so the
# tensor `.to(device)` calls below do not fail on CPU-only hosts.
device = "cuda" if torch.cuda.is_available() else "cpu"
def add_file_to_folder(new_file_path, folder):
    """Copy every regular file found directly in one directory into another.

    Sub-directories of ``new_file_path`` are skipped; nothing is copied
    recursively.

    Args:
        new_file_path: Directory whose files are copied.
        folder: Destination passed to ``shutil.copy``.
    """
    candidates = (
        os.path.join(new_file_path, entry)
        for entry in os.listdir(new_file_path)
    )
    # `candidates` and `filter` are both lazy, so each path is checked and
    # copied before the next one is examined.
    for source in filter(os.path.isfile, candidates):
        shutil.copy(source, folder)
def copy(files, folder):
    """Delete every entry of ``folder`` whose name is not listed in ``files``.

    Despite its name this function copies nothing: it prunes ``folder`` so
    that only the listed entries remain.

    Args:
        files: Iterable of entry names (not paths) to keep.
        folder: Directory to prune.

    Raises:
        OSError: If an entry that must be deleted cannot be removed with
            ``os.remove`` (for example because it is a directory).
    """
    # Build the lookup once: membership tests become O(1) per entry instead
    # of rescanning `files`, and a one-shot iterator is consumed only once.
    keep = frozenset(files)
    for entry in os.listdir(folder):
        if entry not in keep:
            os.remove(os.path.join(folder, entry))
def pyuul(folder, n_clusters):
    """Cluster the structures in ``folder`` and return the ligand's cluster.

    The structures parsed from ``folder`` are converted to a point-cloud
    volume, reshaped to one row per structure and clustered with k-means.
    The names that received the same cluster label as ``'ligend.pdb'`` are
    returned.

    Args:
        folder: Path handed to ``utils.parsePDB``.
        n_clusters: Number of clusters requested from ``KMeans``.

    Returns:
        List of the names (as returned by ``utils.parsePDB``) that share a
        cluster label with ``'ligend.pdb'``, that entry included.

    Raises:
        KeyError: If ``'ligend.pdb'`` is not among the parsed names.
    """
    # NOTE(review): the four-value return of parsePDB is taken as-is from the
    # original code; presumably (coordinates, atom names, file names, number
    # of structures) -- confirm against the installed pyuul version.
    coords, atom_names, structure_names, structure_count = utils.parsePDB(folder)
    channels = utils.atomlistToChannels(atom_names)
    radii = utils.atomlistToRadius(atom_names)

    volume_maker = VolumeMaker.PointCloudVolume(device=device)
    coords = coords.to(device)
    radii = radii.to(device)
    # The channels are not consumed below; the transfer is kept so the call
    # sequence stays the same.
    channels = channels.to(device)

    point_cloud = volume_maker(coords, radii)
    # Reshape to `structure_count` rows and move to the CPU for scikit-learn.
    features = point_cloud.view(structure_count, -1).cpu()

    model = KMeans(n_clusters=n_clusters, n_init=10, init="k-means++", random_state=100)
    labels = model.fit_predict(features)

    label_by_name = dict(zip(structure_names, labels))
    ligand_label = label_by_name['ligend.pdb']
    return [
        name
        for name, label in label_by_name.items()
        if label == ligand_label
    ]
def kmeans(ligend, n_clusters, n_num):
    """Repeatedly cluster the peptide library around an uploaded ligand.

    The ligand is placed in the scratch folder as ``ligend.pdb`` together
    with every file from the peptide folder. Each round clusters the files
    currently in the scratch folder and deletes those that did not land in
    the ligand's cluster, so later rounds refine the previous selection.
    The surviving names are written to ``outputs.csv``.

    Args:
        ligend: Uploaded ligand structure, either a path or an object whose
            ``name`` attribute holds the path.
        n_clusters: Number of k-means clusters; an int or a numeric string.
        n_num: Number of clustering rounds; an int or a numeric string.

    Returns:
        The string ``'outputs.csv'``, the path of the written CSV file.

    Raises:
        ValueError: If ``n_clusters`` or ``n_num`` cannot be converted to an
            int, or if ``n_num`` is smaller than 1.
    """
    peptide_folder_path = "/peptide/"
    pdb_folder = "/temp/"

    # The interface feeds these from text boxes, so they may arrive as
    # strings; range() and KMeans both need real integers.
    n_clusters = int(n_clusters)
    n_num = int(n_num)
    if n_num < 1:
        raise ValueError("n_num must be at least 1")

    # NOTE(review): depending on the Gradio version a "file" input is either
    # a path string or a temp-file wrapper exposing `.name` -- handle both.
    ligand_path = getattr(ligend, "name", ligend)

    os.makedirs(pdb_folder, exist_ok=True)
    try:
        # copyfile() needs a complete destination file name, and pyuul()
        # looks the ligand up under exactly 'ligend.pdb'.
        shutil.copyfile(ligand_path, os.path.join(pdb_folder, "ligend.pdb"))
        add_file_to_folder(peptide_folder_path, pdb_folder)

        for _ in range(n_num):
            output = pyuul(pdb_folder, n_clusters)
            copy(output, pdb_folder)
            # KMeans cannot form n_clusters clusters from fewer samples, so
            # a further round would only raise; stop with what is left.
            if len(output) < n_clusters:
                break

        pd.DataFrame({'Name': output}).to_csv('outputs.csv', index=False)
    finally:
        # Always leave an empty scratch folder behind, even when a round
        # failed, so the next request starts clean.
        shutil.rmtree(pdb_folder, ignore_errors=True)
        os.makedirs(pdb_folder, exist_ok=True)
    return 'outputs.csv'
# Web front end: one file upload (the ligand) and two text boxes (cluster
# count and number of rounds) feed `kmeans`; the result is offered as a file.
_cluster_count_box = gr.Textbox(label="n_clusters", placeholder="2", lines=1)
_round_count_box = gr.Textbox(label="Times", placeholder="2", lines=1)

iface = gr.Interface(
    fn=kmeans,
    inputs=["file", _cluster_count_box, _round_count_box],
    outputs="file",
)
iface.launch()