Spaces:
Sleeping
Sleeping
Commit
·
c497936
1
Parent(s):
fdfbb79
Update code/pdb_featureVector.py
Browse files
code/pdb_featureVector.py
CHANGED
@@ -286,6 +286,7 @@ def pdb(input_set, mode, impute):
|
|
286 |
|
287 |
uniprot_matched = uniprot_matched.merge(pdb_fasta, on=['pdbID', 'chain'], how='left')
|
288 |
uniprot_matched = uniprot_matched.astype(str)
|
|
|
289 |
|
290 |
with_pdb = uniprot_matched[(uniprot_matched.pdbID != 'nan') & (
|
291 |
(uniprot_matched.resolution != 'nan') & (uniprot_matched.resolution != 'OT') & (
|
@@ -295,11 +296,12 @@ def pdb(input_set, mode, impute):
|
|
295 |
uniprot_matched.resolution == 'None'))]
|
296 |
no_pdb = no_pdb[~no_pdb.datapoint.isin(with_pdb.datapoint.to_list())]
|
297 |
no_pdb.drop(columns=['chain', 'pdbID', 'pdbSequence', 'resolution'], inplace=True)
|
298 |
-
|
299 |
print(
|
300 |
'PDB Information successfully added...\nPDB structures are found for %d of %d.\n%d of %d failed to match with PDB structure.\n'
|
301 |
% (len(with_pdb.drop_duplicates(['datapoint'])), len(uniprot_matched.drop_duplicates(['datapoint'])),
|
302 |
len(no_pdb.drop_duplicates(['datapoint'])), len(uniprot_matched.drop_duplicates(['datapoint']))))
|
|
|
303 |
|
304 |
with_pdb = with_pdb.sort_values(['uniprotID', 'resolution'], axis=0, ascending=True)
|
305 |
with_pdb = with_pdb.drop_duplicates(['uniprotID', 'wt', 'mut', 'pos', 'pdbSequence'], keep='first')
|
@@ -378,6 +380,7 @@ def pdb(input_set, mode, impute):
|
|
378 |
# Isoform matches, i.e. labelled as i, isoform sequences will be aligned with PDB sequences.
|
379 |
with_pdb['uniprotSequence'] = with_pdb['uniprotSequence'].str.replace('U', 'C')
|
380 |
with_pdb['pdbSequence'] = with_pdb['pdbSequence'].str.replace('U', 'C')
|
|
|
381 |
|
382 |
dfM = with_pdb[with_pdb.wt_sequence_match == 'm']
|
383 |
dfM = dfM.sort_values(['uniprotID', 'resolution'], axis=0, ascending=True)
|
|
|
286 |
|
287 |
uniprot_matched = uniprot_matched.merge(pdb_fasta, on=['pdbID', 'chain'], how='left')
|
288 |
uniprot_matched = uniprot_matched.astype(str)
|
289 |
+
st.write('uniprot_matched', uniprot_matched)
|
290 |
|
291 |
with_pdb = uniprot_matched[(uniprot_matched.pdbID != 'nan') & (
|
292 |
(uniprot_matched.resolution != 'nan') & (uniprot_matched.resolution != 'OT') & (
|
|
|
296 |
uniprot_matched.resolution == 'None'))]
|
297 |
no_pdb = no_pdb[~no_pdb.datapoint.isin(with_pdb.datapoint.to_list())]
|
298 |
no_pdb.drop(columns=['chain', 'pdbID', 'pdbSequence', 'resolution'], inplace=True)
|
299 |
+
st.write('with_pdb', with_pdb)
|
300 |
print(
|
301 |
'PDB Information successfully added...\nPDB structures are found for %d of %d.\n%d of %d failed to match with PDB structure.\n'
|
302 |
% (len(with_pdb.drop_duplicates(['datapoint'])), len(uniprot_matched.drop_duplicates(['datapoint'])),
|
303 |
len(no_pdb.drop_duplicates(['datapoint'])), len(uniprot_matched.drop_duplicates(['datapoint']))))
|
304 |
+
st.write('with_pdb1', with_pdb)
|
305 |
|
306 |
with_pdb = with_pdb.sort_values(['uniprotID', 'resolution'], axis=0, ascending=True)
|
307 |
with_pdb = with_pdb.drop_duplicates(['uniprotID', 'wt', 'mut', 'pos', 'pdbSequence'], keep='first')
|
|
|
380 |
# Isoform matches, i.e. labelled as i, isoform sequences will be aligned with PDB sequences.
|
381 |
with_pdb['uniprotSequence'] = with_pdb['uniprotSequence'].str.replace('U', 'C')
|
382 |
with_pdb['pdbSequence'] = with_pdb['pdbSequence'].str.replace('U', 'C')
|
383 |
+
st.write('with_pdb2', with_pdb)
|
384 |
|
385 |
dfM = with_pdb[with_pdb.wt_sequence_match == 'm']
|
386 |
dfM = dfM.sort_values(['uniprotID', 'resolution'], axis=0, ascending=True)
|