fadliaulawi
committed on
Commit
•
93c3874
1
Parent(s):
2bc0f8a
Add new validation case
Browse files - validate.py +16 -12
validate.py
CHANGED
@@ -41,18 +41,7 @@ class Validation():
|
|
41 |
break
|
42 |
i += 1
|
43 |
|
44 |
-
# Check if there are
|
45 |
-
i = 0
|
46 |
-
while i < len(df):
|
47 |
-
rsid = df.loc[i, 'rsID']
|
48 |
-
if ',' in rsid:
|
49 |
-
rsids = rsid.split(',')
|
50 |
-
df.loc[i + 0.1], df.loc[i + 0.9] = df.loc[i], df.loc[i]
|
51 |
-
df = df.sort_index().reset_index(drop=True)
|
52 |
-
df.loc[i + 1, 'rsID'], df.loc[i + 2, 'rsID'] = rsids[0], s.join(rsids[1:])
|
53 |
-
i += 1
|
54 |
-
|
55 |
-
# Check if there is SNPs without 'rs'
|
56 |
for i in df.index:
|
57 |
safe = True
|
58 |
snp = df.loc[i, 'rsID']
|
@@ -65,6 +54,10 @@ class Validation():
|
|
65 |
snp = 'r' + snp
|
66 |
elif re.fullmatch('(\d)+', snp):
|
67 |
snp = 'rs' + snp
|
|
|
|
|
|
|
|
|
68 |
else:
|
69 |
safe = False
|
70 |
df = df.drop(i)
|
@@ -72,6 +65,17 @@ class Validation():
|
|
72 |
if safe:
|
73 |
df.loc[i, 'rsID'] = snp
|
74 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
75 |
df.reset_index(drop=True, inplace=True)
|
76 |
df_clean = df.copy()
|
77 |
|
|
|
41 |
break
|
42 |
i += 1
|
43 |
|
44 |
+
# Check if there are SNPs not well captured
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
for i in df.index:
|
46 |
safe = True
|
47 |
snp = df.loc[i, 'rsID']
|
|
|
54 |
snp = 'r' + snp
|
55 |
elif re.fullmatch('(\d)+', snp):
|
56 |
snp = 'rs' + snp
|
57 |
+
elif re.fullmatch('r(\d)+', snp):
|
58 |
+
snp = 'rs' + snp[1:]
|
59 |
+
if snp[2] == '5':
|
60 |
+
snp += f',rs{snp[3:]}'
|
61 |
else:
|
62 |
safe = False
|
63 |
df = df.drop(i)
|
|
|
65 |
if safe:
|
66 |
df.loc[i, 'rsID'] = snp
|
67 |
|
68 |
+
# Check if there are multiple rsIDs
|
69 |
+
i = 0
|
70 |
+
while i < len(df):
|
71 |
+
rsid = df.loc[i, 'rsID']
|
72 |
+
if ',' in rsid:
|
73 |
+
rsids = rsid.split(',')
|
74 |
+
df.loc[i + 0.1], df.loc[i + 0.9] = df.loc[i], df.loc[i]
|
75 |
+
df = df.sort_index().reset_index(drop=True)
|
76 |
+
df.loc[i + 1, 'rsID'], df.loc[i + 2, 'rsID'] = rsids[0], s.join(rsids[1:])
|
77 |
+
i += 1
|
78 |
+
|
79 |
df.reset_index(drop=True, inplace=True)
|
80 |
df_clean = df.copy()
|
81 |
|