fadliaulawi committed on
Commit
93c3874
1 Parent(s): 2bc0f8a

Add new validation case

Browse files
Files changed (1) hide show
  1. validate.py +16 -12
validate.py CHANGED
@@ -41,18 +41,7 @@ class Validation():
41
  break
42
  i += 1
43
 
44
- # Check if there are multiple rsIDs
45
- i = 0
46
- while i < len(df):
47
- rsid = df.loc[i, 'rsID']
48
- if ',' in rsid:
49
- rsids = rsid.split(',')
50
- df.loc[i + 0.1], df.loc[i + 0.9] = df.loc[i], df.loc[i]
51
- df = df.sort_index().reset_index(drop=True)
52
- df.loc[i + 1, 'rsID'], df.loc[i + 2, 'rsID'] = rsids[0], s.join(rsids[1:])
53
- i += 1
54
-
55
- # Check if there is SNPs without 'rs'
56
  for i in df.index:
57
  safe = True
58
  snp = df.loc[i, 'rsID']
@@ -65,6 +54,10 @@ class Validation():
65
  snp = 'r' + snp
66
  elif re.fullmatch('(\d)+', snp):
67
  snp = 'rs' + snp
 
 
 
 
68
  else:
69
  safe = False
70
  df = df.drop(i)
@@ -72,6 +65,17 @@ class Validation():
72
  if safe:
73
  df.loc[i, 'rsID'] = snp
74
 
 
 
 
 
 
 
 
 
 
 
 
75
  df.reset_index(drop=True, inplace=True)
76
  df_clean = df.copy()
77
 
 
41
  break
42
  i += 1
43
 
44
+ # Check if there are SNPs not well captured
 
 
 
 
 
 
 
 
 
 
 
45
  for i in df.index:
46
  safe = True
47
  snp = df.loc[i, 'rsID']
 
54
  snp = 'r' + snp
55
  elif re.fullmatch('(\d)+', snp):
56
  snp = 'rs' + snp
57
+ elif re.fullmatch('r(\d)+', snp):
58
+ snp = 'rs' + snp[1:]
59
+ if snp[2] == '5':
60
+ snp += f',rs{snp[3:]}'
61
  else:
62
  safe = False
63
  df = df.drop(i)
 
65
  if safe:
66
  df.loc[i, 'rsID'] = snp
67
 
68
+ # Check if there are multiple rsIDs
69
+ i = 0
70
+ while i < len(df):
71
+ rsid = df.loc[i, 'rsID']
72
+ if ',' in rsid:
73
+ rsids = rsid.split(',')
74
+ df.loc[i + 0.1], df.loc[i + 0.9] = df.loc[i], df.loc[i]
75
+ df = df.sort_index().reset_index(drop=True)
76
+ df.loc[i + 1, 'rsID'], df.loc[i + 2, 'rsID'] = rsids[0], s.join(rsids[1:])
77
+ i += 1
78
+
79
  df.reset_index(drop=True, inplace=True)
80
  df_clean = df.copy()
81