ireneisdoomed committed on
Commit
10f4478
·
verified ·
1 Parent(s): 4205c97

chore: update model (run 05-11_manual_egl_old_l2g_hits)

Browse files
Files changed (4) hide show
  1. .gitattributes +1 -0
  2. 05-11_manual_egl_old_l2g_hits.skops +3 -0
  3. README.md +193 -0
  4. config.json +190 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ 05-11_manual_egl_old_l2g_hits.skops filter=lfs diff=lfs merge=lfs -text
05-11_manual_egl_old_l2g_hits.skops ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:863649058bc216bfbcd2e302b9c5eb7b503cd6897e21632a39677412767fef14
3
+ size 2896050
README.md ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: sklearn
3
+ tags:
4
+ - sklearn
5
+ - skops
6
+ - tabular-classification
7
+ model_format: skops
8
+ model_file: 05-11_manual_egl_old_l2g_hits.skops
9
+ widget:
10
+ - structuredData:
11
+ credibleSetConfidence:
12
+ - 0.75
13
+ - 0.75
14
+ - 0.75
15
+ distanceFootprintMean:
16
+ - 0.9955834746360779
17
+ - 0.9958390593528748
18
+ - 0.9838611483573914
19
+ distanceFootprintMeanNeighbourhood:
20
+ - 0.00971545372158289
21
+ - 0.009760296903550625
22
+ - 0.007658529095351696
23
+ distanceSentinelFootprint:
24
+ - 0.9955792427062988
25
+ - 0.9958348274230957
26
+ - 0.983856201171875
27
+ distanceSentinelFootprintNeighbourhood:
28
+ - 0.009715789929032326
29
+ - 0.00976063497364521
30
+ - 0.007658741902559996
31
+ distanceSentinelTss:
32
+ - 0.9955792427062988
33
+ - 0.9958348274230957
34
+ - 0.983856201171875
35
+ distanceSentinelTssNeighbourhood:
36
+ - 0.00974195171147585
37
+ - 0.009786796756088734
38
+ - 0.007684903685003519
39
+ distanceTssMean:
40
+ - 0.9955834746360779
41
+ - 0.9958390593528748
42
+ - 0.9838611483573914
43
+ distanceTssMeanNeighbourhood:
44
+ - 0.009741270914673805
45
+ - 0.009786113165318966
46
+ - 0.007684345822781324
47
+ eQtlColocClppMaximum:
48
+ - 0.0
49
+ - 0.0
50
+ - 0.0
51
+ eQtlColocClppMaximumNeighbourhood:
52
+ - 0.0
53
+ - 0.0
54
+ - 0.0
55
+ eQtlColocH4Maximum:
56
+ - 0.0
57
+ - 0.0
58
+ - 0.0
59
+ eQtlColocH4MaximumNeighbourhood:
60
+ - 0.0
61
+ - 0.0
62
+ - 0.0
63
+ geneCount500kb:
64
+ - 13.0
65
+ - 13.0
66
+ - 13.0
67
+ isProteinCoding:
68
+ - 0.0
69
+ - 1.0
70
+ - 1.0
71
+ pQtlColocClppMaximum:
72
+ - 0.0
73
+ - 0.0
74
+ - 0.0
75
+ pQtlColocClppMaximumNeighbourhood:
76
+ - 0.0
77
+ - 0.0
78
+ - 0.0
79
+ pQtlColocH4Maximum:
80
+ - 0.0
81
+ - 0.0
82
+ - 0.0
83
+ pQtlColocH4MaximumNeighbourhood:
84
+ - 0.0
85
+ - 0.0
86
+ - 0.0
87
+ proteinGeneCount500kb:
88
+ - 7.0
89
+ - 7.0
90
+ - 7.0
91
+ sQtlColocClppMaximum:
92
+ - 0.0
93
+ - 0.0
94
+ - 0.0
95
+ sQtlColocClppMaximumNeighbourhood:
96
+ - 0.0
97
+ - 0.0
98
+ - 0.0
99
+ sQtlColocH4Maximum:
100
+ - 0.0
101
+ - 0.0
102
+ - 0.0
103
+ sQtlColocH4MaximumNeighbourhood:
104
+ - 0.0
105
+ - 0.0
106
+ - 0.0
107
+ studyLocusId:
108
+ - 0080a1923fa758d2016b4667710b49e9
109
+ - 0080a1923fa758d2016b4667710b49e9
110
+ - 0080a1923fa758d2016b4667710b49e9
111
+ vepMaximum:
112
+ - 0.0
113
+ - 0.0
114
+ - 0.0
115
+ vepMaximumNeighbourhood:
116
+ - -0.07333333790302277
117
+ - -0.07333333790302277
118
+ - -0.07333333790302277
119
+ vepMean:
120
+ - 0.0
121
+ - 0.0
122
+ - 0.0
123
+ vepMeanNeighbourhood:
124
+ - -0.01638231799006462
125
+ - -0.01638231799006462
126
+ - -0.01638231799006462
127
+ ---
128
+
129
+ # Model description
130
+
131
+ The locus-to-gene (L2G) model prioritises likely causal genes at each GWAS locus using predictive features derived from genetic and functional genomics data. The main categories of predictive features are:
132
+
133
+ - Distance (from credible set variants to gene)
134
+ - Molecular QTL Colocalization
135
+ - Chromatin Interaction (e.g., promoter-capture Hi-C)
136
+ - Variant Pathogenicity (from VEP)
137
+
138
+ More information at: https://opentargets.github.io/gentropy/python_api/methods/l2g/_l2g/
139
+
140
+
141
+ ## Intended uses & limitations
142
+
143
+ [More Information Needed]
144
+
145
+ ## Training Procedure
146
+
147
+ Gradient Boosting Classifier
148
+
149
+ ### Hyperparameters
150
+
151
+ <details>
152
+ <summary> Click to expand </summary>
153
+
154
+ | Hyperparameter | Value |
155
+ |--------------------------|--------------|
156
+ | ccp_alpha | 0.0 |
157
+ | criterion | friedman_mse |
158
+ | init | |
159
+ | learning_rate | 0.1 |
160
+ | loss | log_loss |
161
+ | max_depth | 5 |
162
+ | max_features | |
163
+ | max_leaf_nodes | |
164
+ | min_impurity_decrease | 0.0 |
165
+ | min_samples_leaf | 1 |
166
+ | min_samples_split | 2 |
167
+ | min_weight_fraction_leaf | 0.0 |
168
+ | n_estimators | 100 |
169
+ | n_iter_no_change | |
170
+ | random_state | 42 |
171
+ | subsample | 1.0 |
172
+ | tol | 0.0001 |
173
+ | validation_fraction | 0.1 |
174
+ | verbose | 0 |
175
+ | warm_start | False |
176
+
177
+ </details>
178
+
179
+ # How to Get Started with the Model
180
+
181
+ Load the model with the `LocusToGeneModel.load_from_hub` method, which returns a `LocusToGeneModel` object that can make predictions on a feature matrix.
182
+ The model can then be used to make predictions using the `predict` method.
183
+
184
+ More information can be found at: https://opentargets.github.io/gentropy/python_api/methods/l2g/model/
185
+
186
+
187
+ # Citation
188
+
189
+ https://doi.org/10.1038/s41588-021-00945-5
190
+
191
+ # License
192
+
193
+ MIT
config.json ADDED
@@ -0,0 +1,190 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "sklearn": {
3
+ "columns": [
4
+ "studyLocusId",
5
+ "credibleSetConfidence",
6
+ "distanceFootprintMean",
7
+ "distanceFootprintMeanNeighbourhood",
8
+ "distanceSentinelFootprint",
9
+ "distanceSentinelFootprintNeighbourhood",
10
+ "distanceSentinelTss",
11
+ "distanceSentinelTssNeighbourhood",
12
+ "distanceTssMean",
13
+ "distanceTssMeanNeighbourhood",
14
+ "eQtlColocClppMaximum",
15
+ "eQtlColocClppMaximumNeighbourhood",
16
+ "eQtlColocH4Maximum",
17
+ "eQtlColocH4MaximumNeighbourhood",
18
+ "geneCount500kb",
19
+ "isProteinCoding",
20
+ "pQtlColocClppMaximum",
21
+ "pQtlColocClppMaximumNeighbourhood",
22
+ "pQtlColocH4Maximum",
23
+ "pQtlColocH4MaximumNeighbourhood",
24
+ "proteinGeneCount500kb",
25
+ "sQtlColocClppMaximum",
26
+ "sQtlColocClppMaximumNeighbourhood",
27
+ "sQtlColocH4Maximum",
28
+ "sQtlColocH4MaximumNeighbourhood",
29
+ "vepMaximum",
30
+ "vepMaximumNeighbourhood",
31
+ "vepMean",
32
+ "vepMeanNeighbourhood"
33
+ ],
34
+ "environment": [
35
+ "scikit-learn=1.5.2"
36
+ ],
37
+ "example_input": {
38
+ "credibleSetConfidence": [
39
+ 0.75,
40
+ 0.75,
41
+ 0.75
42
+ ],
43
+ "distanceFootprintMean": [
44
+ 0.9955834746360779,
45
+ 0.9958390593528748,
46
+ 0.9838611483573914
47
+ ],
48
+ "distanceFootprintMeanNeighbourhood": [
49
+ 0.00971545372158289,
50
+ 0.009760296903550625,
51
+ 0.007658529095351696
52
+ ],
53
+ "distanceSentinelFootprint": [
54
+ 0.9955792427062988,
55
+ 0.9958348274230957,
56
+ 0.983856201171875
57
+ ],
58
+ "distanceSentinelFootprintNeighbourhood": [
59
+ 0.009715789929032326,
60
+ 0.00976063497364521,
61
+ 0.007658741902559996
62
+ ],
63
+ "distanceSentinelTss": [
64
+ 0.9955792427062988,
65
+ 0.9958348274230957,
66
+ 0.983856201171875
67
+ ],
68
+ "distanceSentinelTssNeighbourhood": [
69
+ 0.00974195171147585,
70
+ 0.009786796756088734,
71
+ 0.007684903685003519
72
+ ],
73
+ "distanceTssMean": [
74
+ 0.9955834746360779,
75
+ 0.9958390593528748,
76
+ 0.9838611483573914
77
+ ],
78
+ "distanceTssMeanNeighbourhood": [
79
+ 0.009741270914673805,
80
+ 0.009786113165318966,
81
+ 0.007684345822781324
82
+ ],
83
+ "eQtlColocClppMaximum": [
84
+ 0.0,
85
+ 0.0,
86
+ 0.0
87
+ ],
88
+ "eQtlColocClppMaximumNeighbourhood": [
89
+ 0.0,
90
+ 0.0,
91
+ 0.0
92
+ ],
93
+ "eQtlColocH4Maximum": [
94
+ 0.0,
95
+ 0.0,
96
+ 0.0
97
+ ],
98
+ "eQtlColocH4MaximumNeighbourhood": [
99
+ 0.0,
100
+ 0.0,
101
+ 0.0
102
+ ],
103
+ "geneCount500kb": [
104
+ 13.0,
105
+ 13.0,
106
+ 13.0
107
+ ],
108
+ "isProteinCoding": [
109
+ 0.0,
110
+ 1.0,
111
+ 1.0
112
+ ],
113
+ "pQtlColocClppMaximum": [
114
+ 0.0,
115
+ 0.0,
116
+ 0.0
117
+ ],
118
+ "pQtlColocClppMaximumNeighbourhood": [
119
+ 0.0,
120
+ 0.0,
121
+ 0.0
122
+ ],
123
+ "pQtlColocH4Maximum": [
124
+ 0.0,
125
+ 0.0,
126
+ 0.0
127
+ ],
128
+ "pQtlColocH4MaximumNeighbourhood": [
129
+ 0.0,
130
+ 0.0,
131
+ 0.0
132
+ ],
133
+ "proteinGeneCount500kb": [
134
+ 7.0,
135
+ 7.0,
136
+ 7.0
137
+ ],
138
+ "sQtlColocClppMaximum": [
139
+ 0.0,
140
+ 0.0,
141
+ 0.0
142
+ ],
143
+ "sQtlColocClppMaximumNeighbourhood": [
144
+ 0.0,
145
+ 0.0,
146
+ 0.0
147
+ ],
148
+ "sQtlColocH4Maximum": [
149
+ 0.0,
150
+ 0.0,
151
+ 0.0
152
+ ],
153
+ "sQtlColocH4MaximumNeighbourhood": [
154
+ 0.0,
155
+ 0.0,
156
+ 0.0
157
+ ],
158
+ "studyLocusId": [
159
+ "0080a1923fa758d2016b4667710b49e9",
160
+ "0080a1923fa758d2016b4667710b49e9",
161
+ "0080a1923fa758d2016b4667710b49e9"
162
+ ],
163
+ "vepMaximum": [
164
+ 0.0,
165
+ 0.0,
166
+ 0.0
167
+ ],
168
+ "vepMaximumNeighbourhood": [
169
+ -0.07333333790302277,
170
+ -0.07333333790302277,
171
+ -0.07333333790302277
172
+ ],
173
+ "vepMean": [
174
+ 0.0,
175
+ 0.0,
176
+ 0.0
177
+ ],
178
+ "vepMeanNeighbourhood": [
179
+ -0.01638231799006462,
180
+ -0.01638231799006462,
181
+ -0.01638231799006462
182
+ ]
183
+ },
184
+ "model": {
185
+ "file": "05-11_manual_egl_old_l2g_hits.skops"
186
+ },
187
+ "model_format": "skops",
188
+ "task": "tabular-classification"
189
+ }
190
+ }