Ekjaer committed on
Commit
dd5a4e5
·
1 Parent(s): 92e909a

pushing files to the repo from the example!

Browse files
Files changed (3) hide show
  1. README.md +1 -1
  2. init_repo_MLstructureMining.py +8 -14
  3. labels.csv +11 -0
README.md CHANGED
@@ -185,7 +185,7 @@ The model is trained with below hyperparameters.
185
 
186
  The model plot is below.
187
 
188
- <style>#sk-c0be133d-c30b-49f5-a2b3-96bdba68e768 {color: black;background-color: white;}#sk-c0be133d-c30b-49f5-a2b3-96bdba68e768 pre{padding: 0;}#sk-c0be133d-c30b-49f5-a2b3-96bdba68e768 div.sk-toggleable {background-color: white;}#sk-c0be133d-c30b-49f5-a2b3-96bdba68e768 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-c0be133d-c30b-49f5-a2b3-96bdba68e768 label.sk-toggleable__label-arrow:before {content: "▸";float: left;margin-right: 0.25em;color: #696969;}#sk-c0be133d-c30b-49f5-a2b3-96bdba68e768 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-c0be133d-c30b-49f5-a2b3-96bdba68e768 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-c0be133d-c30b-49f5-a2b3-96bdba68e768 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-c0be133d-c30b-49f5-a2b3-96bdba68e768 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-c0be133d-c30b-49f5-a2b3-96bdba68e768 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-c0be133d-c30b-49f5-a2b3-96bdba68e768 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: "▾";}#sk-c0be133d-c30b-49f5-a2b3-96bdba68e768 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-c0be133d-c30b-49f5-a2b3-96bdba68e768 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-c0be133d-c30b-49f5-a2b3-96bdba68e768 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-c0be133d-c30b-49f5-a2b3-96bdba68e768 div.sk-estimator {font-family: monospace;background-color: 
#f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-c0be133d-c30b-49f5-a2b3-96bdba68e768 div.sk-estimator:hover {background-color: #d4ebff;}#sk-c0be133d-c30b-49f5-a2b3-96bdba68e768 div.sk-parallel-item::after {content: "";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-c0be133d-c30b-49f5-a2b3-96bdba68e768 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-c0be133d-c30b-49f5-a2b3-96bdba68e768 div.sk-serial::before {content: "";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 2em;bottom: 0;left: 50%;}#sk-c0be133d-c30b-49f5-a2b3-96bdba68e768 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;}#sk-c0be133d-c30b-49f5-a2b3-96bdba68e768 div.sk-item {z-index: 1;}#sk-c0be133d-c30b-49f5-a2b3-96bdba68e768 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;}#sk-c0be133d-c30b-49f5-a2b3-96bdba68e768 div.sk-parallel::before {content: "";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 2em;bottom: 0;left: 50%;}#sk-c0be133d-c30b-49f5-a2b3-96bdba68e768 div.sk-parallel-item {display: flex;flex-direction: column;position: relative;background-color: white;}#sk-c0be133d-c30b-49f5-a2b3-96bdba68e768 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-c0be133d-c30b-49f5-a2b3-96bdba68e768 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-c0be133d-c30b-49f5-a2b3-96bdba68e768 div.sk-parallel-item:only-child::after {width: 0;}#sk-c0be133d-c30b-49f5-a2b3-96bdba68e768 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;position: relative;}#sk-c0be133d-c30b-49f5-a2b3-96bdba68e768 div.sk-label label {font-family: monospace;font-weight: bold;background-color: white;display: 
inline-block;line-height: 1.2em;}#sk-c0be133d-c30b-49f5-a2b3-96bdba68e768 div.sk-label-container {position: relative;z-index: 2;text-align: center;}#sk-c0be133d-c30b-49f5-a2b3-96bdba68e768 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-c0be133d-c30b-49f5-a2b3-96bdba68e768 div.sk-text-repr-fallback {display: none;}</style><div id="sk-c0be133d-c30b-49f5-a2b3-96bdba68e768" class="sk-top-container" style="overflow: auto;"><div class="sk-text-repr-fallback"><pre>XGBClassifier(base_score=0.5, booster=&#x27;gbtree&#x27;, colsample_bylevel=1,colsample_bynode=1, colsample_bytree=1, enable_categorical=False,gamma=0, gpu_id=-1, importance_type=None,interaction_constraints=&#x27;&#x27;, learning_rate=0.300000012,max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,monotone_constraints=&#x27;()&#x27;, n_estimators=100, n_jobs=8,num_parallel_tree=1, predictor=&#x27;auto&#x27;, random_state=0,reg_alpha=0, reg_lambda=1, scale_pos_weight=None, subsample=1,tree_method=&#x27;auto&#x27;, validate_parameters=1, verbosity=None)</pre><b>Please rerun this cell to show the HTML repr or trust the notebook.</b></div><div class="sk-container" hidden><div class="sk-item"><div class="sk-estimator sk-toggleable"><input class="sk-toggleable__control sk-hidden--visually" id="a8a3e199-8311-41f7-b297-2437550e529a" type="checkbox" checked><label for="a8a3e199-8311-41f7-b297-2437550e529a" class="sk-toggleable__label sk-toggleable__label-arrow">XGBClassifier</label><div class="sk-toggleable__content"><pre>XGBClassifier(base_score=0.5, booster=&#x27;gbtree&#x27;, colsample_bylevel=1,colsample_bynode=1, colsample_bytree=1, 
enable_categorical=False,gamma=0, gpu_id=-1, importance_type=None,interaction_constraints=&#x27;&#x27;, learning_rate=0.300000012,max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,monotone_constraints=&#x27;()&#x27;, n_estimators=100, n_jobs=8,num_parallel_tree=1, predictor=&#x27;auto&#x27;, random_state=0,reg_alpha=0, reg_lambda=1, scale_pos_weight=None, subsample=1,tree_method=&#x27;auto&#x27;, validate_parameters=1, verbosity=None)</pre></div></div></div></div></div>
189
 
190
  ## Evaluation Results
191
 
 
185
 
186
  The model plot is below.
187
 
188
+ <style>#sk-155c4ebd-3ff3-4878-a6ed-b467111c8e67 {color: black;background-color: white;}#sk-155c4ebd-3ff3-4878-a6ed-b467111c8e67 pre{padding: 0;}#sk-155c4ebd-3ff3-4878-a6ed-b467111c8e67 div.sk-toggleable {background-color: white;}#sk-155c4ebd-3ff3-4878-a6ed-b467111c8e67 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-155c4ebd-3ff3-4878-a6ed-b467111c8e67 label.sk-toggleable__label-arrow:before {content: "▸";float: left;margin-right: 0.25em;color: #696969;}#sk-155c4ebd-3ff3-4878-a6ed-b467111c8e67 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-155c4ebd-3ff3-4878-a6ed-b467111c8e67 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-155c4ebd-3ff3-4878-a6ed-b467111c8e67 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-155c4ebd-3ff3-4878-a6ed-b467111c8e67 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-155c4ebd-3ff3-4878-a6ed-b467111c8e67 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-155c4ebd-3ff3-4878-a6ed-b467111c8e67 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: "▾";}#sk-155c4ebd-3ff3-4878-a6ed-b467111c8e67 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-155c4ebd-3ff3-4878-a6ed-b467111c8e67 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-155c4ebd-3ff3-4878-a6ed-b467111c8e67 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-155c4ebd-3ff3-4878-a6ed-b467111c8e67 div.sk-estimator {font-family: monospace;background-color: 
#f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-155c4ebd-3ff3-4878-a6ed-b467111c8e67 div.sk-estimator:hover {background-color: #d4ebff;}#sk-155c4ebd-3ff3-4878-a6ed-b467111c8e67 div.sk-parallel-item::after {content: "";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-155c4ebd-3ff3-4878-a6ed-b467111c8e67 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-155c4ebd-3ff3-4878-a6ed-b467111c8e67 div.sk-serial::before {content: "";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 2em;bottom: 0;left: 50%;}#sk-155c4ebd-3ff3-4878-a6ed-b467111c8e67 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;}#sk-155c4ebd-3ff3-4878-a6ed-b467111c8e67 div.sk-item {z-index: 1;}#sk-155c4ebd-3ff3-4878-a6ed-b467111c8e67 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;}#sk-155c4ebd-3ff3-4878-a6ed-b467111c8e67 div.sk-parallel::before {content: "";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 2em;bottom: 0;left: 50%;}#sk-155c4ebd-3ff3-4878-a6ed-b467111c8e67 div.sk-parallel-item {display: flex;flex-direction: column;position: relative;background-color: white;}#sk-155c4ebd-3ff3-4878-a6ed-b467111c8e67 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-155c4ebd-3ff3-4878-a6ed-b467111c8e67 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-155c4ebd-3ff3-4878-a6ed-b467111c8e67 div.sk-parallel-item:only-child::after {width: 0;}#sk-155c4ebd-3ff3-4878-a6ed-b467111c8e67 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;position: relative;}#sk-155c4ebd-3ff3-4878-a6ed-b467111c8e67 div.sk-label label {font-family: monospace;font-weight: bold;background-color: white;display: 
inline-block;line-height: 1.2em;}#sk-155c4ebd-3ff3-4878-a6ed-b467111c8e67 div.sk-label-container {position: relative;z-index: 2;text-align: center;}#sk-155c4ebd-3ff3-4878-a6ed-b467111c8e67 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-155c4ebd-3ff3-4878-a6ed-b467111c8e67 div.sk-text-repr-fallback {display: none;}</style><div id="sk-155c4ebd-3ff3-4878-a6ed-b467111c8e67" class="sk-top-container" style="overflow: auto;"><div class="sk-text-repr-fallback"><pre>XGBClassifier(base_score=0.5, booster=&#x27;gbtree&#x27;, colsample_bylevel=1,colsample_bynode=1, colsample_bytree=1, enable_categorical=False,gamma=0, gpu_id=-1, importance_type=None,interaction_constraints=&#x27;&#x27;, learning_rate=0.300000012,max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,monotone_constraints=&#x27;()&#x27;, n_estimators=100, n_jobs=8,num_parallel_tree=1, predictor=&#x27;auto&#x27;, random_state=0,reg_alpha=0, reg_lambda=1, scale_pos_weight=None, subsample=1,tree_method=&#x27;auto&#x27;, validate_parameters=1, verbosity=None)</pre><b>Please rerun this cell to show the HTML repr or trust the notebook.</b></div><div class="sk-container" hidden><div class="sk-item"><div class="sk-estimator sk-toggleable"><input class="sk-toggleable__control sk-hidden--visually" id="15187887-3eef-43d1-af97-9d744586ed11" type="checkbox" checked><label for="15187887-3eef-43d1-af97-9d744586ed11" class="sk-toggleable__label sk-toggleable__label-arrow">XGBClassifier</label><div class="sk-toggleable__content"><pre>XGBClassifier(base_score=0.5, booster=&#x27;gbtree&#x27;, colsample_bylevel=1,colsample_bynode=1, colsample_bytree=1, 
enable_categorical=False,gamma=0, gpu_id=-1, importance_type=None,interaction_constraints=&#x27;&#x27;, learning_rate=0.300000012,max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,monotone_constraints=&#x27;()&#x27;, n_estimators=100, n_jobs=8,num_parallel_tree=1, predictor=&#x27;auto&#x27;, random_state=0,reg_alpha=0, reg_lambda=1, scale_pos_weight=None, subsample=1,tree_method=&#x27;auto&#x27;, validate_parameters=1, verbosity=None)</pre></div></div></div></div></div>
189
 
190
  ## Evaluation Results
191
 
init_repo_MLstructureMining.py CHANGED
@@ -18,6 +18,11 @@ from sklearn.model_selection import HalvingGridSearchCV, train_test_split
18
  import shutil
19
  from skops import card, hub_utils
20
 
 
 
 
 
 
21
  # Data
22
  X, y = load_breast_cancer(as_frame=True, return_X_y=True)
23
  X_train, X_test, y_train, y_test = train_test_split(
@@ -32,21 +37,10 @@ param_grid = {
32
  "max_depth": [2, 5, 10],
33
  }
34
 
35
- # model = HalvingGridSearchCV(
36
- # estimator=HistGradientBoostingClassifier(),
37
- # param_grid=param_grid,
38
- # random_state=42,
39
- # n_jobs=-1,
40
- # ).fit(X_train, y_train)
41
- # model.score(X_test, y_test)# The file name is not significant, here we choose to save it with a `pkl`
42
- # # extension.
43
 
44
- # _, pkl_name = mkstemp(prefix="skops-", suffix=".pkl")
45
- # with open(pkl_name, mode="bw") as f:
46
- # pickle.dump(model, file=f)
47
 
48
  booster = xgboost.Booster({'nthread': 8})
49
- booster.load_model("xgb_model_bayse_optimization_00000.bin")
50
 
51
  model = XGBClassifier()
52
 
@@ -55,13 +49,13 @@ model._Booster = booster
55
 
56
  local_repo = mkdtemp(prefix="skops-")
57
  hub_utils.init(
58
- #model=pkl_name,
59
- model="xgb_model_bayse_optimization_00000.bin",
60
  requirements=[f"scikit-learn={sklearn.__version__}", f"xgboost={xgboost.__version__}"],
61
  dst=local_repo,
62
  task="tabular-classification",
63
  data=X_test,
64
  )
 
65
  if "__file__" in locals(): # __file__ not defined during docs built
66
  # Add this script itself to the files to be uploaded for reproducibility
67
  hub_utils.add_files(__file__, dst=local_repo)
 
18
  import shutil
19
  from skops import card, hub_utils
20
 
21
+ # Paths
22
+ model_path = "xgb_model_bayse_optimization_00000.bin"
23
+ label_path = "labels.csv"
24
+
25
+
26
  # Data
27
  X, y = load_breast_cancer(as_frame=True, return_X_y=True)
28
  X_train, X_test, y_train, y_test = train_test_split(
 
37
  "max_depth": [2, 5, 10],
38
  }
39
 
 
 
 
 
 
 
 
 
40
 
 
 
 
41
 
42
  booster = xgboost.Booster({'nthread': 8})
43
+ booster.load_model(model_path)
44
 
45
  model = XGBClassifier()
46
 
 
49
 
50
  local_repo = mkdtemp(prefix="skops-")
51
  hub_utils.init(
52
+ model=model_path,
 
53
  requirements=[f"scikit-learn={sklearn.__version__}", f"xgboost={xgboost.__version__}"],
54
  dst=local_repo,
55
  task="tabular-classification",
56
  data=X_test,
57
  )
58
+ shutil.copy(label_path, os.path.join(local_repo, label_path))
59
  if "__file__" in locals(): # __file__ not defined during docs built
60
  # Add this script itself to the files to be uploaded for reproducibility
61
  hub_utils.add_files(__file__, dst=local_repo)
labels.csv ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ,Label,Similar
2
+ 0,1000017.csv,"['1000032.csv', '1000059.csv']"
3
+ 1,1000024.csv,
4
+ 2,1000035.csv,
5
+ 3,1000058.csv,
6
+ 4,1000060.csv,
7
+ 5,1000061.csv,
8
+ 6,1000062.csv,
9
+ 7,1000063.csv,
10
+ 8,1000094.csv,
11
+ 9,1000096.csv,