From: Karanraj Chauhan Date: Wed, 31 Jul 2019 20:53:02 +0000 (-0400) Subject: mgr/diskprediction_local: Replaced old models and updated predictor. X-Git-Tag: v15.1.0~981^2~4 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=26ea7b670a9127c2e6f8640ada9efcb371a132d8;p=ceph.git mgr/diskprediction_local: Replaced old models and updated predictor. ProphetStor models are replaced with in-house developed models. Preprocessors are also stored in addition to the prediction models. Objects are now stored using joblib instead of pickle, as recommended by scikit-learn docs. "manufacturer-specific" models are used instead of "best-feature-match" models. i.e., instead of models being trained (presumably) just based on what features are available, models have been trained for each manufacturer. This is because of variation in meaning and availability of SMART attributes across manufacturers. Updated config.json, requirements.txt, and DiskFailurePredictor for these changes. 
Signed-off-by: Karanraj Chauhan --- diff --git a/src/pybind/mgr/diskprediction_local/models/config.json b/src/pybind/mgr/diskprediction_local/models/config.json index 9a1485ca35dd..f77cb11e06dd 100644 --- a/src/pybind/mgr/diskprediction_local/models/config.json +++ b/src/pybind/mgr/diskprediction_local/models/config.json @@ -1,77 +1,4 @@ { -"svm_123.pkl": ["smart_197_raw", "smart_183_raw", "smart_200_raw", "smart_194_raw", "smart_254_raw", "smart_252_raw", "smart_4_raw", "smart_222_raw", "smart_187_raw", "smart_184_raw"], -"svm_105.pkl": ["smart_197_raw", "smart_4_raw", "smart_5_raw", "smart_252_raw", "smart_184_raw", "smart_223_raw", "smart_198_raw", "smart_10_raw", "smart_189_raw", "smart_222_raw"], -"svm_82.pkl":["smart_184_raw", "smart_2_raw", "smart_187_raw", "smart_225_raw", "smart_198_raw", "smart_197_raw", "smart_4_raw", "smart_13_raw", "smart_188_raw", "smart_251_raw"], -"svm_186.pkl":["smart_3_raw", "smart_11_raw", "smart_198_raw", "smart_250_raw", "smart_13_raw", "smart_200_raw", "smart_224_raw", "smart_187_raw", "smart_22_raw", "smart_4_raw", "smart_220_raw"], -"svm_14.pkl":["smart_12_raw", "smart_226_raw", "smart_187_raw", "smart_196_raw", "smart_5_raw", "smart_183_raw", "smart_255_raw", "smart_250_raw", "smart_201_raw", "smart_8_raw"], -"svm_10.pkl":["smart_251_raw", "smart_4_raw", "smart_223_raw", "smart_13_raw", "smart_255_raw", "smart_188_raw", "smart_197_raw", "smart_201_raw", "smart_250_raw", "smart_15_raw"], -"svm_235.pkl":["smart_15_raw", "smart_255_raw", "smart_252_raw", "smart_197_raw", "smart_250_raw", "smart_254_raw", "smart_13_raw", "smart_251_raw", "smart_198_raw", "smart_189_raw", "smart_191_raw"], -"svm_234.pkl":["smart_187_raw", "smart_183_raw", "smart_3_raw", "smart_4_raw", "smart_222_raw", "smart_184_raw", "smart_5_raw", "smart_198_raw", "smart_200_raw", "smart_8_raw", "smart_10_raw"], -"svm_119.pkl":["smart_254_raw", "smart_8_raw", "smart_183_raw", "smart_184_raw", "smart_195_raw", "smart_252_raw", "smart_191_raw", "smart_10_raw", 
"smart_200_raw", "smart_197_raw"], -"svm_227.pkl":["smart_254_raw", "smart_189_raw", "smart_225_raw", "smart_224_raw", "smart_197_raw", "smart_223_raw", "smart_4_raw", "smart_183_raw", "smart_11_raw", "smart_184_raw", "smart_13_raw"], -"svm_18.pkl":["smart_197_raw", "smart_3_raw", "smart_220_raw", "smart_193_raw", "smart_10_raw", "smart_187_raw", "smart_188_raw", "smart_225_raw", "smart_194_raw", "smart_13_raw"], -"svm_78.pkl":["smart_10_raw", "smart_183_raw", "smart_191_raw", "smart_13_raw", "smart_198_raw", "smart_22_raw", "smart_195_raw", "smart_12_raw", "smart_224_raw", "smart_200_raw"], -"svm_239.pkl":["smart_3_raw", "smart_254_raw", "smart_199_raw", "smart_225_raw", "smart_187_raw", "smart_195_raw", "smart_197_raw", "smart_2_raw", "smart_193_raw", "smart_220_raw", "smart_183_raw"], -"svm_174.pkl":["smart_183_raw", "smart_196_raw", "smart_225_raw", "smart_189_raw", "smart_4_raw", "smart_3_raw", "smart_9_raw", "smart_198_raw", "smart_15_raw", "smart_5_raw", "smart_194_raw"], -"svm_104.pkl":["smart_12_raw", "smart_198_raw", "smart_197_raw", "smart_4_raw", "smart_240_raw", "smart_187_raw", "smart_225_raw", "smart_8_raw", "smart_3_raw", "smart_2_raw"], -"svm_12.pkl":["smart_222_raw", "smart_251_raw", "smart_194_raw", "smart_9_raw", "smart_184_raw", "smart_191_raw", "smart_187_raw", "smart_255_raw", "smart_4_raw", "smart_11_raw"], -"svm_97.pkl":["smart_15_raw", "smart_197_raw", "smart_190_raw", "smart_199_raw", "smart_200_raw", "smart_12_raw", "smart_191_raw", "smart_254_raw", "smart_194_raw", "smart_201_raw"], -"svm_118.pkl":["smart_11_raw", "smart_225_raw", "smart_196_raw", "smart_197_raw", "smart_198_raw", "smart_200_raw", "smart_3_raw", "smart_10_raw", "smart_191_raw", "smart_22_raw"], -"svm_185.pkl":["smart_191_raw", "smart_254_raw", "smart_3_raw", "smart_190_raw", "smart_15_raw", "smart_22_raw", "smart_2_raw", "smart_198_raw", "smart_13_raw", "smart_226_raw", "smart_225_raw"], -"svm_206.pkl":["smart_183_raw", "smart_192_raw", "smart_197_raw", "smart_255_raw", 
"smart_187_raw", "smart_254_raw", "smart_198_raw", "smart_13_raw", "smart_226_raw", "smart_240_raw", "smart_8_raw"], -"svm_225.pkl":["smart_224_raw", "smart_11_raw", "smart_5_raw", "smart_4_raw", "smart_225_raw", "smart_197_raw", "smart_15_raw", "smart_183_raw", "smart_193_raw", "smart_190_raw", "smart_187_raw"], -"svm_169.pkl":["smart_252_raw", "smart_183_raw", "smart_254_raw", "smart_11_raw", "smart_193_raw", "smart_22_raw", "smart_226_raw", "smart_189_raw", "smart_225_raw", "smart_198_raw", "smart_200_raw"], -"svm_79.pkl":["smart_184_raw", "smart_196_raw", "smart_4_raw", "smart_226_raw", "smart_199_raw", "smart_187_raw", "smart_193_raw", "smart_188_raw", "smart_12_raw", "smart_250_raw"], -"svm_69.pkl":["smart_187_raw", "smart_9_raw", "smart_200_raw", "smart_11_raw", "smart_252_raw", "smart_189_raw", "smart_4_raw", "smart_188_raw", "smart_255_raw", "smart_201_raw"], -"svm_201.pkl":["smart_224_raw", "smart_8_raw", "smart_250_raw", "smart_2_raw", "smart_198_raw", "smart_15_raw", "smart_193_raw", "smart_223_raw", "smart_3_raw", "smart_11_raw", "smart_191_raw"], -"svm_114.pkl":["smart_226_raw", "smart_188_raw", "smart_2_raw", "smart_11_raw", "smart_4_raw", "smart_193_raw", "smart_184_raw", "smart_194_raw", "smart_198_raw", "smart_13_raw"], -"svm_219.pkl":["smart_12_raw", "smart_22_raw", "smart_8_raw", "smart_191_raw", "smart_197_raw", "smart_254_raw", "smart_15_raw", "smart_193_raw", "smart_199_raw", "smart_225_raw", "smart_192_raw"], -"svm_168.pkl":["smart_255_raw", "smart_191_raw", "smart_193_raw", "smart_220_raw", "smart_5_raw", "smart_3_raw", "smart_222_raw", "smart_223_raw", "smart_197_raw", "smart_196_raw", "smart_22_raw"], -"svm_243.pkl":["smart_11_raw", "smart_255_raw", "smart_10_raw", "smart_189_raw", "smart_225_raw", "smart_240_raw", "smart_222_raw", "smart_197_raw", "smart_183_raw", "smart_198_raw", "smart_12_raw"], -"svm_195.pkl":["smart_183_raw", "smart_5_raw", "smart_11_raw", "smart_197_raw", "smart_15_raw", "smart_9_raw", "smart_4_raw", 
"smart_220_raw", "smart_12_raw", "smart_192_raw", "smart_240_raw"], -"svm_222.pkl":["smart_10_raw", "smart_13_raw", "smart_188_raw", "smart_15_raw", "smart_192_raw", "smart_224_raw", "smart_225_raw", "smart_187_raw", "smart_222_raw", "smart_220_raw", "smart_252_raw"], -"svm_62.pkl":["smart_196_raw", "smart_251_raw", "smart_187_raw", "smart_224_raw", "smart_11_raw", "smart_12_raw", "smart_8_raw", "smart_199_raw", "smart_220_raw", "smart_195_raw"], -"svm_151.pkl":["smart_187_raw", "smart_223_raw", "smart_200_raw", "smart_189_raw", "smart_251_raw", "smart_255_raw", "smart_222_raw", "smart_192_raw", "smart_12_raw", "smart_183_raw", "smart_22_raw"], -"svm_125.pkl":["smart_9_raw", "smart_252_raw", "smart_197_raw", "smart_251_raw", "smart_11_raw", "smart_12_raw", "smart_188_raw", "smart_240_raw", "smart_10_raw", "smart_223_raw"], -"svm_124.pkl":["smart_193_raw", "smart_187_raw", "smart_183_raw", "smart_11_raw", "smart_10_raw", "smart_8_raw", "smart_194_raw", "smart_189_raw", "smart_222_raw", "smart_191_raw"], -"svm_67.pkl":["smart_2_raw", "smart_8_raw", "smart_225_raw", "smart_240_raw", "smart_13_raw", "smart_5_raw", "smart_187_raw", "smart_198_raw", "smart_199_raw", "smart_3_raw"], -"svm_115.pkl":["smart_222_raw", "smart_193_raw", "smart_223_raw", "smart_195_raw", "smart_252_raw", "smart_189_raw", "smart_199_raw", "smart_187_raw", "smart_15_raw", "smart_184_raw"], -"svm_1.pkl":["smart_201_raw", "smart_8_raw", "smart_200_raw", "smart_252_raw", "smart_251_raw", "smart_187_raw", "smart_9_raw", "smart_188_raw", "smart_15_raw", "smart_184_raw"], -"svm_112.pkl":["smart_220_raw", "smart_197_raw", "smart_10_raw", "smart_188_raw", "smart_12_raw", "smart_4_raw", "smart_196_raw", "smart_3_raw", "smart_240_raw", "smart_225_raw"], -"svm_138.pkl":["smart_183_raw", "smart_10_raw", "smart_191_raw", "smart_195_raw", "smart_223_raw", "smart_189_raw", "smart_187_raw", "smart_255_raw", "smart_226_raw", "smart_8_raw"], -"svm_229.pkl":["smart_224_raw", "smart_8_raw", "smart_192_raw", 
"smart_220_raw", "smart_195_raw", "smart_183_raw", "smart_250_raw", "smart_187_raw", "smart_225_raw", "smart_4_raw", "smart_252_raw"], -"svm_145.pkl":["smart_190_raw", "smart_8_raw", "smart_226_raw", "smart_184_raw", "smart_225_raw", "smart_220_raw", "smart_193_raw", "smart_183_raw", "smart_201_raw", "smart_187_raw", "smart_2_raw"], -"svm_59.pkl":["smart_188_raw", "smart_11_raw", "smart_184_raw", "smart_2_raw", "smart_220_raw", "smart_198_raw", "smart_225_raw", "smart_240_raw", "smart_197_raw", "smart_251_raw"], -"svm_204.pkl":["smart_15_raw", "smart_240_raw", "smart_225_raw", "smart_223_raw", "smart_252_raw", "smart_22_raw", "smart_200_raw", "smart_13_raw", "smart_220_raw", "smart_198_raw", "smart_191_raw"], -"svm_88.pkl":["smart_198_raw", "smart_3_raw", "smart_8_raw", "smart_225_raw", "smart_251_raw", "smart_222_raw", "smart_188_raw", "smart_10_raw", "smart_240_raw", "smart_189_raw"], -"svm_182.pkl":["smart_10_raw", "smart_190_raw", "smart_250_raw", "smart_15_raw", "smart_193_raw", "smart_22_raw", "smart_200_raw", "smart_8_raw", "smart_4_raw", "smart_187_raw", "smart_9_raw"], -"svm_61.pkl":["smart_5_raw", "smart_12_raw", "smart_9_raw", "smart_198_raw", "smart_195_raw", "smart_252_raw", "smart_15_raw", "smart_240_raw", "smart_255_raw", "smart_224_raw"], -"svm_50.pkl":["smart_220_raw", "smart_5_raw", "smart_194_raw", "smart_250_raw", "smart_15_raw", "smart_240_raw", "smart_8_raw", "smart_198_raw", "smart_224_raw", "smart_191_raw"], -"svm_210.pkl":["smart_8_raw", "smart_15_raw", "smart_195_raw", "smart_224_raw", "smart_5_raw", "smart_191_raw", "smart_198_raw", "smart_225_raw", "smart_200_raw", "smart_251_raw", "smart_240_raw"], -"svm_16.pkl":["smart_222_raw", "smart_10_raw", "smart_250_raw", "smart_189_raw", "smart_191_raw", "smart_2_raw", "smart_5_raw", "smart_193_raw", "smart_9_raw", "smart_187_raw"], -"svm_85.pkl":["smart_252_raw", "smart_184_raw", "smart_9_raw", "smart_5_raw", "smart_254_raw", "smart_3_raw", "smart_195_raw", "smart_10_raw", "smart_12_raw", 
"smart_222_raw"], -"svm_36.pkl":["smart_201_raw", "smart_251_raw", "smart_184_raw", "smart_3_raw", "smart_5_raw", "smart_183_raw", "smart_194_raw", "smart_195_raw", "smart_224_raw", "smart_2_raw"], -"svm_33.pkl":["smart_223_raw", "smart_254_raw", "smart_225_raw", "smart_9_raw", "smart_199_raw", "smart_5_raw", "smart_189_raw", "smart_194_raw", "smart_240_raw", "smart_4_raw"], -"svm_3.pkl":["smart_225_raw", "smart_194_raw", "smart_3_raw", "smart_189_raw", "smart_9_raw", "smart_254_raw", "smart_240_raw", "smart_5_raw", "smart_255_raw", "smart_223_raw"], -"svm_93.pkl":["smart_8_raw", "smart_188_raw", "smart_5_raw", "smart_10_raw", "smart_222_raw", "smart_2_raw", "smart_254_raw", "smart_12_raw", "smart_193_raw", "smart_224_raw"], -"svm_120.pkl":["smart_189_raw", "smart_224_raw", "smart_222_raw", "smart_193_raw", "smart_5_raw", "smart_201_raw", "smart_8_raw", "smart_254_raw", "smart_194_raw", "smart_22_raw"], -"svm_128.pkl":["smart_195_raw", "smart_184_raw", "smart_251_raw", "smart_8_raw", "smart_5_raw", "smart_196_raw", "smart_10_raw", "smart_4_raw", "smart_225_raw", "smart_191_raw"], -"svm_212.pkl":["smart_225_raw", "smart_192_raw", "smart_10_raw", "smart_12_raw", "smart_222_raw", "smart_184_raw", "smart_13_raw", "smart_226_raw", "smart_5_raw", "smart_201_raw", "smart_22_raw"], -"svm_221.pkl":["smart_255_raw", "smart_2_raw", "smart_224_raw", "smart_192_raw", "smart_252_raw", "smart_13_raw", "smart_183_raw", "smart_193_raw", "smart_15_raw", "smart_199_raw", "smart_200_raw"], -"svm_223.pkl":["smart_4_raw", "smart_194_raw", "smart_9_raw", "smart_255_raw", "smart_188_raw", "smart_201_raw", "smart_3_raw", "smart_226_raw", "smart_192_raw", "smart_251_raw", "smart_191_raw"], -"svm_44.pkl":["smart_255_raw", "smart_11_raw", "smart_200_raw", "smart_3_raw", "smart_195_raw", "smart_201_raw", "smart_4_raw", "smart_5_raw", "smart_10_raw", "smart_191_raw"], -"svm_213.pkl":["smart_22_raw", "smart_191_raw", "smart_183_raw", "smart_4_raw", "smart_194_raw", "smart_255_raw", 
"smart_254_raw", "smart_193_raw", "smart_11_raw", "smart_10_raw", "smart_220_raw"], -"svm_131.pkl":["smart_22_raw", "smart_194_raw", "smart_184_raw", "smart_250_raw", "smart_10_raw", "smart_189_raw", "smart_183_raw", "smart_240_raw", "smart_12_raw", "smart_252_raw"], -"svm_6.pkl":["smart_194_raw", "smart_250_raw", "smart_223_raw", "smart_224_raw", "smart_184_raw", "smart_191_raw", "smart_201_raw", "smart_9_raw", "smart_252_raw", "smart_3_raw"], -"svm_161.pkl":["smart_255_raw", "smart_222_raw", "smart_226_raw", "smart_254_raw", "smart_183_raw", "smart_22_raw", "smart_12_raw", "smart_190_raw", "smart_11_raw", "smart_192_raw", "smart_251_raw"], -"svm_72.pkl":["smart_13_raw", "smart_184_raw", "smart_223_raw", "smart_240_raw", "smart_250_raw", "smart_251_raw", "smart_201_raw", "smart_196_raw", "smart_5_raw", "smart_4_raw"], -"svm_27.pkl":["smart_189_raw", "smart_188_raw", "smart_255_raw", "smart_251_raw", "smart_240_raw", "smart_15_raw", "smart_9_raw", "smart_191_raw", "smart_226_raw", "smart_10_raw"], -"svm_141.pkl":["smart_9_raw", "smart_191_raw", "smart_2_raw", "smart_226_raw", "smart_13_raw", "smart_22_raw", "smart_193_raw", "smart_222_raw", "smart_220_raw", "smart_225_raw", "smart_3_raw"], -"svm_57.pkl":["smart_12_raw", "smart_252_raw", "smart_190_raw", "smart_226_raw", "smart_10_raw", "smart_189_raw", "smart_193_raw", "smart_2_raw", "smart_9_raw", "smart_223_raw"], -"svm_236.pkl":["smart_200_raw", "smart_189_raw", "smart_226_raw", "smart_252_raw", "smart_250_raw", "smart_193_raw", "smart_13_raw", "smart_2_raw", "smart_254_raw", "smart_22_raw", "smart_9_raww"], -"svm_208.pkl":["smart_223_raw", "smart_15_raw", "smart_251_raw", "smart_5_raw", "smart_198_raw", "smart_252_raw", "smart_4_raw", "smart_8_raw", "smart_220_raw", "smart_254_raw", "smart_193_raw"], -"svm_230.pkl":["smart_184_raw", "smart_5_raw", "smart_191_raw", "smart_198_raw", "smart_11_raw", "smart_255_raw", "smart_189_raw", "smart_254_raw", "smart_196_raw", "smart_199_raw", "smart_223_raw"], 
-"svm_134.pkl":["smart_8_raw", "smart_194_raw", "smart_4_raw", "smart_189_raw", "smart_223_raw", "smart_5_raw", "smart_187_raw", "smart_9_raw", "smart_192_raw", "smart_220_raw"], -"svm_71.pkl":["smart_220_raw", "smart_13_raw", "smart_194_raw", "smart_197_raw", "smart_192_raw", "smart_22_raw", "smart_184_raw", "smart_199_raw", "smart_222_raw", "smart_183_raw"], -"svm_109.pkl":["smart_224_raw", "smart_252_raw", "smart_2_raw", "smart_200_raw", "smart_5_raw", "smart_194_raw", "smart_222_raw", "smart_198_raw", "smart_4_raw", "smart_13_raw"] +"hgst": ["user_capacity", "smart_1_normalized", "smart_1_raw", "smart_2_normalized", "smart_2_raw", "smart_3_normalized", "smart_3_raw", "smart_4_normalized", "smart_4_raw", "smart_5_normalized", "smart_5_raw", "smart_7_normalized", "smart_7_raw", "smart_8_normalized", "smart_8_raw", "smart_9_normalized", "smart_9_raw", "smart_10_normalized", "smart_10_raw", "smart_12_normalized", "smart_12_raw", "smart_22_normalized", "smart_22_raw", "smart_192_normalized", "smart_192_raw", "smart_193_normalized", "smart_193_raw", "smart_194_normalized", "smart_194_raw", "smart_196_normalized", "smart_196_raw", "smart_197_normalized", "smart_197_raw", "smart_198_normalized", "smart_198_raw", "smart_199_normalized", "smart_199_raw"], +"seagate": ["user_capacity", "smart_1_normalized", "smart_1_raw", "smart_5_normalized", "smart_5_raw", "smart_7_normalized", "smart_7_raw", "smart_9_normalized", "smart_9_raw", "smart_10_normalized", "smart_10_raw", "smart_184_normalized", "smart_184_raw", "smart_187_normalized", "smart_187_raw", "smart_188_normalized", "smart_188_raw", "smart_189_normalized", "smart_189_raw", "smart_190_normalized", "smart_190_raw", "smart_193_normalized", "smart_193_raw", "smart_194_normalized", "smart_194_raw", "smart_197_normalized", "smart_197_raw", "smart_198_normalized", "smart_198_raw", "smart_240_normalized", "smart_240_raw", "smart_241_normalized", "smart_241_raw", "smart_242_normalized", "smart_242_raw"] } diff --git 
a/src/pybind/mgr/diskprediction_local/models/hgst_predictor.joblib b/src/pybind/mgr/diskprediction_local/models/hgst_predictor.joblib new file mode 100644 index 000000000000..9e1c51f04795 Binary files /dev/null and b/src/pybind/mgr/diskprediction_local/models/hgst_predictor.joblib differ diff --git a/src/pybind/mgr/diskprediction_local/models/hgst_preprocessor.joblib b/src/pybind/mgr/diskprediction_local/models/hgst_preprocessor.joblib new file mode 100644 index 000000000000..2d94963ecd49 Binary files /dev/null and b/src/pybind/mgr/diskprediction_local/models/hgst_preprocessor.joblib differ diff --git a/src/pybind/mgr/diskprediction_local/models/seagate_predictor.joblib b/src/pybind/mgr/diskprediction_local/models/seagate_predictor.joblib new file mode 100644 index 000000000000..574223e668b9 Binary files /dev/null and b/src/pybind/mgr/diskprediction_local/models/seagate_predictor.joblib differ diff --git a/src/pybind/mgr/diskprediction_local/models/seagate_preprocessor.joblib b/src/pybind/mgr/diskprediction_local/models/seagate_preprocessor.joblib new file mode 100644 index 000000000000..34f96ab99d82 Binary files /dev/null and b/src/pybind/mgr/diskprediction_local/models/seagate_preprocessor.joblib differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_1.pkl b/src/pybind/mgr/diskprediction_local/models/svm_1.pkl deleted file mode 100644 index 5eb30f300e56..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_1.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_10.pkl b/src/pybind/mgr/diskprediction_local/models/svm_10.pkl deleted file mode 100644 index 9259c1e74334..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_10.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_104.pkl b/src/pybind/mgr/diskprediction_local/models/svm_104.pkl deleted file mode 100644 index d5d5cf5b7bc1..000000000000 Binary files 
a/src/pybind/mgr/diskprediction_local/models/svm_104.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_105.pkl b/src/pybind/mgr/diskprediction_local/models/svm_105.pkl deleted file mode 100644 index 4aadc3cfbf1d..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_105.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_109.pkl b/src/pybind/mgr/diskprediction_local/models/svm_109.pkl deleted file mode 100644 index c99c353be31f..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_109.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_112.pkl b/src/pybind/mgr/diskprediction_local/models/svm_112.pkl deleted file mode 100644 index 367a3304aa3b..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_112.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_114.pkl b/src/pybind/mgr/diskprediction_local/models/svm_114.pkl deleted file mode 100644 index 946d5cef1bab..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_114.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_115.pkl b/src/pybind/mgr/diskprediction_local/models/svm_115.pkl deleted file mode 100644 index ff834929ec65..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_115.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_118.pkl b/src/pybind/mgr/diskprediction_local/models/svm_118.pkl deleted file mode 100644 index eec8689ea63c..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_118.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_119.pkl b/src/pybind/mgr/diskprediction_local/models/svm_119.pkl deleted file mode 100644 index 6a26c050267c..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_119.pkl and /dev/null differ 
diff --git a/src/pybind/mgr/diskprediction_local/models/svm_12.pkl b/src/pybind/mgr/diskprediction_local/models/svm_12.pkl deleted file mode 100644 index 5cbe9775a153..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_12.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_120.pkl b/src/pybind/mgr/diskprediction_local/models/svm_120.pkl deleted file mode 100644 index d2041c267a41..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_120.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_123.pkl b/src/pybind/mgr/diskprediction_local/models/svm_123.pkl deleted file mode 100644 index 0ab6187e99da..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_123.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_124.pkl b/src/pybind/mgr/diskprediction_local/models/svm_124.pkl deleted file mode 100644 index 8f9ea4ec7c89..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_124.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_125.pkl b/src/pybind/mgr/diskprediction_local/models/svm_125.pkl deleted file mode 100644 index 4d49900f9322..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_125.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_128.pkl b/src/pybind/mgr/diskprediction_local/models/svm_128.pkl deleted file mode 100644 index 6a18726de2fe..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_128.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_131.pkl b/src/pybind/mgr/diskprediction_local/models/svm_131.pkl deleted file mode 100644 index e6a55dcaeced..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_131.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_134.pkl 
b/src/pybind/mgr/diskprediction_local/models/svm_134.pkl deleted file mode 100644 index 51171e00c594..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_134.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_138.pkl b/src/pybind/mgr/diskprediction_local/models/svm_138.pkl deleted file mode 100644 index bc98e0c7255f..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_138.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_14.pkl b/src/pybind/mgr/diskprediction_local/models/svm_14.pkl deleted file mode 100644 index c4547dc6394e..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_14.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_141.pkl b/src/pybind/mgr/diskprediction_local/models/svm_141.pkl deleted file mode 100644 index 86d9f38de313..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_141.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_145.pkl b/src/pybind/mgr/diskprediction_local/models/svm_145.pkl deleted file mode 100644 index 24ff96231037..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_145.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_151.pkl b/src/pybind/mgr/diskprediction_local/models/svm_151.pkl deleted file mode 100644 index 92bfd3f1b860..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_151.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_16.pkl b/src/pybind/mgr/diskprediction_local/models/svm_16.pkl deleted file mode 100644 index 11664b3dd01d..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_16.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_161.pkl b/src/pybind/mgr/diskprediction_local/models/svm_161.pkl deleted file mode 100644 index 
2d421685e6f8..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_161.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_168.pkl b/src/pybind/mgr/diskprediction_local/models/svm_168.pkl deleted file mode 100644 index 12a811cfab59..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_168.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_169.pkl b/src/pybind/mgr/diskprediction_local/models/svm_169.pkl deleted file mode 100644 index 0c51446c689b..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_169.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_174.pkl b/src/pybind/mgr/diskprediction_local/models/svm_174.pkl deleted file mode 100644 index d2945ce9f9a7..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_174.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_18.pkl b/src/pybind/mgr/diskprediction_local/models/svm_18.pkl deleted file mode 100644 index d05520ccd878..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_18.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_182.pkl b/src/pybind/mgr/diskprediction_local/models/svm_182.pkl deleted file mode 100644 index 7fcfb3cbdec3..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_182.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_185.pkl b/src/pybind/mgr/diskprediction_local/models/svm_185.pkl deleted file mode 100644 index 785301c17963..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_185.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_186.pkl b/src/pybind/mgr/diskprediction_local/models/svm_186.pkl deleted file mode 100644 index 4ea83da7773c..000000000000 Binary files 
a/src/pybind/mgr/diskprediction_local/models/svm_186.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_195.pkl b/src/pybind/mgr/diskprediction_local/models/svm_195.pkl deleted file mode 100644 index 12273f7ce7e1..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_195.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_201.pkl b/src/pybind/mgr/diskprediction_local/models/svm_201.pkl deleted file mode 100644 index c866cf00e63f..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_201.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_204.pkl b/src/pybind/mgr/diskprediction_local/models/svm_204.pkl deleted file mode 100644 index 8cf1c3aa28ee..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_204.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_206.pkl b/src/pybind/mgr/diskprediction_local/models/svm_206.pkl deleted file mode 100644 index cba64e800496..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_206.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_208.pkl b/src/pybind/mgr/diskprediction_local/models/svm_208.pkl deleted file mode 100644 index ba0df0abdbd2..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_208.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_210.pkl b/src/pybind/mgr/diskprediction_local/models/svm_210.pkl deleted file mode 100644 index 6b5bee219e3a..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_210.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_212.pkl b/src/pybind/mgr/diskprediction_local/models/svm_212.pkl deleted file mode 100644 index 11eafc64c565..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_212.pkl and /dev/null differ 
diff --git a/src/pybind/mgr/diskprediction_local/models/svm_213.pkl b/src/pybind/mgr/diskprediction_local/models/svm_213.pkl deleted file mode 100644 index 0b8475c581cc..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_213.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_219.pkl b/src/pybind/mgr/diskprediction_local/models/svm_219.pkl deleted file mode 100644 index 4a248c14ca05..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_219.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_221.pkl b/src/pybind/mgr/diskprediction_local/models/svm_221.pkl deleted file mode 100644 index e37c6b4fb3d7..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_221.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_222.pkl b/src/pybind/mgr/diskprediction_local/models/svm_222.pkl deleted file mode 100644 index e54303863e09..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_222.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_223.pkl b/src/pybind/mgr/diskprediction_local/models/svm_223.pkl deleted file mode 100644 index 8b208f4e8237..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_223.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_225.pkl b/src/pybind/mgr/diskprediction_local/models/svm_225.pkl deleted file mode 100644 index 3f2b62984afb..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_225.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_227.pkl b/src/pybind/mgr/diskprediction_local/models/svm_227.pkl deleted file mode 100644 index 5e4fb56f4b75..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_227.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_229.pkl 
b/src/pybind/mgr/diskprediction_local/models/svm_229.pkl deleted file mode 100644 index 1e9c33599ed7..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_229.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_230.pkl b/src/pybind/mgr/diskprediction_local/models/svm_230.pkl deleted file mode 100644 index 36f8205cead5..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_230.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_234.pkl b/src/pybind/mgr/diskprediction_local/models/svm_234.pkl deleted file mode 100644 index 199f9ba5110c..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_234.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_235.pkl b/src/pybind/mgr/diskprediction_local/models/svm_235.pkl deleted file mode 100644 index d986526eca2d..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_235.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_236.pkl b/src/pybind/mgr/diskprediction_local/models/svm_236.pkl deleted file mode 100644 index 160e22fae38a..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_236.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_239.pkl b/src/pybind/mgr/diskprediction_local/models/svm_239.pkl deleted file mode 100644 index 8d98572acce8..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_239.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_243.pkl b/src/pybind/mgr/diskprediction_local/models/svm_243.pkl deleted file mode 100644 index 4fca95e1da0e..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_243.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_27.pkl b/src/pybind/mgr/diskprediction_local/models/svm_27.pkl deleted file mode 100644 
index 011974ed1b94..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_27.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_3.pkl b/src/pybind/mgr/diskprediction_local/models/svm_3.pkl deleted file mode 100644 index e5e97a8888b2..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_3.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_33.pkl b/src/pybind/mgr/diskprediction_local/models/svm_33.pkl deleted file mode 100644 index e709d7b46e57..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_33.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_36.pkl b/src/pybind/mgr/diskprediction_local/models/svm_36.pkl deleted file mode 100644 index 3d87b8bd904c..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_36.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_44.pkl b/src/pybind/mgr/diskprediction_local/models/svm_44.pkl deleted file mode 100644 index 9abcece9239d..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_44.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_50.pkl b/src/pybind/mgr/diskprediction_local/models/svm_50.pkl deleted file mode 100644 index b7ce5eda94a8..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_50.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_57.pkl b/src/pybind/mgr/diskprediction_local/models/svm_57.pkl deleted file mode 100644 index fe7832894bb4..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_57.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_59.pkl b/src/pybind/mgr/diskprediction_local/models/svm_59.pkl deleted file mode 100644 index 76217777be8a..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_59.pkl and 
/dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_6.pkl b/src/pybind/mgr/diskprediction_local/models/svm_6.pkl deleted file mode 100644 index 4fb09d374642..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_6.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_61.pkl b/src/pybind/mgr/diskprediction_local/models/svm_61.pkl deleted file mode 100644 index 319fc5f457ba..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_61.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_62.pkl b/src/pybind/mgr/diskprediction_local/models/svm_62.pkl deleted file mode 100644 index 25b21aed63be..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_62.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_67.pkl b/src/pybind/mgr/diskprediction_local/models/svm_67.pkl deleted file mode 100644 index 1e6e7383a618..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_67.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_69.pkl b/src/pybind/mgr/diskprediction_local/models/svm_69.pkl deleted file mode 100644 index 22d349a7c5e6..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_69.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_71.pkl b/src/pybind/mgr/diskprediction_local/models/svm_71.pkl deleted file mode 100644 index e0760add9256..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_71.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_72.pkl b/src/pybind/mgr/diskprediction_local/models/svm_72.pkl deleted file mode 100644 index 5096aa8e4662..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_72.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_78.pkl 
b/src/pybind/mgr/diskprediction_local/models/svm_78.pkl deleted file mode 100644 index 7958f3b6c256..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_78.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_79.pkl b/src/pybind/mgr/diskprediction_local/models/svm_79.pkl deleted file mode 100644 index 2ed3a0fe9110..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_79.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_82.pkl b/src/pybind/mgr/diskprediction_local/models/svm_82.pkl deleted file mode 100644 index 2e1884094b77..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_82.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_85.pkl b/src/pybind/mgr/diskprediction_local/models/svm_85.pkl deleted file mode 100644 index 88161af56fa3..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_85.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_88.pkl b/src/pybind/mgr/diskprediction_local/models/svm_88.pkl deleted file mode 100644 index 715633982ced..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_88.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_93.pkl b/src/pybind/mgr/diskprediction_local/models/svm_93.pkl deleted file mode 100644 index 703429fe3c8b..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_93.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_97.pkl b/src/pybind/mgr/diskprediction_local/models/svm_97.pkl deleted file mode 100644 index 9653d20f397b..000000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_97.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/predictor.py b/src/pybind/mgr/diskprediction_local/predictor.py index bf9b0d7db16e..3ddd93466625 100644 --- 
a/src/pybind/mgr/diskprediction_local/predictor.py +++ b/src/pybind/mgr/diskprediction_local/predictor.py @@ -1,12 +1,13 @@ -"""Sample code for disk failure prediction. +"""Machine learning model for disk failure prediction. -This sample code is a community version for anyone who is interested in Machine -Learning and care about disk failure. +This class serves the disk failure prediction module. It uses the +models developed at the AICoE in the Office of the CTO at Red Hat. -This class provides a disk failure prediction module. Given models dirpath to -initialize a predictor instance and then use 6 days data to predict. Predict -function will return a string to indicate disk failure status: "Good", -"Warning", "Bad", or "Unknown". +An instance of the predictor is initialized by providing the path to trained +models. Then, to predict hard drive health and deduce time to failure, the +predict function is called with 6 days worth of SMART data from the hard drive. +It will return a string to indicate disk failure status: "Good", "Warning", +"Bad", or "Unknown". An example code is as follows: @@ -15,17 +16,15 @@ An example code is as follows: >>> if status: >>> model.predict(disk_days) 'Bad' - - -Provided by ProphetStor Data Services Inc. -http://www.prophetstor.com/ - """ - -from __future__ import print_function import os import json -import pickle +import joblib +import logging + +import numpy as np +import pandas as pd +from scipy import stats def get_diskfailurepredictor_path(): @@ -39,30 +38,33 @@ class DiskFailurePredictor(object): This class implements a disk failure prediction module. """ - + # json with manufacturer names as keys + # and features used for prediction as values CONFIG_FILE = "config.json" - EXCLUDED_ATTRS = ['smart_9_raw', 'smart_241_raw', 'smart_242_raw'] + PREDICTION_CLASSES = {-1: "Unknown", + 0: "Good", + 1: "Warning", + 2: "Bad"} + def __init__(self): """ This function may throw exception due to wrong file operation. 
""" - self.model_dirpath = "" self.model_context = {} + def initialize(self, model_dirpath): - """ - Initialize all models. + """Initialize all models. Save paths of all trained model files to list - Args: None + Arguments: + model_dirpath {str} -- path to directory of trained models Returns: - Error message. If all goes well, return an empty string. - - Raises: + str -- Error message. If all goes well, return None """ - + # read config file as json, if it exists config_path = os.path.join(model_dirpath, self.CONFIG_FILE) if not os.path.isfile(config_path): return "Missing config file: " + config_path @@ -70,196 +72,124 @@ class DiskFailurePredictor(object): with open(config_path) as f_conf: self.model_context = json.load(f_conf) - for model_name in self.model_context: - model_path = os.path.join(model_dirpath, model_name) - + # ensure all manufacturers whose context is defined in config file + # have models and preprocessors saved inside model_dirpath + for manufacturer in self.model_context: + preprocessor_path = os.path.join(model_dirpath, manufacturer + '_preprocessor.joblib') + if not os.path.isfile(preprocessor_path): + return "Missing preprocessor file: {}".format(preprocessor_path) + model_path = os.path.join(model_dirpath, manufacturer + '_predictor.joblib') if not os.path.isfile(model_path): - return "Missing model file: " + model_path + return "Missing model file: {}".format(model_path) self.model_dirpath = model_dirpath - def __preprocess(self, disk_days): - """ - Preprocess disk attributes. - - Args: - disk_days: Refer to function predict(...). - - Returns: - new_disk_days: Processed disk days. 
- """ - - req_attrs = [] - new_disk_days = [] - - attr_list = set.intersection(*[set(disk_day.keys()) - for disk_day in disk_days]) - for attr in attr_list: - if (attr.startswith('smart_') and attr.endswith('_raw')) and \ - attr not in self.EXCLUDED_ATTRS: - req_attrs.append(attr) - - for disk_day in disk_days: - new_disk_day = {} - for attr in req_attrs: - if float(disk_day[attr]) >= 0.0: - new_disk_day[attr] = disk_day[attr] - new_disk_days.append(new_disk_day) + def __format_raw_data(self, disk_days): + """Massages the input raw data into a form that can be used by the + predictor for preprocessing, feeding to model, etc. Specifically, + converts list of dictionaries to a pandas.DataFrame. - return new_disk_days - - @staticmethod - def __get_diff_attrs(disk_days): - """ - Get 5 days differential attributes. - - Args: - disk_days: Refer to function predict(...). + Arguments: + disk_days {list} -- list of n dictionaries representing SMART data + from the past n days. Value of n depends on the + Module defined in module.py Returns: - attr_list: All S.M.A.R.T. attributes used in given disk. Here we - use intersection set of all disk days. + pandas.DataFrame -- df where each row holds SMART attributes and + possibly other data for the drive from one day. + """ + # list of dictionaries to dataframe + df = pd.DataFrame(disk_days) - diff_disk_days: A list struct comprises 5 dictionaries, each - dictionary contains differential attributes. + # change from dict type {'bytes': 123} to just float64 type 123 + df['user_capacity'] = df['user_capacity'].apply(lambda x: x['bytes']) - Raises: - Exceptions of wrong list/dict operations. 
- """ + # change from dict type {'table': [{}, {}, {}]} to list type [{}, {}, {}] + df['ata_smart_attributes'] = df['ata_smart_attributes'].apply(lambda x: x['table']) - all_attrs = [set(disk_day.keys()) for disk_day in disk_days] - attr_list = list(set.intersection(*all_attrs)) - attr_list = disk_days[0].keys() - prev_days = disk_days[:-1] - curr_days = disk_days[1:] - diff_disk_days = [] + # make a separate column for raw and normalized values of each smart id + for day_idx in range(len(disk_days)): + for attr_dict in df.iloc[0]['ata_smart_attributes']: + smart_id = attr_dict['id'] + df.at[day_idx, 'smart_{}_raw'.format(smart_id)] = int(attr_dict['raw']['value']) + df.at[day_idx, 'smart_{}_normalized'.format(smart_id)] = int(attr_dict['value']) - for prev, cur in zip(prev_days, curr_days): - diff_disk_days.append({attr:(int(cur[attr]) - int(prev[attr])) - for attr in attr_list}) + # drop the now-redundant column + df = df.drop('ata_smart_attributes', axis=1) + return df - return attr_list, diff_disk_days - def __get_best_models(self, attr_list): - """ - Find the best model from model list according to given attribute list. + def __preprocess(self, disk_days_df): + """Scales and transforms input dataframe to feed it to prediction model - Args: - attr_list: All S.M.A.R.T. attributes used in given disk. + Arguments: + disk_days_df {pandas.DataFrame} -- df where each row holds drive + features from one day. Returns: - modelpath: The best model for the given attribute list. - model_attrlist: 'Ordered' attribute list of the returned model. - Must be aware that SMART attributes is in order. - - Raises: + numpy.ndarray -- (n, d) shaped array of n days worth of data and d + features, scaled """ - - models = self.model_context.keys() - - scores = [] - for model_name in models: - scores.append(sum(attr in attr_list - for attr in self.model_context[model_name])) - max_score = max(scores) - - # Skip if too few matched attributes. 
- if max_score < 3: - print("Too few matched attributes") + # preprocessing may vary across manufacturers, so get manufacturer + manufacturer = DiskFailurePredictor.__get_manufacturer(disk_days_df['model_name'].iloc[0]).lower() + + # keep only the features used for prediction for current manufacturer + try: + disk_days_df = disk_days_df[self.model_context[manufacturer]] + except KeyError as e: + # TODO: change to log.error + print("Either SMART attributes mismatch for hard drive and prediction model,\ + or 'model_name' not available in input data") + print(e) return None - best_models = {} - best_model_indices = [idx for idx, score in enumerate(scores) - if score > max_score - 2] - for model_idx in best_model_indices: - model_name = list(models)[model_idx] - model_path = os.path.join(self.model_dirpath, model_name) - model_attrlist = self.model_context[model_name] - best_models[model_path] = model_attrlist + # scale raw data + preprocessor_path = os.path.join(self.model_dirpath, manufacturer + '_preprocessor.joblib') + preprocessor = joblib.load(preprocessor_path) + disk_days_df = preprocessor.transform(disk_days_df) + return disk_days_df - return best_models - # return os.path.join(self.model_dirpath, model_name), model_attrlist @staticmethod - def __get_ordered_attrs(disk_days, model_attrlist): - """ - Return ordered attributes of given disk days. + def __get_manufacturer(model_name): + """Returns the manufacturer name for a given hard drive model name - Args: - disk_days: Unordered disk days. - model_attrlist: Model's ordered attribute list. + Arguments: + model_name {str} -- hard drive model name Returns: - ordered_attrs: Ordered disk days. 
- - Raises: None + str -- manufacturer name """ + if model_name.startswith("W"): + return "WDC" + elif model_name.startswith("T"): + return "Toshiba" + elif model_name.startswith("S"): + return "Seagate" + elif model_name.startswith("Hi"): + return "Hitachi" + else: + return "HGST" - ordered_attrs = [] - - for one_day in disk_days: - one_day_attrs = [] - - for attr in model_attrlist: - if attr in one_day: - one_day_attrs.append(one_day[attr]) - else: - one_day_attrs.append(0) - - ordered_attrs.append(one_day_attrs) - - return ordered_attrs def predict(self, disk_days): - """ - Predict using given 6-days disk S.M.A.R.T. attributes. - - Args: - disk_days: A list struct comprises 6 dictionaries. These - dictionaries store 'consecutive' days of disk SMART - attributes. - Returns: - A string indicates prediction result. One of following four strings - will be returned according to disk failure status: - (1) Good : Disk is health - (2) Warning : Disk has some symptoms but may not fail immediately - (3) Bad : Disk is in danger and data backup is highly recommended - (4) Unknown : Not enough data for prediction. 
- - Raises: - Pickle exceptions - """ - - all_pred = [] - - proc_disk_days = self.__preprocess(disk_days) - attr_list, diff_data = DiskFailurePredictor.__get_diff_attrs(proc_disk_days) - modellist = self.__get_best_models(attr_list) - if modellist is None: - return "Unknown" - - for modelpath in modellist: - model_attrlist = modellist[modelpath] - ordered_data = DiskFailurePredictor.__get_ordered_attrs( - diff_data, model_attrlist) - - try: - with open(modelpath, 'rb') as f_model: - clf = pickle.load(f_model) + # massage data into a format that can be fed to models + raw_df = self.__format_raw_data(disk_days) - except UnicodeDecodeError: - # Compatibility for python3 - with open(modelpath, 'rb') as f_model: - clf = pickle.load(f_model, encoding='latin1') + # preprocess + preprocessed_data = self.__preprocess(raw_df) + if preprocessed_data is None: + return DiskFailurePredictor.PREDICTION_CLASSES[-1] - pred = clf.predict(ordered_data) + # get model for current manufacturer + manufacturer = self.__get_manufacturer(raw_df['model_name'].iloc[0]).lower() + model_path = os.path.join(self.model_dirpath, manufacturer + '_predictor.joblib') + model = joblib.load(model_path) - all_pred.append(1 if any(pred) else 0) + # predictions for each day + preds = model.predict(preprocessed_data) - score = 2 ** sum(all_pred) - len(modellist) - if score > 10: - return "Bad" - if score > 4: - return "Warning" - return "Good" + # use majority vote to decide class. 
raise if a nan prediction exists + pred_class_id = stats.mode(preds, nan_policy='raise').mode[0] + return DiskFailurePredictor.PREDICTION_CLASSES[pred_class_id] diff --git a/src/pybind/mgr/diskprediction_local/requirements.txt b/src/pybind/mgr/diskprediction_local/requirements.txt index 92a5aa3ab0c1..8769b42e6010 100644 --- a/src/pybind/mgr/diskprediction_local/requirements.txt +++ b/src/pybind/mgr/diskprediction_local/requirements.txt @@ -1,3 +1,6 @@ -numpy==1.15.1 -scikit-learn==0.19.2 -scipy==1.1.0 \ No newline at end of file +numpy==1.16.4 +scipy==1.2.1 +pandas==0.25.0 +joblib==0.13.2 +scikit-learn==0.21.2 +rgf-python==3.6.0