From: Karanraj Chauhan Date: Wed, 31 Jul 2019 20:53:02 +0000 (-0400) Subject: mgr/diskprediction_local: Replaced old models and updated predictor. X-Git-Tag: v15.1.0~981^2~4 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=26ea7b670a9127c2e6f8640ada9efcb371a132d8;p=ceph.git mgr/diskprediction_local: Replaced old models and updated predictor. ProphetStor models are replaced with in-house developed models. Preprocessors are also stored in addition to the prediction models. Objects are now stored using joblib instead of pickle, as recommended by scikit-learn docs. "manufacturer-specific" models are used instead of "best-feature-match" models. i.e., instead of models being trained (presumably) just based on what features are available, models have been trained for each manufacturer. This is because of variation in meaning and availability of SMART attributes across manufacturers. Updated config.json, requirements.txt, and DiskFailurePredictor for these changes. 
Signed-off-by: Karanraj Chauhan --- diff --git a/src/pybind/mgr/diskprediction_local/models/config.json b/src/pybind/mgr/diskprediction_local/models/config.json index 9a1485ca35d..f77cb11e06d 100644 --- a/src/pybind/mgr/diskprediction_local/models/config.json +++ b/src/pybind/mgr/diskprediction_local/models/config.json @@ -1,77 +1,4 @@ { -"svm_123.pkl": ["smart_197_raw", "smart_183_raw", "smart_200_raw", "smart_194_raw", "smart_254_raw", "smart_252_raw", "smart_4_raw", "smart_222_raw", "smart_187_raw", "smart_184_raw"], -"svm_105.pkl": ["smart_197_raw", "smart_4_raw", "smart_5_raw", "smart_252_raw", "smart_184_raw", "smart_223_raw", "smart_198_raw", "smart_10_raw", "smart_189_raw", "smart_222_raw"], -"svm_82.pkl":["smart_184_raw", "smart_2_raw", "smart_187_raw", "smart_225_raw", "smart_198_raw", "smart_197_raw", "smart_4_raw", "smart_13_raw", "smart_188_raw", "smart_251_raw"], -"svm_186.pkl":["smart_3_raw", "smart_11_raw", "smart_198_raw", "smart_250_raw", "smart_13_raw", "smart_200_raw", "smart_224_raw", "smart_187_raw", "smart_22_raw", "smart_4_raw", "smart_220_raw"], -"svm_14.pkl":["smart_12_raw", "smart_226_raw", "smart_187_raw", "smart_196_raw", "smart_5_raw", "smart_183_raw", "smart_255_raw", "smart_250_raw", "smart_201_raw", "smart_8_raw"], -"svm_10.pkl":["smart_251_raw", "smart_4_raw", "smart_223_raw", "smart_13_raw", "smart_255_raw", "smart_188_raw", "smart_197_raw", "smart_201_raw", "smart_250_raw", "smart_15_raw"], -"svm_235.pkl":["smart_15_raw", "smart_255_raw", "smart_252_raw", "smart_197_raw", "smart_250_raw", "smart_254_raw", "smart_13_raw", "smart_251_raw", "smart_198_raw", "smart_189_raw", "smart_191_raw"], -"svm_234.pkl":["smart_187_raw", "smart_183_raw", "smart_3_raw", "smart_4_raw", "smart_222_raw", "smart_184_raw", "smart_5_raw", "smart_198_raw", "smart_200_raw", "smart_8_raw", "smart_10_raw"], -"svm_119.pkl":["smart_254_raw", "smart_8_raw", "smart_183_raw", "smart_184_raw", "smart_195_raw", "smart_252_raw", "smart_191_raw", "smart_10_raw", 
"smart_200_raw", "smart_197_raw"], -"svm_227.pkl":["smart_254_raw", "smart_189_raw", "smart_225_raw", "smart_224_raw", "smart_197_raw", "smart_223_raw", "smart_4_raw", "smart_183_raw", "smart_11_raw", "smart_184_raw", "smart_13_raw"], -"svm_18.pkl":["smart_197_raw", "smart_3_raw", "smart_220_raw", "smart_193_raw", "smart_10_raw", "smart_187_raw", "smart_188_raw", "smart_225_raw", "smart_194_raw", "smart_13_raw"], -"svm_78.pkl":["smart_10_raw", "smart_183_raw", "smart_191_raw", "smart_13_raw", "smart_198_raw", "smart_22_raw", "smart_195_raw", "smart_12_raw", "smart_224_raw", "smart_200_raw"], -"svm_239.pkl":["smart_3_raw", "smart_254_raw", "smart_199_raw", "smart_225_raw", "smart_187_raw", "smart_195_raw", "smart_197_raw", "smart_2_raw", "smart_193_raw", "smart_220_raw", "smart_183_raw"], -"svm_174.pkl":["smart_183_raw", "smart_196_raw", "smart_225_raw", "smart_189_raw", "smart_4_raw", "smart_3_raw", "smart_9_raw", "smart_198_raw", "smart_15_raw", "smart_5_raw", "smart_194_raw"], -"svm_104.pkl":["smart_12_raw", "smart_198_raw", "smart_197_raw", "smart_4_raw", "smart_240_raw", "smart_187_raw", "smart_225_raw", "smart_8_raw", "smart_3_raw", "smart_2_raw"], -"svm_12.pkl":["smart_222_raw", "smart_251_raw", "smart_194_raw", "smart_9_raw", "smart_184_raw", "smart_191_raw", "smart_187_raw", "smart_255_raw", "smart_4_raw", "smart_11_raw"], -"svm_97.pkl":["smart_15_raw", "smart_197_raw", "smart_190_raw", "smart_199_raw", "smart_200_raw", "smart_12_raw", "smart_191_raw", "smart_254_raw", "smart_194_raw", "smart_201_raw"], -"svm_118.pkl":["smart_11_raw", "smart_225_raw", "smart_196_raw", "smart_197_raw", "smart_198_raw", "smart_200_raw", "smart_3_raw", "smart_10_raw", "smart_191_raw", "smart_22_raw"], -"svm_185.pkl":["smart_191_raw", "smart_254_raw", "smart_3_raw", "smart_190_raw", "smart_15_raw", "smart_22_raw", "smart_2_raw", "smart_198_raw", "smart_13_raw", "smart_226_raw", "smart_225_raw"], -"svm_206.pkl":["smart_183_raw", "smart_192_raw", "smart_197_raw", "smart_255_raw", 
"smart_187_raw", "smart_254_raw", "smart_198_raw", "smart_13_raw", "smart_226_raw", "smart_240_raw", "smart_8_raw"], -"svm_225.pkl":["smart_224_raw", "smart_11_raw", "smart_5_raw", "smart_4_raw", "smart_225_raw", "smart_197_raw", "smart_15_raw", "smart_183_raw", "smart_193_raw", "smart_190_raw", "smart_187_raw"], -"svm_169.pkl":["smart_252_raw", "smart_183_raw", "smart_254_raw", "smart_11_raw", "smart_193_raw", "smart_22_raw", "smart_226_raw", "smart_189_raw", "smart_225_raw", "smart_198_raw", "smart_200_raw"], -"svm_79.pkl":["smart_184_raw", "smart_196_raw", "smart_4_raw", "smart_226_raw", "smart_199_raw", "smart_187_raw", "smart_193_raw", "smart_188_raw", "smart_12_raw", "smart_250_raw"], -"svm_69.pkl":["smart_187_raw", "smart_9_raw", "smart_200_raw", "smart_11_raw", "smart_252_raw", "smart_189_raw", "smart_4_raw", "smart_188_raw", "smart_255_raw", "smart_201_raw"], -"svm_201.pkl":["smart_224_raw", "smart_8_raw", "smart_250_raw", "smart_2_raw", "smart_198_raw", "smart_15_raw", "smart_193_raw", "smart_223_raw", "smart_3_raw", "smart_11_raw", "smart_191_raw"], -"svm_114.pkl":["smart_226_raw", "smart_188_raw", "smart_2_raw", "smart_11_raw", "smart_4_raw", "smart_193_raw", "smart_184_raw", "smart_194_raw", "smart_198_raw", "smart_13_raw"], -"svm_219.pkl":["smart_12_raw", "smart_22_raw", "smart_8_raw", "smart_191_raw", "smart_197_raw", "smart_254_raw", "smart_15_raw", "smart_193_raw", "smart_199_raw", "smart_225_raw", "smart_192_raw"], -"svm_168.pkl":["smart_255_raw", "smart_191_raw", "smart_193_raw", "smart_220_raw", "smart_5_raw", "smart_3_raw", "smart_222_raw", "smart_223_raw", "smart_197_raw", "smart_196_raw", "smart_22_raw"], -"svm_243.pkl":["smart_11_raw", "smart_255_raw", "smart_10_raw", "smart_189_raw", "smart_225_raw", "smart_240_raw", "smart_222_raw", "smart_197_raw", "smart_183_raw", "smart_198_raw", "smart_12_raw"], -"svm_195.pkl":["smart_183_raw", "smart_5_raw", "smart_11_raw", "smart_197_raw", "smart_15_raw", "smart_9_raw", "smart_4_raw", 
"smart_220_raw", "smart_12_raw", "smart_192_raw", "smart_240_raw"], -"svm_222.pkl":["smart_10_raw", "smart_13_raw", "smart_188_raw", "smart_15_raw", "smart_192_raw", "smart_224_raw", "smart_225_raw", "smart_187_raw", "smart_222_raw", "smart_220_raw", "smart_252_raw"], -"svm_62.pkl":["smart_196_raw", "smart_251_raw", "smart_187_raw", "smart_224_raw", "smart_11_raw", "smart_12_raw", "smart_8_raw", "smart_199_raw", "smart_220_raw", "smart_195_raw"], -"svm_151.pkl":["smart_187_raw", "smart_223_raw", "smart_200_raw", "smart_189_raw", "smart_251_raw", "smart_255_raw", "smart_222_raw", "smart_192_raw", "smart_12_raw", "smart_183_raw", "smart_22_raw"], -"svm_125.pkl":["smart_9_raw", "smart_252_raw", "smart_197_raw", "smart_251_raw", "smart_11_raw", "smart_12_raw", "smart_188_raw", "smart_240_raw", "smart_10_raw", "smart_223_raw"], -"svm_124.pkl":["smart_193_raw", "smart_187_raw", "smart_183_raw", "smart_11_raw", "smart_10_raw", "smart_8_raw", "smart_194_raw", "smart_189_raw", "smart_222_raw", "smart_191_raw"], -"svm_67.pkl":["smart_2_raw", "smart_8_raw", "smart_225_raw", "smart_240_raw", "smart_13_raw", "smart_5_raw", "smart_187_raw", "smart_198_raw", "smart_199_raw", "smart_3_raw"], -"svm_115.pkl":["smart_222_raw", "smart_193_raw", "smart_223_raw", "smart_195_raw", "smart_252_raw", "smart_189_raw", "smart_199_raw", "smart_187_raw", "smart_15_raw", "smart_184_raw"], -"svm_1.pkl":["smart_201_raw", "smart_8_raw", "smart_200_raw", "smart_252_raw", "smart_251_raw", "smart_187_raw", "smart_9_raw", "smart_188_raw", "smart_15_raw", "smart_184_raw"], -"svm_112.pkl":["smart_220_raw", "smart_197_raw", "smart_10_raw", "smart_188_raw", "smart_12_raw", "smart_4_raw", "smart_196_raw", "smart_3_raw", "smart_240_raw", "smart_225_raw"], -"svm_138.pkl":["smart_183_raw", "smart_10_raw", "smart_191_raw", "smart_195_raw", "smart_223_raw", "smart_189_raw", "smart_187_raw", "smart_255_raw", "smart_226_raw", "smart_8_raw"], -"svm_229.pkl":["smart_224_raw", "smart_8_raw", "smart_192_raw", 
"smart_220_raw", "smart_195_raw", "smart_183_raw", "smart_250_raw", "smart_187_raw", "smart_225_raw", "smart_4_raw", "smart_252_raw"], -"svm_145.pkl":["smart_190_raw", "smart_8_raw", "smart_226_raw", "smart_184_raw", "smart_225_raw", "smart_220_raw", "smart_193_raw", "smart_183_raw", "smart_201_raw", "smart_187_raw", "smart_2_raw"], -"svm_59.pkl":["smart_188_raw", "smart_11_raw", "smart_184_raw", "smart_2_raw", "smart_220_raw", "smart_198_raw", "smart_225_raw", "smart_240_raw", "smart_197_raw", "smart_251_raw"], -"svm_204.pkl":["smart_15_raw", "smart_240_raw", "smart_225_raw", "smart_223_raw", "smart_252_raw", "smart_22_raw", "smart_200_raw", "smart_13_raw", "smart_220_raw", "smart_198_raw", "smart_191_raw"], -"svm_88.pkl":["smart_198_raw", "smart_3_raw", "smart_8_raw", "smart_225_raw", "smart_251_raw", "smart_222_raw", "smart_188_raw", "smart_10_raw", "smart_240_raw", "smart_189_raw"], -"svm_182.pkl":["smart_10_raw", "smart_190_raw", "smart_250_raw", "smart_15_raw", "smart_193_raw", "smart_22_raw", "smart_200_raw", "smart_8_raw", "smart_4_raw", "smart_187_raw", "smart_9_raw"], -"svm_61.pkl":["smart_5_raw", "smart_12_raw", "smart_9_raw", "smart_198_raw", "smart_195_raw", "smart_252_raw", "smart_15_raw", "smart_240_raw", "smart_255_raw", "smart_224_raw"], -"svm_50.pkl":["smart_220_raw", "smart_5_raw", "smart_194_raw", "smart_250_raw", "smart_15_raw", "smart_240_raw", "smart_8_raw", "smart_198_raw", "smart_224_raw", "smart_191_raw"], -"svm_210.pkl":["smart_8_raw", "smart_15_raw", "smart_195_raw", "smart_224_raw", "smart_5_raw", "smart_191_raw", "smart_198_raw", "smart_225_raw", "smart_200_raw", "smart_251_raw", "smart_240_raw"], -"svm_16.pkl":["smart_222_raw", "smart_10_raw", "smart_250_raw", "smart_189_raw", "smart_191_raw", "smart_2_raw", "smart_5_raw", "smart_193_raw", "smart_9_raw", "smart_187_raw"], -"svm_85.pkl":["smart_252_raw", "smart_184_raw", "smart_9_raw", "smart_5_raw", "smart_254_raw", "smart_3_raw", "smart_195_raw", "smart_10_raw", "smart_12_raw", 
"smart_222_raw"], -"svm_36.pkl":["smart_201_raw", "smart_251_raw", "smart_184_raw", "smart_3_raw", "smart_5_raw", "smart_183_raw", "smart_194_raw", "smart_195_raw", "smart_224_raw", "smart_2_raw"], -"svm_33.pkl":["smart_223_raw", "smart_254_raw", "smart_225_raw", "smart_9_raw", "smart_199_raw", "smart_5_raw", "smart_189_raw", "smart_194_raw", "smart_240_raw", "smart_4_raw"], -"svm_3.pkl":["smart_225_raw", "smart_194_raw", "smart_3_raw", "smart_189_raw", "smart_9_raw", "smart_254_raw", "smart_240_raw", "smart_5_raw", "smart_255_raw", "smart_223_raw"], -"svm_93.pkl":["smart_8_raw", "smart_188_raw", "smart_5_raw", "smart_10_raw", "smart_222_raw", "smart_2_raw", "smart_254_raw", "smart_12_raw", "smart_193_raw", "smart_224_raw"], -"svm_120.pkl":["smart_189_raw", "smart_224_raw", "smart_222_raw", "smart_193_raw", "smart_5_raw", "smart_201_raw", "smart_8_raw", "smart_254_raw", "smart_194_raw", "smart_22_raw"], -"svm_128.pkl":["smart_195_raw", "smart_184_raw", "smart_251_raw", "smart_8_raw", "smart_5_raw", "smart_196_raw", "smart_10_raw", "smart_4_raw", "smart_225_raw", "smart_191_raw"], -"svm_212.pkl":["smart_225_raw", "smart_192_raw", "smart_10_raw", "smart_12_raw", "smart_222_raw", "smart_184_raw", "smart_13_raw", "smart_226_raw", "smart_5_raw", "smart_201_raw", "smart_22_raw"], -"svm_221.pkl":["smart_255_raw", "smart_2_raw", "smart_224_raw", "smart_192_raw", "smart_252_raw", "smart_13_raw", "smart_183_raw", "smart_193_raw", "smart_15_raw", "smart_199_raw", "smart_200_raw"], -"svm_223.pkl":["smart_4_raw", "smart_194_raw", "smart_9_raw", "smart_255_raw", "smart_188_raw", "smart_201_raw", "smart_3_raw", "smart_226_raw", "smart_192_raw", "smart_251_raw", "smart_191_raw"], -"svm_44.pkl":["smart_255_raw", "smart_11_raw", "smart_200_raw", "smart_3_raw", "smart_195_raw", "smart_201_raw", "smart_4_raw", "smart_5_raw", "smart_10_raw", "smart_191_raw"], -"svm_213.pkl":["smart_22_raw", "smart_191_raw", "smart_183_raw", "smart_4_raw", "smart_194_raw", "smart_255_raw", 
"smart_254_raw", "smart_193_raw", "smart_11_raw", "smart_10_raw", "smart_220_raw"], -"svm_131.pkl":["smart_22_raw", "smart_194_raw", "smart_184_raw", "smart_250_raw", "smart_10_raw", "smart_189_raw", "smart_183_raw", "smart_240_raw", "smart_12_raw", "smart_252_raw"], -"svm_6.pkl":["smart_194_raw", "smart_250_raw", "smart_223_raw", "smart_224_raw", "smart_184_raw", "smart_191_raw", "smart_201_raw", "smart_9_raw", "smart_252_raw", "smart_3_raw"], -"svm_161.pkl":["smart_255_raw", "smart_222_raw", "smart_226_raw", "smart_254_raw", "smart_183_raw", "smart_22_raw", "smart_12_raw", "smart_190_raw", "smart_11_raw", "smart_192_raw", "smart_251_raw"], -"svm_72.pkl":["smart_13_raw", "smart_184_raw", "smart_223_raw", "smart_240_raw", "smart_250_raw", "smart_251_raw", "smart_201_raw", "smart_196_raw", "smart_5_raw", "smart_4_raw"], -"svm_27.pkl":["smart_189_raw", "smart_188_raw", "smart_255_raw", "smart_251_raw", "smart_240_raw", "smart_15_raw", "smart_9_raw", "smart_191_raw", "smart_226_raw", "smart_10_raw"], -"svm_141.pkl":["smart_9_raw", "smart_191_raw", "smart_2_raw", "smart_226_raw", "smart_13_raw", "smart_22_raw", "smart_193_raw", "smart_222_raw", "smart_220_raw", "smart_225_raw", "smart_3_raw"], -"svm_57.pkl":["smart_12_raw", "smart_252_raw", "smart_190_raw", "smart_226_raw", "smart_10_raw", "smart_189_raw", "smart_193_raw", "smart_2_raw", "smart_9_raw", "smart_223_raw"], -"svm_236.pkl":["smart_200_raw", "smart_189_raw", "smart_226_raw", "smart_252_raw", "smart_250_raw", "smart_193_raw", "smart_13_raw", "smart_2_raw", "smart_254_raw", "smart_22_raw", "smart_9_raww"], -"svm_208.pkl":["smart_223_raw", "smart_15_raw", "smart_251_raw", "smart_5_raw", "smart_198_raw", "smart_252_raw", "smart_4_raw", "smart_8_raw", "smart_220_raw", "smart_254_raw", "smart_193_raw"], -"svm_230.pkl":["smart_184_raw", "smart_5_raw", "smart_191_raw", "smart_198_raw", "smart_11_raw", "smart_255_raw", "smart_189_raw", "smart_254_raw", "smart_196_raw", "smart_199_raw", "smart_223_raw"], 
-"svm_134.pkl":["smart_8_raw", "smart_194_raw", "smart_4_raw", "smart_189_raw", "smart_223_raw", "smart_5_raw", "smart_187_raw", "smart_9_raw", "smart_192_raw", "smart_220_raw"], -"svm_71.pkl":["smart_220_raw", "smart_13_raw", "smart_194_raw", "smart_197_raw", "smart_192_raw", "smart_22_raw", "smart_184_raw", "smart_199_raw", "smart_222_raw", "smart_183_raw"], -"svm_109.pkl":["smart_224_raw", "smart_252_raw", "smart_2_raw", "smart_200_raw", "smart_5_raw", "smart_194_raw", "smart_222_raw", "smart_198_raw", "smart_4_raw", "smart_13_raw"] +"hgst": ["user_capacity", "smart_1_normalized", "smart_1_raw", "smart_2_normalized", "smart_2_raw", "smart_3_normalized", "smart_3_raw", "smart_4_normalized", "smart_4_raw", "smart_5_normalized", "smart_5_raw", "smart_7_normalized", "smart_7_raw", "smart_8_normalized", "smart_8_raw", "smart_9_normalized", "smart_9_raw", "smart_10_normalized", "smart_10_raw", "smart_12_normalized", "smart_12_raw", "smart_22_normalized", "smart_22_raw", "smart_192_normalized", "smart_192_raw", "smart_193_normalized", "smart_193_raw", "smart_194_normalized", "smart_194_raw", "smart_196_normalized", "smart_196_raw", "smart_197_normalized", "smart_197_raw", "smart_198_normalized", "smart_198_raw", "smart_199_normalized", "smart_199_raw"], +"seagate": ["user_capacity", "smart_1_normalized", "smart_1_raw", "smart_5_normalized", "smart_5_raw", "smart_7_normalized", "smart_7_raw", "smart_9_normalized", "smart_9_raw", "smart_10_normalized", "smart_10_raw", "smart_184_normalized", "smart_184_raw", "smart_187_normalized", "smart_187_raw", "smart_188_normalized", "smart_188_raw", "smart_189_normalized", "smart_189_raw", "smart_190_normalized", "smart_190_raw", "smart_193_normalized", "smart_193_raw", "smart_194_normalized", "smart_194_raw", "smart_197_normalized", "smart_197_raw", "smart_198_normalized", "smart_198_raw", "smart_240_normalized", "smart_240_raw", "smart_241_normalized", "smart_241_raw", "smart_242_normalized", "smart_242_raw"] } diff --git 
a/src/pybind/mgr/diskprediction_local/models/hgst_predictor.joblib b/src/pybind/mgr/diskprediction_local/models/hgst_predictor.joblib new file mode 100644 index 00000000000..9e1c51f0479 Binary files /dev/null and b/src/pybind/mgr/diskprediction_local/models/hgst_predictor.joblib differ diff --git a/src/pybind/mgr/diskprediction_local/models/hgst_preprocessor.joblib b/src/pybind/mgr/diskprediction_local/models/hgst_preprocessor.joblib new file mode 100644 index 00000000000..2d94963ecd4 Binary files /dev/null and b/src/pybind/mgr/diskprediction_local/models/hgst_preprocessor.joblib differ diff --git a/src/pybind/mgr/diskprediction_local/models/seagate_predictor.joblib b/src/pybind/mgr/diskprediction_local/models/seagate_predictor.joblib new file mode 100644 index 00000000000..574223e668b Binary files /dev/null and b/src/pybind/mgr/diskprediction_local/models/seagate_predictor.joblib differ diff --git a/src/pybind/mgr/diskprediction_local/models/seagate_preprocessor.joblib b/src/pybind/mgr/diskprediction_local/models/seagate_preprocessor.joblib new file mode 100644 index 00000000000..34f96ab99d8 Binary files /dev/null and b/src/pybind/mgr/diskprediction_local/models/seagate_preprocessor.joblib differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_1.pkl b/src/pybind/mgr/diskprediction_local/models/svm_1.pkl deleted file mode 100644 index 5eb30f300e5..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_1.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_10.pkl b/src/pybind/mgr/diskprediction_local/models/svm_10.pkl deleted file mode 100644 index 9259c1e7433..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_10.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_104.pkl b/src/pybind/mgr/diskprediction_local/models/svm_104.pkl deleted file mode 100644 index d5d5cf5b7bc..00000000000 Binary files 
a/src/pybind/mgr/diskprediction_local/models/svm_104.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_105.pkl b/src/pybind/mgr/diskprediction_local/models/svm_105.pkl deleted file mode 100644 index 4aadc3cfbf1..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_105.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_109.pkl b/src/pybind/mgr/diskprediction_local/models/svm_109.pkl deleted file mode 100644 index c99c353be31..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_109.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_112.pkl b/src/pybind/mgr/diskprediction_local/models/svm_112.pkl deleted file mode 100644 index 367a3304aa3..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_112.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_114.pkl b/src/pybind/mgr/diskprediction_local/models/svm_114.pkl deleted file mode 100644 index 946d5cef1ba..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_114.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_115.pkl b/src/pybind/mgr/diskprediction_local/models/svm_115.pkl deleted file mode 100644 index ff834929ec6..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_115.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_118.pkl b/src/pybind/mgr/diskprediction_local/models/svm_118.pkl deleted file mode 100644 index eec8689ea63..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_118.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_119.pkl b/src/pybind/mgr/diskprediction_local/models/svm_119.pkl deleted file mode 100644 index 6a26c050267..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_119.pkl and /dev/null differ diff --git 
a/src/pybind/mgr/diskprediction_local/models/svm_12.pkl b/src/pybind/mgr/diskprediction_local/models/svm_12.pkl deleted file mode 100644 index 5cbe9775a15..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_12.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_120.pkl b/src/pybind/mgr/diskprediction_local/models/svm_120.pkl deleted file mode 100644 index d2041c267a4..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_120.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_123.pkl b/src/pybind/mgr/diskprediction_local/models/svm_123.pkl deleted file mode 100644 index 0ab6187e99d..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_123.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_124.pkl b/src/pybind/mgr/diskprediction_local/models/svm_124.pkl deleted file mode 100644 index 8f9ea4ec7c8..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_124.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_125.pkl b/src/pybind/mgr/diskprediction_local/models/svm_125.pkl deleted file mode 100644 index 4d49900f932..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_125.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_128.pkl b/src/pybind/mgr/diskprediction_local/models/svm_128.pkl deleted file mode 100644 index 6a18726de2f..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_128.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_131.pkl b/src/pybind/mgr/diskprediction_local/models/svm_131.pkl deleted file mode 100644 index e6a55dcaece..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_131.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_134.pkl 
b/src/pybind/mgr/diskprediction_local/models/svm_134.pkl deleted file mode 100644 index 51171e00c59..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_134.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_138.pkl b/src/pybind/mgr/diskprediction_local/models/svm_138.pkl deleted file mode 100644 index bc98e0c7255..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_138.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_14.pkl b/src/pybind/mgr/diskprediction_local/models/svm_14.pkl deleted file mode 100644 index c4547dc6394..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_14.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_141.pkl b/src/pybind/mgr/diskprediction_local/models/svm_141.pkl deleted file mode 100644 index 86d9f38de31..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_141.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_145.pkl b/src/pybind/mgr/diskprediction_local/models/svm_145.pkl deleted file mode 100644 index 24ff9623103..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_145.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_151.pkl b/src/pybind/mgr/diskprediction_local/models/svm_151.pkl deleted file mode 100644 index 92bfd3f1b86..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_151.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_16.pkl b/src/pybind/mgr/diskprediction_local/models/svm_16.pkl deleted file mode 100644 index 11664b3dd01..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_16.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_161.pkl b/src/pybind/mgr/diskprediction_local/models/svm_161.pkl deleted file mode 100644 index 
2d421685e6f..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_161.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_168.pkl b/src/pybind/mgr/diskprediction_local/models/svm_168.pkl deleted file mode 100644 index 12a811cfab5..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_168.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_169.pkl b/src/pybind/mgr/diskprediction_local/models/svm_169.pkl deleted file mode 100644 index 0c51446c689..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_169.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_174.pkl b/src/pybind/mgr/diskprediction_local/models/svm_174.pkl deleted file mode 100644 index d2945ce9f9a..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_174.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_18.pkl b/src/pybind/mgr/diskprediction_local/models/svm_18.pkl deleted file mode 100644 index d05520ccd87..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_18.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_182.pkl b/src/pybind/mgr/diskprediction_local/models/svm_182.pkl deleted file mode 100644 index 7fcfb3cbdec..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_182.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_185.pkl b/src/pybind/mgr/diskprediction_local/models/svm_185.pkl deleted file mode 100644 index 785301c1796..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_185.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_186.pkl b/src/pybind/mgr/diskprediction_local/models/svm_186.pkl deleted file mode 100644 index 4ea83da7773..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_186.pkl and 
/dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_195.pkl b/src/pybind/mgr/diskprediction_local/models/svm_195.pkl deleted file mode 100644 index 12273f7ce7e..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_195.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_201.pkl b/src/pybind/mgr/diskprediction_local/models/svm_201.pkl deleted file mode 100644 index c866cf00e63..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_201.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_204.pkl b/src/pybind/mgr/diskprediction_local/models/svm_204.pkl deleted file mode 100644 index 8cf1c3aa28e..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_204.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_206.pkl b/src/pybind/mgr/diskprediction_local/models/svm_206.pkl deleted file mode 100644 index cba64e80049..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_206.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_208.pkl b/src/pybind/mgr/diskprediction_local/models/svm_208.pkl deleted file mode 100644 index ba0df0abdbd..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_208.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_210.pkl b/src/pybind/mgr/diskprediction_local/models/svm_210.pkl deleted file mode 100644 index 6b5bee219e3..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_210.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_212.pkl b/src/pybind/mgr/diskprediction_local/models/svm_212.pkl deleted file mode 100644 index 11eafc64c56..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_212.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_213.pkl 
b/src/pybind/mgr/diskprediction_local/models/svm_213.pkl deleted file mode 100644 index 0b8475c581c..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_213.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_219.pkl b/src/pybind/mgr/diskprediction_local/models/svm_219.pkl deleted file mode 100644 index 4a248c14ca0..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_219.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_221.pkl b/src/pybind/mgr/diskprediction_local/models/svm_221.pkl deleted file mode 100644 index e37c6b4fb3d..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_221.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_222.pkl b/src/pybind/mgr/diskprediction_local/models/svm_222.pkl deleted file mode 100644 index e54303863e0..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_222.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_223.pkl b/src/pybind/mgr/diskprediction_local/models/svm_223.pkl deleted file mode 100644 index 8b208f4e823..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_223.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_225.pkl b/src/pybind/mgr/diskprediction_local/models/svm_225.pkl deleted file mode 100644 index 3f2b62984af..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_225.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_227.pkl b/src/pybind/mgr/diskprediction_local/models/svm_227.pkl deleted file mode 100644 index 5e4fb56f4b7..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_227.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_229.pkl b/src/pybind/mgr/diskprediction_local/models/svm_229.pkl deleted file mode 100644 index 
1e9c33599ed..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_229.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_230.pkl b/src/pybind/mgr/diskprediction_local/models/svm_230.pkl deleted file mode 100644 index 36f8205cead..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_230.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_234.pkl b/src/pybind/mgr/diskprediction_local/models/svm_234.pkl deleted file mode 100644 index 199f9ba5110..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_234.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_235.pkl b/src/pybind/mgr/diskprediction_local/models/svm_235.pkl deleted file mode 100644 index d986526eca2..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_235.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_236.pkl b/src/pybind/mgr/diskprediction_local/models/svm_236.pkl deleted file mode 100644 index 160e22fae38..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_236.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_239.pkl b/src/pybind/mgr/diskprediction_local/models/svm_239.pkl deleted file mode 100644 index 8d98572acce..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_239.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_243.pkl b/src/pybind/mgr/diskprediction_local/models/svm_243.pkl deleted file mode 100644 index 4fca95e1da0..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_243.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_27.pkl b/src/pybind/mgr/diskprediction_local/models/svm_27.pkl deleted file mode 100644 index 011974ed1b9..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_27.pkl and 
/dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_3.pkl b/src/pybind/mgr/diskprediction_local/models/svm_3.pkl deleted file mode 100644 index e5e97a8888b..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_3.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_33.pkl b/src/pybind/mgr/diskprediction_local/models/svm_33.pkl deleted file mode 100644 index e709d7b46e5..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_33.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_36.pkl b/src/pybind/mgr/diskprediction_local/models/svm_36.pkl deleted file mode 100644 index 3d87b8bd904..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_36.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_44.pkl b/src/pybind/mgr/diskprediction_local/models/svm_44.pkl deleted file mode 100644 index 9abcece9239..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_44.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_50.pkl b/src/pybind/mgr/diskprediction_local/models/svm_50.pkl deleted file mode 100644 index b7ce5eda94a..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_50.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_57.pkl b/src/pybind/mgr/diskprediction_local/models/svm_57.pkl deleted file mode 100644 index fe7832894bb..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_57.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_59.pkl b/src/pybind/mgr/diskprediction_local/models/svm_59.pkl deleted file mode 100644 index 76217777be8..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_59.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_6.pkl 
b/src/pybind/mgr/diskprediction_local/models/svm_6.pkl deleted file mode 100644 index 4fb09d37464..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_6.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_61.pkl b/src/pybind/mgr/diskprediction_local/models/svm_61.pkl deleted file mode 100644 index 319fc5f457b..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_61.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_62.pkl b/src/pybind/mgr/diskprediction_local/models/svm_62.pkl deleted file mode 100644 index 25b21aed63b..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_62.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_67.pkl b/src/pybind/mgr/diskprediction_local/models/svm_67.pkl deleted file mode 100644 index 1e6e7383a61..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_67.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_69.pkl b/src/pybind/mgr/diskprediction_local/models/svm_69.pkl deleted file mode 100644 index 22d349a7c5e..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_69.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_71.pkl b/src/pybind/mgr/diskprediction_local/models/svm_71.pkl deleted file mode 100644 index e0760add925..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_71.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_72.pkl b/src/pybind/mgr/diskprediction_local/models/svm_72.pkl deleted file mode 100644 index 5096aa8e466..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_72.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_78.pkl b/src/pybind/mgr/diskprediction_local/models/svm_78.pkl deleted file mode 100644 index 7958f3b6c25..00000000000 Binary 
files a/src/pybind/mgr/diskprediction_local/models/svm_78.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_79.pkl b/src/pybind/mgr/diskprediction_local/models/svm_79.pkl deleted file mode 100644 index 2ed3a0fe911..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_79.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_82.pkl b/src/pybind/mgr/diskprediction_local/models/svm_82.pkl deleted file mode 100644 index 2e1884094b7..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_82.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_85.pkl b/src/pybind/mgr/diskprediction_local/models/svm_85.pkl deleted file mode 100644 index 88161af56fa..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_85.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_88.pkl b/src/pybind/mgr/diskprediction_local/models/svm_88.pkl deleted file mode 100644 index 715633982ce..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_88.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_93.pkl b/src/pybind/mgr/diskprediction_local/models/svm_93.pkl deleted file mode 100644 index 703429fe3c8..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_93.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/models/svm_97.pkl b/src/pybind/mgr/diskprediction_local/models/svm_97.pkl deleted file mode 100644 index 9653d20f397..00000000000 Binary files a/src/pybind/mgr/diskprediction_local/models/svm_97.pkl and /dev/null differ diff --git a/src/pybind/mgr/diskprediction_local/predictor.py b/src/pybind/mgr/diskprediction_local/predictor.py index bf9b0d7db16..3ddd9346662 100644 --- a/src/pybind/mgr/diskprediction_local/predictor.py +++ b/src/pybind/mgr/diskprediction_local/predictor.py @@ -1,12 +1,13 @@ -"""Sample code for 
disk failure prediction. +"""Machine learning model for disk failure prediction. -This sample code is a community version for anyone who is interested in Machine -Learning and care about disk failure. +This class serves the disk failure prediction module. It uses the +models developed at the AICoE in the Office of the CTO at Red Hat. -This class provides a disk failure prediction module. Given models dirpath to -initialize a predictor instance and then use 6 days data to predict. Predict -function will return a string to indicate disk failure status: "Good", -"Warning", "Bad", or "Unknown". +An instance of the predictor is initialized by providing the path to trained +models. Then, to predict hard drive health and deduce time to failure, the +predict function is called with 6 days worth of SMART data from the hard drive. +It will return a string to indicate disk failure status: "Good", "Warning", +"Bad", or "Unknown". An example code is as follows: @@ -15,17 +16,15 @@ An example code is as follows: >>> if status: >>> model.predict(disk_days) 'Bad' - - -Provided by ProphetStor Data Services Inc. -http://www.prophetstor.com/ - """ - -from __future__ import print_function import os import json -import pickle +import joblib +import logging + +import numpy as np +import pandas as pd +from scipy import stats def get_diskfailurepredictor_path(): @@ -39,30 +38,33 @@ class DiskFailurePredictor(object): This class implements a disk failure prediction module. """ - + # json with manufacturer names as keys + # and features used for prediction as values CONFIG_FILE = "config.json" - EXCLUDED_ATTRS = ['smart_9_raw', 'smart_241_raw', 'smart_242_raw'] + PREDICTION_CLASSES = {-1: "Unknown", + 0: "Good", + 1: "Warning", + 2: "Bad"} + def __init__(self): """ This function may throw exception due to wrong file operation. """ - self.model_dirpath = "" self.model_context = {} + def initialize(self, model_dirpath): - """ - Initialize all models. + """Initialize all models.
Save paths of all trained model files to list - Args: None + Arguments: + model_dirpath {str} -- path to directory of trained models Returns: - Error message. If all goes well, return an empty string. - - Raises: + str -- Error message. If all goes well, return None """ - + # read config file as json, if it exists config_path = os.path.join(model_dirpath, self.CONFIG_FILE) if not os.path.isfile(config_path): return "Missing config file: " + config_path @@ -70,196 +72,124 @@ class DiskFailurePredictor(object): with open(config_path) as f_conf: self.model_context = json.load(f_conf) - for model_name in self.model_context: - model_path = os.path.join(model_dirpath, model_name) - + # ensure all manufacturers whose context is defined in config file + # have models and preprocessors saved inside model_dirpath + for manufacturer in self.model_context: + preprocessor_path = os.path.join(model_dirpath, manufacturer + '_preprocessor.joblib') + if not os.path.isfile(preprocessor_path): + return "Missing preprocessor file: {}".format(preprocessor_path) + model_path = os.path.join(model_dirpath, manufacturer + '_predictor.joblib') if not os.path.isfile(model_path): - return "Missing model file: " + model_path + return "Missing model file: {}".format(model_path) self.model_dirpath = model_dirpath - def __preprocess(self, disk_days): - """ - Preprocess disk attributes. - - Args: - disk_days: Refer to function predict(...). - - Returns: - new_disk_days: Processed disk days. 
- """ - - req_attrs = [] - new_disk_days = [] - - attr_list = set.intersection(*[set(disk_day.keys()) - for disk_day in disk_days]) - for attr in attr_list: - if (attr.startswith('smart_') and attr.endswith('_raw')) and \ - attr not in self.EXCLUDED_ATTRS: - req_attrs.append(attr) - - for disk_day in disk_days: - new_disk_day = {} - for attr in req_attrs: - if float(disk_day[attr]) >= 0.0: - new_disk_day[attr] = disk_day[attr] - new_disk_days.append(new_disk_day) + def __format_raw_data(self, disk_days): + """Massages the input raw data into a form that can be used by the + predictor for preprocessing, feeding to model, etc. Specifically, + converts list of dictionaries to a pandas.DataFrame. - return new_disk_days - - @staticmethod - def __get_diff_attrs(disk_days): - """ - Get 5 days differential attributes. - - Args: - disk_days: Refer to function predict(...). + Arguments: + disk_days {list} -- list of n dictionaries representing SMART data + from the past n days. Value of n depends on the + Module defined in module.py Returns: - attr_list: All S.M.A.R.T. attributes used in given disk. Here we - use intersection set of all disk days. + pandas.DataFrame -- df where each row holds SMART attributes and + possibly other data for the drive from one day. + """ + # list of dictionaries to dataframe + df = pd.DataFrame(disk_days) - diff_disk_days: A list struct comprises 5 dictionaries, each - dictionary contains differential attributes. + # change from dict type {'bytes': 123} to just float64 type 123 + df['user_capacity'] = df['user_capacity'].apply(lambda x: x['bytes']) - Raises: - Exceptions of wrong list/dict operations. 
- """ + # change from dict type {'table': [{}, {}, {}]} to list type [{}, {}, {}] + df['ata_smart_attributes'] = df['ata_smart_attributes'].apply(lambda x: x['table']) - all_attrs = [set(disk_day.keys()) for disk_day in disk_days] - attr_list = list(set.intersection(*all_attrs)) - attr_list = disk_days[0].keys() - prev_days = disk_days[:-1] - curr_days = disk_days[1:] - diff_disk_days = [] + # make a separate column for raw and normalized values of each smart id + for day_idx in range(len(disk_days)): + for attr_dict in df.iloc[0]['ata_smart_attributes']: + smart_id = attr_dict['id'] + df.at[day_idx, 'smart_{}_raw'.format(smart_id)] = int(attr_dict['raw']['value']) + df.at[day_idx, 'smart_{}_normalized'.format(smart_id)] = int(attr_dict['value']) - for prev, cur in zip(prev_days, curr_days): - diff_disk_days.append({attr:(int(cur[attr]) - int(prev[attr])) - for attr in attr_list}) + # drop the now-redundant column + df = df.drop('ata_smart_attributes', axis=1) + return df - return attr_list, diff_disk_days - def __get_best_models(self, attr_list): - """ - Find the best model from model list according to given attribute list. + def __preprocess(self, disk_days_df): + """Scales and transforms input dataframe to feed it to prediction model - Args: - attr_list: All S.M.A.R.T. attributes used in given disk. + Arguments: + disk_days_df {pandas.DataFrame} -- df where each row holds drive + features from one day. Returns: - modelpath: The best model for the given attribute list. - model_attrlist: 'Ordered' attribute list of the returned model. - Must be aware that SMART attributes is in order. - - Raises: + numpy.ndarray -- (n, d) shaped array of n days worth of data and d + features, scaled """ - - models = self.model_context.keys() - - scores = [] - for model_name in models: - scores.append(sum(attr in attr_list - for attr in self.model_context[model_name])) - max_score = max(scores) - - # Skip if too few matched attributes. 
- if max_score < 3: - print("Too few matched attributes") + # preprocessing may vary across manufacturers, so get manufacturer + manufacturer = DiskFailurePredictor.__get_manufacturer(disk_days_df['model_name'].iloc[0]).lower() + + # keep only the features used for prediction for current manufacturer + try: + disk_days_df = disk_days_df[self.model_context[manufacturer]] + except KeyError as e: + # TODO: change to log.error + print("Either SMART attributes mismatch for hard drive and prediction model,\ + or 'model_name' not available in input data") + print(e) return None - best_models = {} - best_model_indices = [idx for idx, score in enumerate(scores) - if score > max_score - 2] - for model_idx in best_model_indices: - model_name = list(models)[model_idx] - model_path = os.path.join(self.model_dirpath, model_name) - model_attrlist = self.model_context[model_name] - best_models[model_path] = model_attrlist + # scale raw data + preprocessor_path = os.path.join(self.model_dirpath, manufacturer + '_preprocessor.joblib') + preprocessor = joblib.load(preprocessor_path) + disk_days_df = preprocessor.transform(disk_days_df) + return disk_days_df - return best_models - # return os.path.join(self.model_dirpath, model_name), model_attrlist @staticmethod - def __get_ordered_attrs(disk_days, model_attrlist): - """ - Return ordered attributes of given disk days. + def __get_manufacturer(model_name): + """Returns the manufacturer name for a given hard drive model name - Args: - disk_days: Unordered disk days. - model_attrlist: Model's ordered attribute list. + Arguments: + model_name {str} -- hard drive model name Returns: - ordered_attrs: Ordered disk days.
- - Raises: None + str -- manufacturer name """ + if model_name.startswith("W"): + return "WDC" + elif model_name.startswith("T"): + return "Toshiba" + elif model_name.startswith("S"): + return "Seagate" + elif model_name.startswith("Hi"): + return "Hitachi" + else: + return "HGST" - ordered_attrs = [] - - for one_day in disk_days: - one_day_attrs = [] - - for attr in model_attrlist: - if attr in one_day: - one_day_attrs.append(one_day[attr]) - else: - one_day_attrs.append(0) - - ordered_attrs.append(one_day_attrs) - - return ordered_attrs def predict(self, disk_days): - """ - Predict using given 6-days disk S.M.A.R.T. attributes. - - Args: - disk_days: A list struct comprises 6 dictionaries. These - dictionaries store 'consecutive' days of disk SMART - attributes. - Returns: - A string indicates prediction result. One of following four strings - will be returned according to disk failure status: - (1) Good : Disk is health - (2) Warning : Disk has some symptoms but may not fail immediately - (3) Bad : Disk is in danger and data backup is highly recommended - (4) Unknown : Not enough data for prediction. 
- - Raises: - Pickle exceptions - """ - - all_pred = [] - - proc_disk_days = self.__preprocess(disk_days) - attr_list, diff_data = DiskFailurePredictor.__get_diff_attrs(proc_disk_days) - modellist = self.__get_best_models(attr_list) - if modellist is None: - return "Unknown" - - for modelpath in modellist: - model_attrlist = modellist[modelpath] - ordered_data = DiskFailurePredictor.__get_ordered_attrs( - diff_data, model_attrlist) - - try: - with open(modelpath, 'rb') as f_model: - clf = pickle.load(f_model) + # massage data into a format that can be fed to models + raw_df = self.__format_raw_data(disk_days) - except UnicodeDecodeError: - # Compatibility for python3 - with open(modelpath, 'rb') as f_model: - clf = pickle.load(f_model, encoding='latin1') + # preprocess + preprocessed_data = self.__preprocess(raw_df) + if preprocessed_data is None: + return DiskFailurePredictor.PREDICTION_CLASSES[-1] - pred = clf.predict(ordered_data) + # get model for current manufacturer + manufacturer = self.__get_manufacturer(raw_df['model_name'].iloc[0]).lower() + model_path = os.path.join(self.model_dirpath, manufacturer + '_predictor.joblib') + model = joblib.load(model_path) - all_pred.append(1 if any(pred) else 0) + # predictions for each day + preds = model.predict(preprocessed_data) - score = 2 ** sum(all_pred) - len(modellist) - if score > 10: - return "Bad" - if score > 4: - return "Warning" - return "Good" + # use majority vote to decide class. 
raise if a nan prediction exists + pred_class_id = stats.mode(preds, nan_policy='raise').mode[0] + return DiskFailurePredictor.PREDICTION_CLASSES[pred_class_id] diff --git a/src/pybind/mgr/diskprediction_local/requirements.txt b/src/pybind/mgr/diskprediction_local/requirements.txt index 92a5aa3ab0c..8769b42e601 100644 --- a/src/pybind/mgr/diskprediction_local/requirements.txt +++ b/src/pybind/mgr/diskprediction_local/requirements.txt @@ -1,3 +1,6 @@ -numpy==1.15.1 -scikit-learn==0.19.2 -scipy==1.1.0 \ No newline at end of file +numpy==1.16.4 +scipy==1.2.1 +pandas==0.25.0 +joblib==0.13.2 +scikit-learn==0.21.2 +rgf-python==3.6.0