参考:https://github.com/webankfintech/fate
https://www.fedai.org/#/
FATE $ sh build_standalone_docker.sh
FATE $ CONTAINER_ID=`docker run -t -d fate/standalone`
FATE $ docker exec -t -i ${CONTAINER_ID} bash
### 逐层遍历所有文件夹,运行其中test文件夹下以_test.py为结尾的python测试文件
# Work from the directory that holds this script so relative paths resolve.
cd "$(dirname "$0")" || exit 1
# Absolute path of the current working directory (pwd = Print Working Directory).
# Not read by the functions below; presumably used by the part of the script
# that starts the traversal -- verify against the full file.
cur_dir=$(pwd)

#######################################
# Run one Python test file.
# Arguments: $1 - path of the test file
# Outputs:   "start to run test <path>" followed by the test's own output
# Returns:   exit status of the python interpreter
#######################################
run_test() {
  local file=$1
  echo "start to run test $file"
  python "$file"
}

#######################################
# Recursively walk a directory tree and run every file whose name ends in
# "_test.py" and whose parent directory path ends in "/test".
# Arguments: $1 - directory to walk
#######################################
traverse_folder() {
  local dir=$1
  local path name
  # Glob instead of parsing `ls`, so names containing whitespace survive.
  for path in "$dir"/*; do
    # An empty directory leaves the pattern unexpanded; skip that literal.
    [[ -e "$path" ]] || continue
    name=${path##*/}
    if [[ -d "$path" ]]; then
      traverse_folder "$path"
    # Glob suffix matches: the dot in "_test.py" is literal here, unlike in an
    # unescaped regex where it would match any character.
    elif [[ "$name" == *_test.py && "$dir" == */test ]]; then
      run_test "$path"
    fi
  done
}
sh ./federatedml/test/run_test.sh > test_log.txt:
start to run test /fate/federatedml/test/../evaluation/test/evaluation_test.py {'auc': 0.0, 'ks': 0.0, 'lift': [(0.5, 0.0)], 'precision': [(0.5, 0.0)], 'recall': [(0.5, 0.0)], 'accuracy': [(0.5, 0.0)], 'explained_variance': -0.6539, 'mean_absolute_error': 0.638, 'mean_squared_error': 0.4135, 'mean_squared_log_error': 0.1988, 'median_absolute_error': 0.635, 'r2_score': -0.6539, 'root_mean_squared_error': 0.643} start to run test /fate/federatedml/test/../feature/test/feature_select_test.py param_set { key: "eps" value: 9.999999747378752e-06 } original_cols: 0 original_cols: 1 left_cols: 0 left_cols: 1 filter_name: "unique_value" start to run test /fate/federatedml/test/../feature/test/imputer_test.py start to run test /fate/federatedml/test/../feature/test/instance_test.py start to run test /fate/federatedml/test/../feature/test/min_max_scaler_test.py start to run test /fate/federatedml/test/../feature/test/quantile_binning_test.py -0.928683554705954 -0.8283685741417925 -0.8111241409211205 min_rank: 190, found_rank: 214, max_rank: 210 Spend time: 2.453406572341919 collect and use numpy time: 4.527343034744263 start to run test /fate/federatedml/test/../feature/test/quantile_summaries_test.py min_rank: 89800, found_rank: 89995, max_rank: 90200 min_rank: 89800, found_rank: 89948, max_rank: 90200 min_rank: 89800, found_rank: 89995, max_rank: 90200 min_rank: 89800, found_rank: 89979, max_rank: 90200 min_rank: 89800, found_rank: 89977, max_rank: 90200 min_rank: 89800, found_rank: 90041, max_rank: 90200 start to run test /fate/federatedml/test/../feature/test/quantile_test.py start to run test /fate/federatedml/test/../feature/test/sampler_test.py start to run test /fate/federatedml/test/../feature/test/sparse_vector_test.py start to run test /fate/federatedml/test/../feature/test/standard_scaler_test.py start to run test /fate/federatedml/test/../ftl/test/common_data_util_test.py start to run test /fate/federatedml/test/../ftl/test/eggroll_XY_test.py --- 
test_distributed_calculate_XY_1 --- [[ 2. 4. 6.] [ 4. 5. 6.] [-7. -8. -9.] [10. 11. 12.]] (4, 3) --- test_distributed_calculate_XY_2 --- [[[0.43551517 0.70466051 0.30466157] [0.26808046 0.76825121 0.06513553] [0.51540669 0.2421427 0.48011399]] [[0.42700336 0.33585337 0.49496926] [0.25135199 0.38382964 0.12959278] [0.35088255 0.45791475 0.25325645]] [[0.13418796 0.20833246 0.31760359] [0.2749853 0.21743268 0.48146084] [0.20890022 0.14694317 0.49790149]] [[0.00388095 0.00665658 0.00157656] [0.00567649 0.00432882 0.00119392] [0.0014735 0.00646106 0.00375111]]] (4, 3, 3) --- test_distributed_calculate_avg_XY_1 --- --- test_distributed_calculate_avg_XY_2 --- --- test_distributed_calculate_sum_XY --- --- test_distributed_compute_XY_plus_Z --- start to run test /fate/federatedml/test/../ftl/test/eggroll_X_plus_Y_test.py start to run test /fate/federatedml/test/../ftl/test/eggroll_encryption_matmul_test.py start to run test /fate/federatedml/test/../ftl/test/eggroll_encryption_runtime_test.py start to run test /fate/federatedml/test/../ftl/test/eggroll_encryption_test.py start to run test /fate/federatedml/test/../ftl/test/eggroll_storage_test.py {'learning_rate': 0.01, 'input_dim': 100, 'hidden_dim': 64} start to run test /fate/federatedml/test/../ftl/test/host_guest_factory_test.py start to run test /fate/federatedml/test/../ftl/test/params_validation_test.py start to run test /fate/federatedml/test/../ftl/test/random_mask_test.py ----test_mask_2_dim---- ----test_mask_3_dim_1---- ----test_encrypt_3_dim_2---- ----test_mask_float---- original matrix 31.444 cleared_matrix 31.444 ----test_mask_integer---- original matrix 31 cleared_matrix 31.0 ----test_mask_scalar---- start to run test /fate/federatedml/test/../ftl/test/whitebox_autoencoder_test.py 0 / 0 cost: 1.0359334789708443 0 / 0 cost: 0.9028475258265454 0 / 0 cost: 0.7708735124447182 0 / 0 cost: 0.6399453667039348 0 / 0 cost: 0.5099926274994172 0 / 0 cost: 0.38094404886348604 0 / 0 cost: 0.25272588523352807 0 / 0 cost: 
0.1252602715537511 0 / 0 cost: -0.0015352838442681354 0 / 0 cost: -0.1277506050008835 start to run test /fate/federatedml/test/../ftl/test/whitebox_enc_gradients_test.py encrypt_grads_ex shape (4, 1, 5) encrypt_grads_W shape (4, 5) encrypt_grads_b shape (4,) encrypt_grads_ex shape (2, 1, 5) encrypt_grads_W shape (2, 5) encrypt_grads_b shape (2,) encrypt_grads_ex shape (4, 1, 5) encrypt_grads_W shape (4, 5) encrypt_grads_b shape (4,) encrypt_grads_ex shape (2, 1, 5) encrypt_grads_W shape (2, 5) encrypt_grads_b shape (2,) encrypt_grads_ex shape (4, 1, 5) encrypt_grads_W shape (4, 5) encrypt_grads_b shape (4,) encrypt_grads_ex shape (2, 1, 5) encrypt_grads_W shape (2, 5) encrypt_grads_b shape (2,) encrypt_grads_ex shape (4, 1, 5) encrypt_grads_W shape (4, 5) encrypt_grads_b shape (4,) encrypt_grads_ex shape (2, 1, 5) encrypt_grads_W shape (2, 5) encrypt_grads_b shape (2,) start to run test /fate/federatedml/test/../ftl/test/whitebox_faster_enc_gradients_test.py encrypt_grads_ex shape (4, 1, 5) encrypt_grads_W shape (4, 5) encrypt_grads_b shape (4,) encrypt_grads_ex shape (2, 1, 5) encrypt_grads_W shape (2, 5) encrypt_grads_b shape (2,) encrypt_grads_ex shape (4, 1, 5) encrypt_grads_W shape (4, 5) encrypt_grads_b shape (4,) encrypt_grads_ex shape (2, 1, 5) encrypt_grads_W shape (2, 5) encrypt_grads_b shape (2,) encrypt_grads_ex shape (4, 1, 5) encrypt_grads_W shape (4, 5) encrypt_grads_b shape (4,) encrypt_grads_ex shape (2, 1, 5) encrypt_grads_W shape (2, 5) encrypt_grads_b shape (2,) encrypt_grads_ex shape (4, 1, 5) encrypt_grads_W shape (4, 5) encrypt_grads_b shape (4,) encrypt_grads_ex shape (2, 1, 5) encrypt_grads_W shape (2, 5) encrypt_grads_b shape (2,) start to run test /fate/federatedml/test/../ftl/test/whitebox_plain_gradients_test.py start to run test /fate/federatedml/test/../logistic_regression/hetero_dnn_logistic_regression/test/local_model_proxy_test.py ----test_DNNLR_transform---- 0 [4 5 6 7 8] 1 [9 3 6 7 8] 2 [ 7 8 9 10 11] 3 [1 2 3 4 5] 
index_tracking_list [0, 1, 2, 3] actual_features [[114 169 196 225] [118 179 212 247] [171 253 298 345] [ 57 85 94 105]] expected_trans_features [[114 169 196 225] [118 179 212 247] [171 253 298 345] [ 57 85 94 105]] ----test_DNNLR_update_local_model---- 0 [4 5 6 7 8] 1 [9 3 6 7 8] 2 [ 7 8 9 10 11] 3 [1 2 3 4 5] X: [[ 4 5 6 7 8] [ 9 3 6 7 8] [ 7 8 9 10 11] [ 1 2 3 4 5]] (4, 5) in_grad: [[ 24. 32. 40.] [ 72. 96. 120.] [ 36. 48. 60.] [ 12. 16. 20.]] (4, 3) expected_instances: [[ 4 5 6 7 8] [ 9 3 6 7 8] [ 7 8 9 10 11] [ 1 2 3 4 5]] actual_instances: [[ 4 5 6 7 8] [ 9 3 6 7 8] [ 7 8 9 10 11] [ 1 2 3 4 5]] expected_back_grad [[ 24 32 40] [ 72 96 120] [ 36 48 60] [ 12 16 20]] actual_back_grad [[ 24. 32. 40.] [ 72. 96. 120.] [ 36. 48. 60.] [ 12. 16. 20.]] start to run test /fate/federatedml/test/../logistic_regression/test/homo_lr_test.py before training, coef: [0.13968188 0.88744885 0.80187938 0.43285573 0.80779013], intercept: 0.831288889651533 [100, 5.723491806287644, 5.671945837532906, 5.621491950842434, 5.572123354216551, 5.523832548805044, 5.476611347744709, 5.43045089676398, 5.385341696437183, 5.341273625963129, 5.2982359683360585] before training, coef: [0.86678672 0.84918847 0.52887216 0.43555516 0.91879362], intercept: 0.8588231086310137 After training, coef: [0.86346528 0.84547825 0.52417372 0.43242595 0.91320305], intercept: 0.8380594240649859, loss: 5.835974210822435 start to run test /fate/federatedml/test/../logistic_regression/test/logistic_regression_test.py start to run test /fate/federatedml/test/../loss/test/cross_entropy_test.py start to run test /fate/federatedml/test/../loss/test/regression_loss_test.py start to run test /fate/federatedml/test/../model_selection/test/KFold_test.py type: IN_MEMORY, namespace: 123, name: 3f80a06c-8b85-11e9-8fee-0242ac110002, partitions: 3 1000 expect_train_data_num: 900.0, expect_test_data_num: 100.0 train_num: 900, test_num: 100 train_num: 900, test_num: 100 train_num: 900, test_num: 100 train_num: 900, test_num: 100 
train_num: 900, test_num: 100 train_num: 900, test_num: 100 train_num: 900, test_num: 100 train_num: 900, test_num: 100 train_num: 900, test_num: 100 train_num: 900, test_num: 100 start to run test /fate/federatedml/test/../model_selection/test/mini_batch_test.py start to run test /fate/federatedml/test/../optim/federated_aggregator/test/hetero_federated_aggregator_test.py start to run test /fate/federatedml/test/../optim/gradient/test/gradient_method_test.py compute time: 13.412506341934204 start to run test /fate/federatedml/test/../optim/gradient/test/hetero_lr_gradient_test.py start to run test /fate/federatedml/test/../optim/gradient/test/homo_lr_gradient_test.py start to run test /fate/federatedml/test/../optim/test/convergence_test.py start to run test /fate/federatedml/test/../optim/test/initialize_test.py start to run test /fate/federatedml/test/../optim/test/updater_test.py start to run test /fate/federatedml/test/../secureprotol/test/encode_test.py start to run test /fate/federatedml/test/../secureprotol/test/fate_paillier_test.py start to run test /fate/federatedml/test/../statistic/test/statics_test.py start to run test /fate/federatedml/test/../tree/test/criterion_test.py start to run test /fate/federatedml/test/../tree/test/feature_histogram_test.py start to run test /fate/federatedml/test/../tree/test/node_test.py start to run test /fate/federatedml/test/../util/test/classify_label_checker_test.py start to run test /fate/federatedml/test/../util/test/data_io_test.py start to run test /fate/federatedml/test/../util/test/param_extract_test.py start to run test /fate/federatedml/test/../util/test/parameter_checker_test.py
./federatedml/evaluation/test/evaluation_test.py:
import unittest

import numpy as np

from federatedml.evaluation import Evaluation


class TestClassificationEvaluaction(unittest.TestCase):
    """Unit tests for the metrics exposed by ``Evaluation``.

    Each test feeds small hand-written label/prediction arrays to one metric
    (or to ``report``) and compares the result with a hard-coded expectation.
    """

    @staticmethod
    def _all_metrics():
        """Return a fresh list of every metric name requested from ``report``."""
        return ["auc", "ks", "lift", "precision", "recall", "accuracy",
                "explained_variance", "mean_absolute_error",
                "mean_squared_error", "mean_squared_log_error",
                "median_absolute_error", "r2_score",
                "root_mean_squared_error"]

    def assertFloatEqual(self, op1, op2):
        """Assert that ``op1`` and ``op2`` differ by less than 1e-6."""
        diff = np.abs(op1 - op2)
        self.assertLess(diff, 1e-6)

    def _assert_pairs_by_threshold(self, expected, thresholds, results):
        """Check per-threshold value pairs against ``expected``.

        ``results`` is indexed in the same order as ``thresholds``; every
        entry is rounded to two decimals and must hold exactly two values.
        ``expected`` is keyed by ``str(threshold)`` and then by position.
        """
        rounded = [[round(pos, 2) for pos in pair] for pair in results]
        for i, threshold in enumerate(thresholds):
            pair = rounded[i]
            self.assertEqual(len(pair), 2)
            self.assertFloatEqual(expected[str(threshold)][0], pair[0])
            self.assertFloatEqual(expected[str(threshold)][1], pair[1])

    def test_auc(self):
        """``auc`` rounded to two decimals equals 0.75 for the sample data."""
        y_true = np.array([0, 0, 1, 1])
        y_predict = np.array([0.1, 0.4, 0.35, 0.8])
        ground_true_auc = 0.75

        eva = Evaluation("binary")
        auc = eva.auc(y_true, y_predict)
        auc = round(auc, 2)

        self.assertFloatEqual(auc, ground_true_auc)

    def test_ks(self):
        """``ks`` rounded to two decimals equals 0.75 for the sample data."""
        y_true = np.array([1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0])
        y_predict = np.array(
            [0.42, 0.73, 0.55, 0.37, 0.57, 0.70, 0.25, 0.23, 0.46, 0.62, 0.76,
             0.46, 0.55, 0.56, 0.56, 0.38, 0.37, 0.73, 0.77, 0.21, 0.39])
        ground_true_ks = 0.75

        eva = Evaluation("binary")
        ks = eva.ks(y_true, y_predict)
        ks = round(ks, 2)

        self.assertFloatEqual(ks, ground_true_ks)

    def test_lift(self):
        """``lift`` yields the expected value pair for each threshold."""
        y_true = np.array([1, 1, 0, 0, 0, 1, 1, 0, 0, 1])
        y_predict = np.array([0.57, 0.70, 0.25, 0.30, 0.46, 0.62, 0.76, 0.46, 0.35, 0.56])
        dict_score = {"0": {0: 0, 1: 1}, "0.4": {0: 2, 1: 1.43}, "0.6": {0: 1.43, 1: 2}}

        eva = Evaluation("binary")
        split_thresholds = [0, 0.4, 0.6]
        lifts = eva.lift(y_true, y_predict, thresholds=split_thresholds)

        self._assert_pairs_by_threshold(dict_score, split_thresholds, lifts)

    def test_precision(self):
        """``precision`` yields the expected value pair for each threshold."""
        y_true = np.array([1, 1, 0, 0, 0, 1, 1, 0, 0, 1])
        y_predict = np.array([0.57, 0.70, 0.25, 0.30, 0.46, 0.62, 0.76, 0.46, 0.35, 0.56])
        dict_score = {"0.4": {0: 1, 1: 0.71}, "0.6": {0: 0.71, 1: 1}}

        eva = Evaluation("binary")
        split_thresholds = [0.4, 0.6]
        prec_values = eva.precision(y_true, y_predict, thresholds=split_thresholds)

        self._assert_pairs_by_threshold(dict_score, split_thresholds, prec_values)

    def test_recall(self):
        """``recall`` yields the expected value pair for each threshold."""
        y_true = np.array([1, 1, 0, 0, 0, 1, 1, 0, 0, 1])
        y_predict = np.array([0.57, 0.70, 0.25, 0.31, 0.46, 0.62, 0.76, 0.46, 0.35, 0.56])
        dict_score = {"0.3": {0: 0.2, 1: 1}, "0.4": {0: 0.6, 1: 1}}

        eva = Evaluation("binary")
        split_thresholds = [0.3, 0.4]
        recalls = eva.recall(y_true, y_predict, thresholds=split_thresholds)

        self._assert_pairs_by_threshold(dict_score, split_thresholds, recalls)

    def test_bin_accuracy(self):
        """Binary ``accuracy`` matches the expected score at each threshold."""
        y_true = np.array([1, 1, 0, 0, 0, 1, 1, 0, 0, 1])
        y_predict = np.array([0.57, 0.70, 0.25, 0.31, 0.46, 0.62, 0.76, 0.46, 0.35, 0.56])
        gt_score = {"0.3": 0.6, "0.5": 1.0, "0.7": 0.7}
        split_thresholds = [0.3, 0.5, 0.7]

        eva = Evaluation("binary")
        acc = eva.accuracy(y_true, y_predict, thresholds=split_thresholds)

        for i in range(len(split_thresholds)):
            score = gt_score[str(split_thresholds[i])]
            self.assertFloatEqual(score, acc[i])

    def test_multi_accuracy(self):
        """Multi-class ``accuracy`` as a ratio (0.6) and as a count (12)."""
        y_true = np.array([1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4])
        y_predict = [1, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 3, 3, 3, 2, 4, 4, 4, 4, 4]
        gt_score = 0.6
        gt_number = 12

        eva = Evaluation("multi")
        acc = eva.accuracy(y_true, y_predict)
        self.assertFloatEqual(gt_score, acc)

        acc_number = eva.accuracy(y_true, y_predict, normalize=False)
        self.assertEqual(acc_number, gt_number)

    def test_multi_recall(self):
        """Multi-class ``recall`` for the labels listed in ``result_filter``."""
        y_true = np.array([1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5])
        y_predict = np.array([1, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 3, 3, 3, 2, 4, 4, 4, 4, 4, 6, 6, 6, 6, 6])
        gt_score = {1: 0.4, 3: 0.8, 4: 1.0, 6: 0, 7: -1}

        eva = Evaluation("multi")
        result_filter = [1, 3, 4, 6, 7]
        recall_scores = eva.recall(y_true, y_predict, result_filter=result_filter)

        for i in range(len(result_filter)):
            score = gt_score[result_filter[i]]
            self.assertFloatEqual(score, recall_scores[result_filter[i]])

    def test_multi_precision(self):
        """Multi-class ``precision`` for the labels listed in ``result_filter``."""
        y_true = np.array([1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5])
        y_predict = np.array([1, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 3, 3, 3, 2, 4, 4, 4, 4, 4, 6, 6, 6, 6, 6])
        gt_score = {2: 0.25, 3: 0.8, 5: 0, 6: 0, 7: -1}

        eva = Evaluation("multi")
        result_filter = [2, 3, 5, 6, 7]
        precision_scores = eva.precision(y_true, y_predict, result_filter=result_filter)

        for i in range(len(result_filter)):
            score = gt_score[result_filter[i]]
            self.assertFloatEqual(score, precision_scores[result_filter[i]])

    def test_explained_variance(self):
        """``explain_variance`` on 1-D and 2-D inputs, rounded to 4 decimals."""
        eva = Evaluation()

        y_true = [3, -0.5, 2, 7]
        y_pred = [2.5, 0.0, 2, 8]
        self.assertFloatEqual(np.around(eva.explain_variance(y_true, y_pred), 4), 0.9572)

        y_true = [[0.5, 1], [-1, 1], [7, -6]]
        y_pred = [[0, 2], [-1, 2], [8, -5]]
        self.assertFloatEqual(np.around(eva.explain_variance(y_true, y_pred), 4), 0.9839)

    def test_mean_absolute_error(self):
        """``mean_absolute_error`` on 1-D and 2-D inputs."""
        eva = Evaluation()

        y_true = [3, -0.5, 2, 7]
        y_pred = [2.5, 0.0, 2, 8]
        self.assertFloatEqual(eva.mean_absolute_error(y_true, y_pred), 0.5)

        y_true = [[0.5, 1], [-1, 1], [7, -6]]
        y_pred = [[0, 2], [-1, 2], [8, -5]]
        self.assertFloatEqual(eva.mean_absolute_error(y_true, y_pred), 0.75)

    def test_mean_squared_error(self):
        """``mean_squared_error`` on 1-D and 2-D inputs."""
        eva = Evaluation()

        y_true = [3, -0.5, 2, 7]
        y_pred = [2.5, 0.0, 2, 8]
        self.assertFloatEqual(eva.mean_squared_error(y_true, y_pred), 0.375)

        y_true = [[0.5, 1], [-1, 1], [7, -6]]
        y_pred = [[0, 2], [-1, 2], [8, -5]]
        self.assertFloatEqual(np.around(eva.mean_squared_error(y_true, y_pred), 4), 0.7083)

    def test_mean_squared_log_error(self):
        """``mean_squared_log_error`` on 1-D and 2-D inputs, rounded to 4 decimals."""
        eva = Evaluation()

        y_true = [3, 5, 2.5, 7]
        y_pred = [2.5, 5, 4, 8]
        self.assertFloatEqual(np.around(eva.mean_squared_log_error(y_true, y_pred), 4), 0.0397)

        y_true = [[0.5, 1], [1, 2], [7, 6]]
        y_pred = [[0.5, 2], [1, 2.5], [8, 8]]
        self.assertFloatEqual(np.around(eva.mean_squared_log_error(y_true, y_pred), 4), 0.0442)

    def test_median_absolute_error(self):
        """``median_absolute_error`` on two 1-D inputs."""
        eva = Evaluation()

        y_true = [3, -0.5, 2, 7]
        y_pred = [2.5, 0.0, 2, 8]
        self.assertFloatEqual(eva.median_absolute_error(y_true, y_pred), 0.5)

        y_true = [3, -0.6, 2, 7]
        y_pred = [2.5, 0.0, 2, 8]
        self.assertFloatEqual(eva.median_absolute_error(y_true, y_pred), 0.55)

    def test_root_mean_squared_error(self):
        """``root_mean_squared_error`` on 1-D and 2-D inputs, rounded to 4 decimals."""
        eva = Evaluation()

        y_true = [3, -0.5, 2, 7]
        y_pred = [2.5, 0.0, 2, 8]
        self.assertFloatEqual(np.around(eva.root_mean_squared_error(y_true, y_pred), 4), 0.6124)

        y_true = [[0.5, 1], [-1, 1], [7, -6]]
        y_pred = [[0, 2], [-1, 2], [8, -5]]
        self.assertFloatEqual(np.around(eva.root_mean_squared_error(y_true, y_pred), 4), 0.8416)

    def test_binary_report(self):
        """``report`` in binary mode returns the expected value for every metric."""
        eva = Evaluation("binary")
        y_true = np.array([1, 1, 0, 0, 0, 1, 1, 0, 0, 1])
        y_predict = np.array([0.57, 0.70, 0.25, 0.31, 0.46, 0.62, 0.76, 0.46, 0.35, 0.56])
        metrics = self._all_metrics()

        eval_results = eva.report(y_true, y_predict, metrics)

        self.assertFloatEqual(eval_results['auc'], 1.0)
        self.assertFloatEqual(eval_results['ks'], 1.0)
        self.assertListEqual(eval_results['lift'], [(0.5, 2.0)])
        self.assertListEqual(eval_results['precision'], [(0.5, 1.0)])
        self.assertListEqual(eval_results['recall'], [(0.5, 1.0)])
        self.assertListEqual(eval_results['accuracy'], [(0.5, 1.0)])
        self.assertFloatEqual(eval_results['explained_variance'], 0.4501)
        self.assertFloatEqual(eval_results['mean_absolute_error'], 0.3620)
        self.assertFloatEqual(eval_results['mean_squared_error'], 0.1375)
        self.assertFloatEqual(eval_results['mean_squared_log_error'], 0.0707)
        self.assertFloatEqual(eval_results['median_absolute_error'], 0.3650)
        self.assertFloatEqual(eval_results['r2_score'], 0.4501)
        self.assertFloatEqual(eval_results['root_mean_squared_error'], 0.3708)

    def test_binary_report_with_pos_label(self):
        """``report`` in binary mode with ``pos_label=0``; also prints the report."""
        eva = Evaluation("binary")
        y_true = np.array([1, 1, 0, 0, 0, 1, 1, 0, 0, 1])
        y_predict = np.array([0.57, 0.70, 0.25, 0.31, 0.46, 0.62, 0.76, 0.46, 0.35, 0.56])
        metrics = self._all_metrics()

        eval_results = eva.report(y_true, y_predict, metrics, pos_label=0)
        # Kept on purpose: this is the dictionary that shows up in the test log.
        print(eval_results)

        self.assertFloatEqual(eval_results['auc'], 0.0)
        self.assertFloatEqual(eval_results['ks'], 0.0)
        self.assertListEqual(eval_results['lift'], [(0.5, 0.0)])
        self.assertListEqual(eval_results['precision'], [(0.5, 0.0)])
        self.assertListEqual(eval_results['recall'], [(0.5, 0.0)])
        self.assertListEqual(eval_results['accuracy'], [(0.5, 0.0)])
        self.assertFloatEqual(eval_results['explained_variance'], -0.6539)
        self.assertFloatEqual(eval_results['mean_absolute_error'], 0.6380)
        self.assertFloatEqual(eval_results['mean_squared_error'], 0.4135)
        self.assertFloatEqual(eval_results['mean_squared_log_error'], 0.1988)
        self.assertFloatEqual(eval_results['median_absolute_error'], 0.6350)
        self.assertFloatEqual(eval_results['r2_score'], -0.6539)
        self.assertFloatEqual(eval_results['root_mean_squared_error'], 0.643)

    def test_multi_report(self):
        """``report`` in multi mode: auc/ks/lift are None, the rest match."""
        eva = Evaluation("multi")
        y_true = np.array([1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5])
        y_predict = np.array([1, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 3, 3, 3, 2, 4, 4, 4, 4, 4, 6, 6, 6, 6, 6])
        metrics = self._all_metrics()

        eval_results = eva.report(y_true, y_predict, metrics)

        self.assertIsNone(eval_results['auc'])
        self.assertIsNone(eval_results['ks'])
        self.assertIsNone(eval_results['lift'])
        self.assertDictEqual(eval_results['precision'], {1: 0.3333, 2: 0.25, 3: 0.8, 4: 1.0, 5: 0.0, 6: 0.0})
        self.assertDictEqual(eval_results['recall'], {1: 0.4, 2: 0.2, 3: 0.8, 4: 1.0, 5: 0.0, 6: 0.0})
        self.assertFloatEqual(eval_results['accuracy'], 0.48)
        self.assertFloatEqual(eval_results['explained_variance'], 0.6928)
        self.assertFloatEqual(eval_results['mean_absolute_error'], 0.5600)
        self.assertFloatEqual(eval_results['mean_squared_error'], 0.6400)
        self.assertFloatEqual(eval_results['mean_squared_log_error'], 0.0667)
        self.assertFloatEqual(eval_results['median_absolute_error'], 1.000)
        self.assertFloatEqual(eval_results['r2_score'], 0.6800)

    def test_multi_report_with_absent_value(self):
        """``report`` in multi mode when ``y_true`` contains ``None`` entries.

        The expectations are the same as for the data without ``None``.
        """
        eva = Evaluation("multi")
        y_true = np.array(
            [1, 1, 1, 1, 1, None, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, None])
        y_predict = np.array([1, 1, 2, 2, 3, 3, 2, 1, 1, 1, 1, 3, 3, 3, 3, 2, 4, 4, 4, 4, 4, 6, 6, 6, 6, 6, 6])
        metrics = self._all_metrics()

        eval_results = eva.report(y_true, y_predict, metrics)

        self.assertIsNone(eval_results['auc'])
        self.assertIsNone(eval_results['ks'])
        self.assertIsNone(eval_results['lift'])
        self.assertDictEqual(eval_results['precision'], {1: 0.3333, 2: 0.25, 3: 0.8, 4: 1.0, 5: 0.0, 6: 0.0})
        self.assertDictEqual(eval_results['recall'], {1: 0.4, 2: 0.2, 3: 0.8, 4: 1.0, 5: 0.0, 6: 0.0})
        self.assertFloatEqual(eval_results['accuracy'], 0.48)
        self.assertFloatEqual(eval_results['explained_variance'], 0.6928)
        self.assertFloatEqual(eval_results['mean_absolute_error'], 0.5600)
        self.assertFloatEqual(eval_results['mean_squared_error'], 0.6400)
        self.assertFloatEqual(eval_results['mean_squared_log_error'], 0.0667)
        self.assertFloatEqual(eval_results['median_absolute_error'], 1.000)
        self.assertFloatEqual(eval_results['r2_score'], 0.6800)
        self.assertFloatEqual(eval_results['root_mean_squared_error'], 0.800)

    def test_regression_report(self):
        """``report`` in regression mode; classification-only metrics give None."""
        eva = Evaluation("regression")
        y_true = np.array([1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5])
        y_predict = np.array([1, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 3, 3, 3, 2, 4, 4, 4, 4, 4, 6, 6, 6, 6, 6])
        metrics = self._all_metrics()

        eval_results = eva.report(y_true, y_predict, metrics)

        self.assertFloatEqual(eval_results['explained_variance'], 0.6928)
        self.assertFloatEqual(eval_results['mean_absolute_error'], 0.5600)
        self.assertFloatEqual(eval_results['mean_squared_error'], 0.6400)
        self.assertFloatEqual(eval_results['mean_squared_log_error'], 0.0667)
        self.assertFloatEqual(eval_results['median_absolute_error'], 1.000)
        self.assertFloatEqual(eval_results['r2_score'], 0.6800)
        self.assertFloatEqual(eval_results['root_mean_squared_error'], 0.800)

        # Requesting only classification metrics makes report() return None.
        metrics = ["auc", "ks", "lift", "precision", "recall", "accuracy"]
        eval_results = eva.report(y_true, y_predict, metrics)
        self.assertIsNone(eval_results)


if __name__ == '__main__':
    unittest.main()
// 唯一带有print输出的函数,计算各个预测评估指标
def test_binary_report_with_pos_label(self):
    """Excerpt: build a binary-mode report with ``pos_label=0`` and print it.

    Only the part of the method up to the ``print`` call is quoted here.
    """
    eva = Evaluation("binary")
    y_true = np.array([1, 1, 0, 0, 0, 1, 1, 0, 0, 1])
    y_predict = np.array([0.57, 0.70, 0.25, 0.31, 0.46, 0.62, 0.76, 0.46, 0.35, 0.56])
    metrics = ["auc", "ks", "lift", "precision", "recall", "accuracy", "explained_variance", "mean_absolute_error", "mean_squared_error", "mean_squared_log_error", "median_absolute_error", "r2_score", "root_mean_squared_error"]
    eval_results = eva.report(y_true, y_predict, metrics, pos_label=0)
    # The printed dictionary is what appears in the captured test log.
    print(eval_results)
./federatedml/feature/test/feature_select_test.py:
import unittest

from federatedml.feature.feature_selection import UniqueValueFilter
from federatedml.param.param import UniqueValueParam


class TestFeatureSelect(unittest.TestCase):
    """Smoke test for ``UniqueValueFilter.to_result``."""

    def setUp(self):
        """Build a filter over columns 0 and 1 from a default ``UniqueValueParam``."""
        self.filter_obj = UniqueValueFilter(UniqueValueParam(), select_cols=[0, 1])
        self.filter_obj.left_cols = [0, 1]

    def test_protobuf(self):
        """Convert the filter to its result object and print it (no assertions)."""
        protobuf_result = self.filter_obj.to_result()
        # The only print output produced by this test file.
        print(protobuf_result)


if __name__ == '__main__':
    unittest.main()
三、Example Programs:
examples文件夹下,有12个子文件夹
1、data:测试数据集.
2、hetero_dnn_logistic_regression(纵向联邦深度神经网络逻辑回归):
run_logistic_regression_standalone.sh:
# Standalone launcher: builds a timestamped job id and hands fixed party ids
# to run_logistic_regression.sh with work_mode 0.

# Run from the script's own directory; run_logistic_regression.sh is looked up
# relative to it. Quoted so a path containing spaces still works.
cd "$(dirname "$0")" || exit 1

# Timestamp (YYYYmmddHHMMSS) that makes the job id unique per launch.
curtime=$(date +%Y%m%d%H%M%S)
work_mode=0
jobid="hetero_logistic_regression_example_standalone_${curtime}"
guest_partyid=10000
host_partyid=9999
arbiter_partyid=10001

bash run_logistic_regression.sh \
  "$work_mode" "$jobid" "$guest_partyid" "$host_partyid" "$arbiter_partyid"
run_logistic_regression.sh:
// Vertical Federated Learning
# Usage:
#   run_logistic_regression.sh work_mode jobid guest_partyid host_partyid \
#                              arbiter_partyid [role]
# work_mode 0 loads the data and starts guest, host and arbiter from this one
# process; work_mode 1 handles only the given role (guest | host | arbiter).
# Paths are derived from the current working directory, so the script must be
# started from the directory that contains it.

if [[ $# -lt 5 ]]; then
  echo "usage: $0 work_mode jobid guest_partyid host_partyid arbiter_partyid [role]" >&2
  exit 1
fi

work_mode=$1
jobid=$2
guest_partyid=$3
host_partyid=$4
arbiter_partyid=$5

case "$work_mode" in
  0 | 1) ;;
  *)
    echo "work_mode must be 0 or 1, got '${work_mode}'" >&2
    exit 1
    ;;
esac

# The role argument is only read when work_mode is 1.
role=''
if [[ "$work_mode" -eq 1 ]]; then
  role=${6:-}
fi

cur_dir=$(pwd)
# Data set directory.
data_dir=$cur_dir/../data
# Program that loads a data file.
load_file_program=$cur_dir/../load_file/load_file.py
# Configuration directory.
conf_dir=$cur_dir/conf
# Log directory.
log_dir=$cur_dir/../../logs
# Template configuration for loading a data file.
load_data_conf=$conf_dir/load_file.json
# Runtime configuration templates for guest, host and arbiter.
guest_runtime_conf=$conf_dir/guest_runtime_conf.json
host_runtime_conf=$conf_dir/host_runtime_conf.json
arbiter_runtime_conf=$conf_dir/arbiter_runtime_conf.json

# Data set name; alternatives are kept commented out.
data_set=breast
#data_set=default_credit
#data_set=give_credit

# Training, prediction and cross-validation all read the same two files:
# <data_set>_a.csv for the host and <data_set>_b.csv for the guest.
train_data_host=$data_dir/${data_set}_a.csv
train_data_guest=$data_dir/${data_set}_b.csv
predict_data_host=$data_dir/${data_set}_a.csv
predict_data_guest=$data_dir/${data_set}_b.csv
cv_data_host=$data_dir/${data_set}_a.csv
cv_data_guest=$data_dir/${data_set}_b.csv

echo "data dir is : $data_dir"

mode='train'
#mode='predict'
#mode='cross_validation'

# Globals written by the functions below and read back by the dispatch code.
data_table=''
log_file=''
mkdir -p "$log_dir"

#######################################
# Copy the load_file.json template, fill in its placeholders and run the
# loader program on the copy.
# Globals:   data_table (written): table name used for this load
# Arguments: $1 - input file, $2 - role, $3 - load mode (used in the names)
#######################################
load_file() {
  local input_path=$1
  local role=$2
  local load_mode=$3
  local conf_path=${conf_dir}/load_file.json_${role}_${load_mode}_${jobid}

  cp "$load_data_conf" "$conf_path"
  data_table=${data_set}_${role}_${load_mode}_${jobid}

  # sed -i edits the copied file in place instead of printing to the terminal.
  # "|" is the delimiter for the path substitution because paths contain "/".
  sed -i \
    -e "s|_input_path|${input_path}|g" \
    -e "s/_table_name/${data_table}/g" \
    -e "s/_work_mode/${work_mode}/g" \
    "$conf_path"

  python "$load_file_program" -c "$conf_path"
}

# Print the runtime conf template for role $1; anything other than guest or
# arbiter falls back to the host template.
runtime_conf_for_role() {
  case "$1" in
    guest) printf '%s\n' "$guest_runtime_conf" ;;
    arbiter) printf '%s\n' "$arbiter_runtime_conf" ;;
    *) printf '%s\n' "$host_runtime_conf" ;;
  esac
}

# Fill the work-mode and party-id placeholders in conf file $1.
fill_party_settings() {
  sed -i \
    -e "s/_work_mode/${work_mode}/g" \
    -e "s/_guest_party_id/${guest_partyid}/g" \
    -e "s/_host_party_id/${host_partyid}/g" \
    -e "s/_arbiter_party_id/${arbiter_partyid}/g" \
    "$1"
}

# Start the runner script for role $1 in the background with conf file $2;
# anything other than guest or arbiter starts the host runner.
launch_role() {
  local role=$1
  local conf=$2
  case "$role" in
    guest)
      echo "enter guest"
      nohup bash run_guest.sh "$conf" "$jobid" &
      ;;
    arbiter)
      echo "enter arbiter"
      nohup bash run_arbiter.sh "$conf" "$jobid" &
      ;;
    *)
      echo "enter host"
      nohup bash run_host.sh "$conf" "$jobid" &
      ;;
  esac
}

#######################################
# Prepare a per-job runtime conf for the train workflow and start the role.
# Globals:   log_file (written): per-job log directory
# Arguments: $1 - role, $2 - training table, $3 - prediction table
#######################################
train() {
  local role=$1
  local train_table=$2
  local predict_table=$3
  local runtime_conf cur_runtime_conf

  runtime_conf=$(runtime_conf_for_role "$role")
  cur_runtime_conf=${runtime_conf}_${jobid}
  cp "$runtime_conf" "$cur_runtime_conf"

  echo "current runtime conf is $cur_runtime_conf"
  echo "training table is $train_table"
  echo "$predict_table"

  sed -i \
    -e "s/_workflow_method/train/g" \
    -e "s/_train_table_name/${train_table}/g" \
    -e "s/_predict_table_name/${predict_table}/g" \
    "$cur_runtime_conf"
  fill_party_settings "$cur_runtime_conf"

  log_file=${log_dir}/${jobid}
  echo "Please check log file in ${log_file}"

  launch_role "$role" "$cur_runtime_conf"
}

#######################################
# Prepare a per-job runtime conf for the cross_validation workflow and start
# the role.
# Globals:   log_file (written): per-job log directory
# Arguments: $1 - role, $2 - cross-validation table
#######################################
cross_validation() {
  local role=$1
  local cv_table=$2
  local runtime_conf cur_runtime_conf

  runtime_conf=$(runtime_conf_for_role "$role")
  cur_runtime_conf=${runtime_conf}_${jobid}
  cp "$runtime_conf" "$cur_runtime_conf"

  echo "current runtime conf is $cur_runtime_conf"
  echo "cv talbe is${cv_table}"

  sed -i \
    -e "s/_workflow_method/cross_validation/g" \
    -e "s/_cross_validation_table_name/${cv_table}/g" \
    "$cur_runtime_conf"
  fill_party_settings "$cur_runtime_conf"

  log_file=${log_dir}/${jobid}
  echo "Please check log file in ${log_file}"

  launch_role "$role" "$cur_runtime_conf"
}

#######################################
# Poll a log file until a line containing the keyword appears, then print the
# matching lines. There is no timeout: it keeps polling every 10 seconds.
# Arguments: $1 - log file path, $2 - keyword to search for
#######################################
get_log_result() {
  local log_path=$1
  local keyword=$2
  local matches

  sleep 5s
  while true; do
    # One grep both detects and captures the matches.
    if matches=$(grep -- "$keyword" "$log_path"); then
      printf '%s\n' "$matches"
      break
    fi
    echo "please wait or check more info in $log_path"
    sleep 10s
  done
}

if [[ "$mode" == 'train' ]]; then
  if [[ "$work_mode" -eq 0 ]]; then
    # Standalone: load all four tables, then start every role from here.
    load_file "$train_data_guest" guest train
    train_table_guest=$data_table
    echo "train_table guest is:$train_table_guest"

    load_file "$train_data_host" host train
    train_table_host=$data_table
    echo "train_table host is:$train_table_host"

    load_file "$predict_data_guest" guest predict
    predict_table_guest=$data_table
    echo "predict_table guest is:$predict_table_guest"

    load_file "$predict_data_host" host predict
    predict_table_host=$data_table
    echo "predict_table host is:$predict_table_host"

    train guest "$train_table_guest" "$predict_table_guest"
    train host "$train_table_host" "$predict_table_host"
    train arbiter "" ""

    workflow_log=${log_file}/workflow.log
    get_log_result "$workflow_log" eval_result
  else
    case "$role" in
      guest)
        load_file "$train_data_guest" guest train
        train_table_guest=$data_table
        load_file "$predict_data_guest" guest predict
        predict_table_guest=$data_table
        train guest "$train_table_guest" "$predict_table_guest"

        workflow_log=${log_file}/workflow.log
        get_log_result "$workflow_log" eval_result
        ;;
      host)
        load_file "$train_data_host" host train
        train_table_host=$data_table
        load_file "$predict_data_host" host predict
        predict_table_host=$data_table
        echo "Predict_table host is:${predict_table_host}"
        train host "$train_table_host" "$predict_table_host"
        ;;
      arbiter)
        train arbiter '' ''
        ;;
      *)
        # Same message the cross_validation branch prints for an unknown role.
        echo "$role not support"
        ;;
    esac
  fi
elif [[ "$mode" == 'cross_validation' ]]; then
  if [[ "$work_mode" -eq 0 ]]; then
    load_file "$cv_data_guest" guest cross_validation
    cv_table_guest=$data_table
    load_file "$cv_data_host" host cross_validation
    cv_table_host=$data_table

    echo "cv table guest is:$cv_table_guest"
    echo "cv table host is:$cv_table_host"

    cross_validation guest "$cv_table_guest"
    cross_validation host "$cv_table_host"
    cross_validation arbiter ""

    workflow_log=${log_file}/workflow.log
    get_log_result "$workflow_log" mean
  else
    case "$role" in
      guest)
        load_file "$cv_data_guest" guest cross_validation
        cv_table_guest=$data_table
        echo "cv table guest is:$cv_table_guest"
        cross_validation guest "$cv_table_guest"

        workflow_log=${log_file}/workflow.log
        get_log_result "$workflow_log" mean
        ;;
      host)
        load_file "$cv_data_host" host cross_validation
        cv_table_host=$data_table
        echo "cv table host is:$cv_table_host"
        cross_validation host "$cv_table_host"
        ;;
      arbiter)
        echo "arbiter do not need data"
        cross_validation arbiter ""
        ;;
      *)
        echo "$role not support"
        ;;
    esac
  fi
fi
3、。。。(后续再作补充)
所有评论(0)