@@ -493,15 +493,21 @@ def test_classifier_custom_objective(output, task, cluster):
493
493
with Client (cluster ) as client :
494
494
X , y , w , _ , dX , dy , dw , _ = _create_data (
495
495
objective = task ,
496
- output = output
496
+ output = output ,
497
497
)
498
498
499
+ # + + + + + + + + + +
500
+ # + + + + + + + + + +
501
+ # + + + + + + + + + +
502
+ # + + + + + + + + + +
503
+ # + _ _ _ _ _ _ _ _ _
499
504
params = {
500
505
"n_estimators" : 50 ,
501
506
"num_leaves" : 31 ,
502
- "min_data" : 1 ,
503
507
"verbose" : - 1 ,
504
- "learning_rate" : 0.01 ,
508
+ "seed" : 708 ,
509
+ "deterministic" : True ,
510
+ "force_col_wise" : True
505
511
}
506
512
507
513
if task == 'binary-classification' :
@@ -522,25 +528,26 @@ def test_classifier_custom_objective(output, task, cluster):
522
528
)
523
529
dask_classifier = dask_classifier .fit (dX , dy , sample_weight = dw )
524
530
dask_classifier_local = dask_classifier .to_local ()
525
- p1_proba = dask_classifier .predict_proba (dX ).compute ()
526
- p1_proba_local = dask_classifier_local .predict_proba ( X )
531
+ p1_raw = dask_classifier .predict (dX , raw_score = True ).compute ()
532
+ p1_raw_local = dask_classifier_local .predict ( X , raw_score = True )
527
533
528
534
# with a custom objective, prediction result is a raw score instead of predicted class
529
- p1_class = (1.0 / (1.0 + np .exp (- p1_proba ))) > 0.5
530
- p1_class = p1_class .astype (np .int64 )
531
- p1_class_local = (1.0 / (1.0 + np .exp (- p1_proba_local ))) > 0.5
532
- p1_class_local = p1_class_local .astype (np .int64 )
535
+ p1_proba = 1.0 / (1.0 + np .exp (- p1_raw ))
536
+ p1_proba_local = 1.0 / (1.0 + np .exp (- p1_raw_local ))
533
537
534
538
local_classifier = lgb .LGBMClassifier (** params )
535
539
local_classifier .fit (X , y , sample_weight = w )
536
- p2_proba = local_classifier .predict_proba (X )
537
- p2_class = (1.0 / (1.0 + np .exp (- p1_proba ))) > 0.5
538
- p2_class = p2_class .astype (np .int64 )
540
+ p2_raw = local_classifier .predict (X , raw_score = True )
541
+ p2_proba = 1.0 / (1.0 + np .exp (- p2_raw ))
539
542
540
- if task == 'multiclass-classification' :
541
- p1_class = p1_class .argmax (axis = 1 )
542
- p1_class_local = p1_class_local .argmax (axis = 1 )
543
- p2_class = p2_class .argmax (axis = 1 )
543
+ if task == 'binary-classification' :
544
+ p1_class = (p1_proba > 0.5 ).astype (np .int64 )
545
+ p1_class_local = (p1_proba_local > 0.5 ).astype (np .int64 )
546
+ p2_class = (p2_proba > 0.5 ).astype (np .int64 )
547
+ elif task == 'multiclass-classification' :
548
+ p1_class = p1_proba .argmax (axis = 1 )
549
+ p1_class_local = p1_proba_local .argmax (axis = 1 )
550
+ p2_class = p2_proba .argmax (axis = 1 )
544
551
545
552
# function should have been preserved
546
553
assert callable (dask_classifier .objective_ )
@@ -552,7 +559,13 @@ def test_classifier_custom_objective(output, task, cluster):
552
559
assert_eq (p2_class , y )
553
560
554
561
# probability estimates should be similar
555
- assert_eq (p1_proba , p2_proba , atol = 0.03 )
562
+ assert_eq (p1_proba , p2_proba , atol = 0.04 )
563
+ # try:
564
+ # assert_eq(p1_proba, p2_proba, atol=0.04)
565
+ # except Exception as err:
566
+ # max_diff = np.max(np.abs(p1_proba - p2_proba))
567
+ # num_samples = np.sum(np.abs(p1_proba - p2_proba) > 0.04)
568
+ # raise RuntimeError(f"max diff: {max_diff} | n: {num_samples}")
556
569
557
570
558
571
def test_group_workers_by_host ():
0 commit comments