I’m tuning LGBMRegressor parameters with scikit-learn’s RandomizedSearchCV, and I got the error below.
error:

LightGBMError: b'Check failed: num_data > 0 at /src/LightGBM/src/io/dataset.cpp, line 27 .\n'
I cannot tell why, or which specific parameters caused this error. Is any of the param_dist entries below unsuitable for train_x.shape = (1630, 1565)?
Any hints or solutions would be appreciated. Thank you.
LightGBM version: '2.0.12'
Function that caused this error:
import scipy as sp
import scipy.stats  # makes sp.stats available
import lightgbm as lgb
from sklearn.model_selection import RandomizedSearchCV

def get_lgbm(train_x, train_y, val_x, val_y):
    lgbm = lgb.LGBMRegressor(
        objective='regression',
        device='gpu',
        n_jobs=1,
    )
    # search space for RandomizedSearchCV
    param_dist = {'boosting_type': ['gbdt', 'dart', 'rf'],
                  'num_leaves': sp.stats.randint(2, 1001),
                  'subsample_for_bin': sp.stats.randint(10, 1001),
                  'min_split_gain': sp.stats.uniform(0, 5.0),
                  'min_child_weight': sp.stats.uniform(1e-6, 1e-2),
                  'reg_alpha': sp.stats.uniform(0, 1e-2),
                  'reg_lambda': sp.stats.uniform(0, 1e-2),
                  'tree_learner': ['data', 'feature', 'serial', 'voting'],
                  'application': ['regression_l1', 'regression_l2', 'regression'],
                  'bagging_freq': sp.stats.randint(1, 11),
                  'bagging_fraction': sp.stats.uniform(1e-3, 0.99),
                  'feature_fraction': sp.stats.uniform(1e-3, 0.99),
                  'learning_rate': sp.stats.uniform(1e-6, 0.99),
                  'max_depth': sp.stats.randint(1, 501),
                  'n_estimators': sp.stats.randint(100, 20001),
                  'gpu_use_dp': [True, False],
                  }
    rscv = RandomizedSearchCV(
        estimator=lgbm,
        param_distributions=param_dist,
        cv=3,
        n_iter=3000,
        n_jobs=4,
        verbose=1,
        refit=True,
        fit_params={'eval_set': (val_x, val_y.ravel()),
                    'early_stopping_rounds': 1,
                    'eval_metric': ['l2', 'l1'],
                    'verbose': False,
                    },
    )
    # This line throws the error
    rscv = rscv.fit(train_x,
                    train_y.ravel(),
                    )
    return rscv.best_estimator_
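For reference, here is how those fraction distributions sample (a quick check; scipy's uniform(loc, scale) is supported on [loc, loc + scale]):

import scipy.stats

# uniform(loc, scale) draws from [loc, loc + scale], so the fractions
# above can be sampled as low as ~1e-3
dist = scipy.stats.uniform(1e-3, 0.99)
print(dist.ppf(0.0), dist.ppf(1.0))  # -> 0.001 0.991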
The full stack trace is too long to post; here is the part inside the LightGBM source.
/opt/conda/lib/python3.6/site-packages/lightgbm/sklearn.py in fit(self=LGBMRegressor(application='regression_l1', ..., subsample_freq=1, ..., tree_learner='voting'), X=memmap([[-0.80256822, 1.63302752, -0.55377441, ..., 12.251635, 12.27866017, 1.]]), y=array([-1.81712472, 0., -1.7366136, 0..., 0.36258158, -0.13661202, 0.2919708]), sample_weight=None, init_score=None, eval_set=(memmap([[-1.16531701, -0.97454256, -1.36807818, ..., 11.55465037, 11.55160629, 2.]]), array([0.58517555, -1.01419878, -0.05787037, -0...64139942, 1.04166667, 0., -0.11668611])), eval_names=None, eval_sample_weight=None, eval_init_score=None, eval_metric=['l2', 'l1'], early_stopping_rounds=1, verbose=False, feature_name='auto', categorical_feature='auto', callbacks=None)
    613             eval_init_score=eval_init_score,
    614             eval_metric=eval_metric,
    615             early_stopping_rounds=early_stopping_rounds,
    616             verbose=verbose, feature_name=feature_name,
    617             categorical_feature=categorical_feature,
--> 618             callbacks=callbacks)
        callbacks = None
    619         return self
    620
    621     base_doc = LGBMModel.fit.__doc__
    622     fit.__doc__ = (base_doc[:base_doc.find('eval_class_weight :')] +

/opt/conda/lib/python3.6/site-packages/lightgbm/sklearn.py in fit(self=LGBMRegressor(application='regression_l1', ..., subsample_freq=1, ..., tree_learner='voting'), X=array([[-0.80256822, 1.63302752, -0.55377441, ..., 12.251635, 12.27866017, 1.]]), y=array([-1.81712472, 0., -1.7366136, 0..., 0.36258158, -0.13661202, 0.2919708]), sample_weight=None, init_score=None, group=None, eval_set=[(memmap([[-1.16531701, -0.97454256, -1.36807818, ..., 11.55465037, 11.55160629, 2.]]), array([0.58517555, -1.01419878, -0.05787037, -0...64139942, 1.04166667, 0., -0.11668611]))], eval_names=None, eval_sample_weight=None, eval_class_weight=None, eval_init_score=None, eval_group=None, eval_metric=['l2', 'l1'], early_stopping_rounds=1, verbose=False, feature_name='auto', categorical_feature='auto', callbacks=None)
    468             self.n_estimators, valid_sets=valid_sets, valid_names=eval_names,
    469             early_stopping_rounds=early_stopping_rounds,
    470             evals_result=evals_result, fobj=self._fobj, feval=feval,
    471             verbose_eval=verbose, feature_name=feature_name,
    472             categorical_feature=categorical_feature,
--> 473             callbacks=callbacks)
        callbacks = None
    474
    475         if evals_result:
    476             self._evals_result = evals_result
    477

/opt/conda/lib/python3.6/site-packages/lightgbm/engine.py in train(params={'application': 'regression_l1', 'bagging_fraction': 0.0013516565394267757, 'bagging_freq': 8, 'boosting_type': 'dart', 'colsample_bytree': 1.0, 'device': 'gpu', 'feature_fraction': 0.18574060093496944, 'gpu_use_dp': True, 'learning_rate': 0.06354739024799887, 'max_depth': 267, ...}, train_set=<lightgbm.basic.Dataset object>, num_boost_round=11610, valid_sets=[<lightgbm.basic.Dataset object>], valid_names=None, fobj=None, feval=None, init_model=None, feature_name='auto', categorical_feature='auto', early_stopping_rounds=1, evals_result={}, verbose_eval=False, learning_rates=None, keep_training_booster=False, callbacks={<function print_evaluation.<locals>.callback>, <function early_stopping.<locals>.callback>, <function record_evaluation.<locals>.callback>})
    175     callbacks_before_iter = sorted(callbacks_before_iter, key=attrgetter('order'))
    176     callbacks_after_iter = sorted(callbacks_after_iter, key=attrgetter('order'))
    177
    178     # construct booster
    179     try:
--> 180         booster = Booster(params=params, train_set=train_set)
        booster = undefined
        params = {'application': 'regression_l1', 'bagging_fraction': 0.0013516565394267757, 'bagging_freq': 8, 'boosting_type': 'dart', 'colsample_bytree': 1.0, 'device': 'gpu', 'feature_fraction': 0.18574060093496944, 'gpu_use_dp': True, 'learning_rate': 0.06354739024799887, 'max_depth': 267, ...}
        train_set = <lightgbm.basic.Dataset object>
    181         if is_valid_contain_train:
    182             booster.set_train_data_name(train_data_name)
    183         for valid_set, name_valid_set in zip(reduced_valid_sets, name_valid_sets):
    184             booster.add_valid(valid_set, name_valid_set)

/opt/conda/lib/python3.6/site-packages/lightgbm/basic.py in __init__(self=<lightgbm.basic.Booster object>, params={'application': 'regression_l1', 'bagging_fraction': 0.0013516565394267757, 'bagging_freq': 8, 'boosting_type': 'dart', 'colsample_bytree': 1.0, 'device': 'gpu', 'feature_fraction': 0.18574060093496944, 'gpu_use_dp': True, 'learning_rate': 0.06354739024799887, 'max_depth': 267, ...}, train_set=<lightgbm.basic.Dataset object>, model_file=None, silent=False)
   1290         # construct booster object
   1291         self.handle = ctypes.c_void_p()
   1292         _safe_call(_LIB.LGBM_BoosterCreate(
   1293             train_set.construct().handle,
   1294             c_str(params_str),
-> 1295             ctypes.byref(self.handle)))
        self.handle = c_void_p(None)
   1296         # save reference to data
   1297         self.train_set = train_set
   1298         self.valid_sets = []
   1299         self.name_valid_sets = []

/opt/conda/lib/python3.6/site-packages/lightgbm/basic.py in _safe_call(ret=-1)
     43     ----------
     44     ret : int
     45         return value from API calls
     46     """
     47     if ret != 0:
---> 48         raise LightGBMError(_LIB.LGBM_GetLastError())
     49
     50
     51 def is_numeric(obj):
     52     """Check is a number or not, include numpy number etc."""

LightGBMError: b'Check failed: num_data > 0 at /usr/local/src/lightgbm/LightGBM/src/io/dataset.cpp, line 27 .\n'
Answer
The minimum values of bagging_fraction and feature_fraction could be too small. Presumably, with fractions that close to zero, LightGBM is left with (almost) no data to build the Dataset/Booster from, which trips the num_data > 0 check. I changed the distribution to sp.stats.uniform(loc=0.1, scale=0.9) and it works.
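In terms of the question's code, a minimal sketch of that change (only the two fraction entries are replaced; everything else in param_dist stays the same):

# uniform(loc=0.1, scale=0.9) samples from [0.1, 1.0], so the sampled
# row/feature subsets can no longer be (nearly) empty
param_dist['bagging_fraction'] = sp.stats.uniform(loc=0.1, scale=0.9)
param_dist['feature_fraction'] = sp.stats.uniform(loc=0.1, scale=0.9)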