I'm trying to build a classifier with XGBoost, and I tune and fit it with RandomizedSearchCV.
Here is the code of my function:
def xgboost_classifier_rscv(x,y):
    """Tune an XGBoost classifier with a randomized search and predict on a held-out split.

    The data is split 80/20, the training part is turned into bag-of-words
    counts and then TF-IDF weights, and a RandomizedSearchCV over XGBoost
    hyper-parameters is fitted on it with 3-fold stratified cross-validation.

    Args:
        x: Samples handed to CountVectorizer (presumably raw text documents;
            confirm against the caller).
        y: Class labels aligned with ``x``.

    Returns:
        A tuple ``(y_test, xgb_predict, xgb_pred_prob)``: the held-out labels,
        the predicted labels and the predicted class probabilities.
    """
    from scipy import stats
    from xgboost import XGBClassifier
    from sklearn.metrics import fbeta_score, make_scorer, recall_score, accuracy_score, precision_score
    from sklearn.model_selection import StratifiedKFold, GridSearchCV, RandomizedSearchCV
    # NOTE(review): train_test_split, CountVectorizer, TfidfTransformer and
    # f1_score are used below but not imported here; presumably they are
    # imported at module level. Confirm.

    # Split the dataset into training and test parts.
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

    # Bag-of-words implementation.
    cv = CountVectorizer()
    x_train = cv.fit_transform(x_train).toarray()

    # TF-IDF implementation.
    vector = TfidfTransformer()
    x_train = vector.fit_transform(x_train).toarray()
    # NOTE(review): x_test is only count-vectorized; the TF-IDF transformer
    # fitted above is never applied to it. Verify that this is intended.
    x_test = cv.transform(x_test)

    # Several metrics are scored at once; the dict keys are the scorer names.
    scorers = {
        'f1_score':make_scorer(f1_score),
        'precision_score': make_scorer(precision_score),
        'recall_score': make_scorer(recall_score),
        'accuracy_score': make_scorer(accuracy_score)
    }

    # Distributions / candidate lists sampled by the randomized search.
    param_dist = {'n_estimators': stats.randint(150, 1000),
                  'learning_rate': stats.uniform(0.01, 0.59),
                  'subsample': stats.uniform(0.3, 0.6),
                  'max_depth': [3, 4, 5, 6, 7, 8, 9],
                  'colsample_bytree': stats.uniform(0.5, 0.4),
                  'min_child_weight': [1, 2, 3, 4]
                  }

    # n_folds = numFolds)
    # NOTE(review): the fragment above is not valid Python on its own; it is
    # presumably the tail of a commented-out line lost when this was pasted.

    skf = StratifiedKFold(n_splits=3, shuffle = True)

    # NOTE(review): xgb_model is not defined anywhere in this function.
    # NOTE(review): refit is given the precision_score function itself, not
    # the 'precision_score' key of `scorers`. A callable refit is invoked by
    # the search as refit(results), which is what raises the TypeError about
    # the missing 'y_pred' argument.
    gridCV = RandomizedSearchCV(xgb_model,
                                param_distributions = param_dist,
                                cv = skf,
                                n_iter = 5,
                                scoring = scorers,
                                verbose = 3,
                                n_jobs = -1,
                                return_train_score=True,
                                refit = precision_score)

    gridCV.fit(x_train,y_train)
    best_pars = gridCV.best_params_
    print("best params : ", best_pars)

    xgb_predict = gridCV.predict(x_test)
    xgb_pred_prob = gridCV.predict_proba(x_test)

    print('best scores : ', gridCV.grid_scores_)
    scores = [x[1] for x in gridCV.grid_scores_]
    print("best scores : ", scores)

    return y_test, xgb_predict, xgb_pred_prob
When I run the code, I get an error, reported below:
TypeError                                 Traceback (most recent call last)
<ipython-input-30-9adf84d48e5c> in <module>
      1 print("********** Xgboost classifier *************")
      2 start_time = time.monotonic()
----> 3 y_test, xgb_predict, xgb_pred_prob = xgboost_classifier_rscv(x,y)
      4 end_time = time.monotonic()
      5 print("the time consumed is : ", timedelta(seconds=end_time - start_time))

<ipython-input-29-e0c6ae026076> in xgboost_classifier_rscv(x, y)
     70     # verbose=3, random_state=1001, refit='precision_score' )
     71
---> 72     gridCV.fit(x_train,y_train)
     73     best_pars = gridCV.best_params_
     74     print("best params : ", best_pars)

~\anaconda3\lib\site-packages\sklearn\utils\validation.py in inner_f(*args, **kwargs)
     61             extra_args = len(args) - len(all_args)
     62             if extra_args <= 0:
---> 63                 return f(*args, **kwargs)
     64
     65             # extra_args > 0

~\anaconda3\lib\site-packages\sklearn\model_selection\_search.py in fit(self, X, y, groups, **fit_params)
    858             # parameter set.
    859             if callable(self.refit):
--> 860                 self.best_index_ = self.refit(results)
    861                 if not isinstance(self.best_index_, numbers.Integral):
    862                     raise TypeError('best_index_ returned is not an integer')

~\anaconda3\lib\site-packages\sklearn\utils\validation.py in inner_f(*args, **kwargs)
     61             extra_args = len(args) - len(all_args)
     62             if extra_args <= 0:
---> 63                 return f(*args, **kwargs)
     64
     65             # extra_args > 0

TypeError: precision_score() missing 1 required positional argument: 'y_pred'
When I do the same thing but with GridSearchCV instead of RandomizedSearchCV, the code runs without any problems!
Advertisement
Answer
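The refit argument must not be the function precision_score; it must be the string 'precision_score' (in quotes), i.e. the key you used in your scorers dict. When refit is a callable, scikit-learn calls it with the search results and expects it to return the index of the best candidate, which is why precision_score complains about a missing 'y_pred' argument. Write it like this: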
# With several scorers, `refit` must name the one (by its key in `scorers`)
# that selects the best candidate and is used to refit the final model.
gridCV = RandomizedSearchCV(
    xgb_model,
    param_distributions=param_dist,
    cv=skf,
    n_iter=5,
    scoring=scorers,
    verbose=3,
    n_jobs=-1,
    return_train_score=True,
    refit='precision_score',
)
Another error: the grid_scores_ attribute has been removed from scikit-learn, so replace it with cv_results_ (in the fourth- and third-from-last lines of your function):
# cv_results_ is a dict of per-candidate arrays, not a list of tuples like the
# old grid_scores_, so iterating over it would only yield its key names.
print('best scores : ', gridCV.cv_results_)
# With multi-metric scoring the mean validation score of each candidate is
# stored under 'mean_test_<scorer name>'; take the metric used for refit.
scores = list(gridCV.cv_results_['mean_test_precision_score'])
One more error: xgb_model is never defined, so create it before passing it to RandomizedSearchCV:
# Base estimator whose hyper-parameters the search will tune; it has to be
# created before it is passed to RandomizedSearchCV.
xgb_model = XGBClassifier(n_jobs = -1, random_state = 42)