I can train a XGBoost model using Sagemaker images like so:
import boto3 import sagemaker from sagemaker.inputs import TrainingInput import os folder = r"C:Somewhere" os.chdir(folder) s3_prefix = 'some_model' s3_bucket_name = 'the_bucket' train_file_name = 'train.csv' val_file_name = 'val.csv' role_arn = 'arn:aws:iam::482777693429:role/bla_instance_role' region_name = boto3.Session().region_name s3_input_train = TrainingInput(s3_data='s3://{}/{}/{}'.format(s3_bucket_name, s3_prefix, train_file_name), content_type='csv') s3_input_val = TrainingInput(s3_data='s3://{}/{}/{}'.format(s3_bucket_name, s3_prefix, val_file_name), content_type='csv') print(type(s3_input_train)) hyperparameters = { "max_depth":"13", "eta":"0.15", "gamma":"4", "min_child_weight":"6", "subsample":"0.7", "objective":"reg:squarederror", "num_round":"50"} output_path = 's3://{}/{}/output'.format(s3_bucket_name, s3_prefix) # 1.5-1 # 1.3-1 estimator = sagemaker.estimator.Estimator(image_uri=sagemaker.image_uris.retrieve("xgboost", region_name, "1.2-2"), hyperparameters=hyperparameters, role=role_arn, instance_count=1, instance_type='ml.m5.2xlarge', #instance_type='local', volume_size=1, # 1 GB output_path=output_path) estimator.fit({'train': s3_input_train, 'validation': s3_input_val})
This work for all versions 1.2-2, 1.3-1 and 1.5-1. Unfortunately the following code only works for version 1.2-2:
import boto3 import os import pickle as pkl import tarfile import pandas as pd import xgboost as xgb folder = r"C:Somewhere" os.chdir(folder) s3_prefix = 'some_model' s3_bucket_name = 'the_bucket' model_path = 'output/sagemaker-xgboost-2022-04-30-10-52-29-877/output/model.tar.gz' session = boto3.Session(profile_name='default') session.resource('s3').Bucket(s3_bucket_name).download_file('{}/{}'.format(s3_prefix, model_path), 'model.tar.gz') t = tarfile.open('model.tar.gz', 'r:gz') t.extractall() model_file_name = 'xgboost-model' with open(model_file_name, "rb") as input_file: e = pkl.load(input_file)
Otherwise I get a:
_pickle.UnpicklingError: unpickling stack underflow
Am I missing something? Is my “pickle loading code wrong”?
The version of xgboost is 1.6.0 where I run the pickle code.
Advertisement
Answer
I found the solution here. I will leave it in case someone come accross the same issue.