I can train an XGBoost model using the SageMaker built-in container images like so:
Python
x
45
45
1
import boto3
2
import sagemaker
3
from sagemaker.inputs import TrainingInput
4
import os
5
6
folder = r"C:Somewhere"
7
os.chdir(folder)
8
9
s3_prefix = 'some_model'
10
s3_bucket_name = 'the_bucket'
11
train_file_name = 'train.csv'
12
val_file_name = 'val.csv'
13
role_arn = 'arn:aws:iam::482777693429:role/bla_instance_role'
14
15
region_name = boto3.Session().region_name
16
17
s3_input_train = TrainingInput(s3_data='s3://{}/{}/{}'.format(s3_bucket_name, s3_prefix, train_file_name), content_type='csv')
18
s3_input_val = TrainingInput(s3_data='s3://{}/{}/{}'.format(s3_bucket_name, s3_prefix, val_file_name), content_type='csv')
19
20
print(type(s3_input_train))
21
22
hyperparameters = {
23
"max_depth":"13",
24
"eta":"0.15",
25
"gamma":"4",
26
"min_child_weight":"6",
27
"subsample":"0.7",
28
"objective":"reg:squarederror",
29
"num_round":"50"}
30
31
output_path = 's3://{}/{}/output'.format(s3_bucket_name, s3_prefix)
32
33
# 1.5-1
34
# 1.3-1
35
estimator = sagemaker.estimator.Estimator(image_uri=sagemaker.image_uris.retrieve("xgboost", region_name, "1.2-2"),
36
hyperparameters=hyperparameters,
37
role=role_arn,
38
instance_count=1,
39
instance_type='ml.m5.2xlarge',
40
#instance_type='local',
41
volume_size=1, # 1 GB
42
output_path=output_path)
43
44
estimator.fit({'train': s3_input_train, 'validation': s3_input_val})
45
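This works for all three container versions: 1.2-2, 1.3-1 and 1.5-1. Unfortunately, the following code, which downloads the trained model artifact and unpickles it, only works for models trained with version 1.2-2: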
Python
1
22
22
1
import boto3
2
import os
3
import pickle as pkl
4
import tarfile
5
import pandas as pd
6
import xgboost as xgb
7
8
folder = r"C:Somewhere"
9
os.chdir(folder)
10
11
s3_prefix = 'some_model'
12
s3_bucket_name = 'the_bucket'
13
model_path = 'output/sagemaker-xgboost-2022-04-30-10-52-29-877/output/model.tar.gz'
14
session = boto3.Session(profile_name='default')
15
session.resource('s3').Bucket(s3_bucket_name).download_file('{}/{}'.format(s3_prefix, model_path), 'model.tar.gz')
16
t = tarfile.open('model.tar.gz', 'r:gz')
17
t.extractall()
18
19
model_file_name = 'xgboost-model'
20
with open(model_file_name, "rb") as input_file:
21
e = pkl.load(input_file)
22
For models trained with the other versions (1.3-1 and 1.5-1) I get:
JavaScript
1
2
1
_pickle.UnpicklingError: unpickling stack underflow
2
Am I missing something? Is my pickle-loading code wrong?
The version of xgboost installed where I run the pickle code is 1.6.0.
Advertisement
Answer
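I found the solution here (the link was in the original post). I will leave it in case someone comes across the same issue: models trained with the SageMaker XGBoost containers 1.3-1 and later are not pickled; the `xgboost-model` file is written in XGBoost's native model format. Instead of `pkl.load(input_file)`, load the extracted file with `model = xgb.Booster()` followed by `model.load_model(model_file_name)`. Only models trained with the older containers (such as 1.2-2) can be loaded with `pickle`.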