I am using an AdamW optimizer with a cosine decay learning rate schedule that includes warmup. I have written the custom scheduler from scratch, and I am using the AdamW optimizer provided by the TensorFlow Addons library.
import numpy as np
import tensorflow as tf
import tensorflow_addons as tfa
import wandb


class CosineScheduler(tf.keras.optimizers.schedules.LearningRateSchedule):
    def __init__(self,
                 learning_rate_base,
                 total_steps,
                 warmup_learning_rate=0.0,
                 warmup_steps=0):
        super().__init__()
        self.learning_rate_base = learning_rate_base
        self.total_steps = total_steps
        self.warmup_learning_rate = warmup_learning_rate
        self.warmup_steps = warmup_steps

    def __call__(self, step):
        # Cosine decay from learning_rate_base down to 0 over the steps after warmup.
        learning_rate = 0.5 * self.learning_rate_base * (1 + tf.cos(
            np.pi * (tf.cast(step, tf.float32) - self.warmup_steps)
            / float(self.total_steps - self.warmup_steps)))
        if self.warmup_steps > 0:
            # Linear warmup from warmup_learning_rate up to learning_rate_base.
            slope = (self.learning_rate_base - self.warmup_learning_rate) / self.warmup_steps
            warmup_rate = slope * tf.cast(step, tf.float32) + self.warmup_learning_rate
            learning_rate = tf.where(step < self.warmup_steps, warmup_rate, learning_rate)
        # Clamp to 0 once total_steps is exceeded.
        lr = tf.where(step > self.total_steps, 0.0, learning_rate, name='learning_rate')
        wandb.log({"lr": lr})
        return lr


learning_rate = CosineScheduler(learning_rate_base=0.001,
                                total_steps=23000,
                                warmup_learning_rate=0.0,
                                warmup_steps=1660)
loss_func = tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.1)
optimizer = tfa.optimizers.AdamW(learning_rate,weight_decay=0.1)
I get the following error, which says that weight_decay got multiple values:
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-12-6f9fd0a9c1cb> in <module>
      1 loss_func = tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.1)
----> 2 optimizer = tfa.optimizers.AdamW(learning_rate,weight_decay=0.1)

/opt/conda/lib/python3.7/site-packages/typeguard/__init__.py in wrapper(*args, **kwargs)
    923 
    924     def wrapper(*args, **kwargs):
--> 925         memo = _CallMemo(python_func, _localns, args=args, kwargs=kwargs)
    926         check_argument_types(memo)
    927         retval = func(*args, **kwargs)

/opt/conda/lib/python3.7/site-packages/typeguard/__init__.py in __init__(self, func, frame_locals, args, kwargs, forward_refs_policy)
    126 
    127         if args is not None and kwargs is not None:
--> 128             self.arguments = signature.bind(*args, **kwargs).arguments
    129         else:
    130             assert frame_locals is not None, 'frame must be specified if args or kwargs is None'

/opt/conda/lib/python3.7/inspect.py in bind(*args, **kwargs)
   3013         if the passed arguments can not be bound.
   3014         """
-> 3015         return args[0]._bind(args[1:], kwargs)
   3016 
   3017     def bind_partial(*args, **kwargs):

/opt/conda/lib/python3.7/inspect.py in _bind(self, args, kwargs, partial)
   2954                     raise TypeError(
   2955                         'multiple values for argument {arg!r}'.format(
-> 2956                             arg=param.name)) from None
   2957 
   2958                 arguments[param.name] = arg_val

TypeError: multiple values for argument 'weight_decay'
What is causing the problem, and how do I resolve it?
Answer
The problem is that weight_decay is the first positional argument of tfa.optimizers.AdamW. In
optimizer = tfa.optimizers.AdamW(learning_rate,weight_decay=0.1)
you hand over one positional argument and the keyword argument weight_decay. The positional argument is bound to weight_decay, and the keyword argument then supplies a second value for the same parameter, which causes the error. According to the documentation, learning_rate is the second positional parameter (even though it is optional), not the first.
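You can reproduce the same failure with a plain Python function; adamw_like below is only a simplified stand-in for the real constructor, whose first positional parameter is likewise weight_decay:
def adamw_like(weight_decay, learning_rate=0.001):
    return weight_decay, learning_rate

# The positional argument fills weight_decay, and the keyword argument then
# tries to assign weight_decay a second time:
adamw_like(0.0003, weight_decay=0.1)
# TypeError: adamw_like() got multiple values for argument 'weight_decay'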
Just write
optimizer = tfa.optimizers.AdamW(0.1, learning_rate)
or
optimizer = tfa.optimizers.AdamW(weight_decay=0.1, learning_rate=learning_rate)
or
optimizer = tfa.optimizers.AdamW(learning_rate=learning_rate, weight_decay=0.1)
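The keyword-only forms are the least error-prone, since they do not depend on the parameter order. As a quick check (a sketch only; model stands for whatever Keras model you are training and is not defined above), the corrected call builds the optimizer without the TypeError and can be used as usual:
optimizer = tfa.optimizers.AdamW(learning_rate=learning_rate, weight_decay=0.1)
model.compile(optimizer=optimizer, loss=loss_func, metrics=['accuracy'])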