I want to replace NaN values in column B with the values from column A in df. df: df data types: I tried: and it raised an error: Answer: Use numpy.where to avoid floats in the output column. If you need strings in the output:

TypeError: object cannot be converted to an IntegerDtype

I want to replace the NaN (`<NA>`) values in column B with the corresponding values from column A in df:

df:

    A       B
0   Bb00    100080
1   Aa00    <NA>
2   Cc10    450089

JavaScript
​x
 
    A       B
0   Bb00    100080
1   Aa00    <NA>
2   Cc10    450089
​
​

df data types:

A    object
B     Int64
dtype: object

JavaScript
 
A    object
B     Int64
dtype: object
​

I tried:

df['new_col'] = df['B'].fillna(df['A'])

JavaScript
 
df['new_col'] = df['B'].fillna(df['A'])
​

and it raised an error:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-120-1d24cd9977ed> in <module>
      2 print(df.dtypes)
      3 
----> 4 df['new_col'] = df['B'].fillna(df['A'])

~\AppData\Roaming\Python\Python38\site-packages\pandas\core\series.py in fillna(self, value, method, axis, inplace, limit, downcast)
   4515         downcast=None,
   4516     ) -> Optional["Series"]:
-> 4517         return super().fillna(
   4518             value=value,
   4519             method=method,

~\AppData\Roaming\Python\Python38\site-packages\pandas\core\generic.py in fillna(self, value, method, axis, inplace, limit, downcast)
   6046                     )
   6047 
-> 6048                 new_data = self._mgr.fillna(
   6049                     value=value, limit=limit, inplace=inplace, downcast=downcast
   6050                 )

~\AppData\Roaming\Python\Python38\site-packages\pandas\core\internals\managers.py in fillna(self, value, limit, inplace, downcast)
    555 
    556     def fillna(self, value, limit, inplace: bool, downcast) -> "BlockManager":
--> 557         return self.apply(
    558             "fillna", value=value, limit=limit, inplace=inplace, downcast=downcast
    559         )

~\AppData\Roaming\Python\Python38\site-packages\pandas\core\internals\managers.py in apply(self, f, align_keys, **kwargs)
    394                 applied = b.apply(f, **kwargs)
    395             else:
--> 396                 applied = getattr(b, f)(**kwargs)
    397             result_blocks = _extend_blocks(applied, result_blocks)
    398 

~\AppData\Roaming\Python\Python38\site-packages\pandas\core\internals\blocks.py in fillna(self, value, limit, inplace, downcast)
   1773     def fillna(self, value, limit=None, inplace=False, downcast=None):
   1774         values = self.values if inplace else self.values.copy()
-> 1775         values = values.fillna(value=value, limit=limit)
   1776         return [
   1777             self.make_block_same_class(

~\AppData\Roaming\Python\Python38\site-packages\pandas\core\arrays\base.py in fillna(self, value, method, limit)
    615                 # fill with value
    616                 new_values = self.copy()
--> 617                 new_values[mask] = value
    618         else:
    619             new_values = self.copy()

~\AppData\Roaming\Python\Python38\site-packages\pandas\core\arrays\masked.py in __setitem__(self, key, value)
    103         if _is_scalar:
    104             value = [value]
--> 105         value, mask = self._coerce_to_array(value)
    106 
    107         if _is_scalar:

~\AppData\Roaming\Python\Python38\site-packages\pandas\core\arrays\integer.py in _coerce_to_array(self, value)
    423 
    424     def _coerce_to_array(self, value) -> Tuple[np.ndarray, np.ndarray]:
--> 425         return coerce_to_array(value, dtype=self.dtype)
    426 
    427     def astype(self, dtype, copy: bool = True) -> ArrayLike:

~\AppData\Roaming\Python\Python38\site-packages\pandas\core\arrays\integer.py in coerce_to_array(values, dtype, mask, copy)
    241             "mixed-integer-float",
    242         ]:
--> 243             raise TypeError(f"{values.dtype} cannot be converted to an IntegerDtype")
    244 
    245     elif is_bool_dtype(values) and is_integer_dtype(dtype):

TypeError: object cannot be converted to an IntegerDtype

JavaScript
 
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-120-1d24cd9977ed> in <module>
      2 print(df.dtypes)
      3 
----> 4 df['new_col'] = df['B'].fillna(df['A'])
​
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\series.py in fillna(self, value, method, axis, inplace, limit, downcast)
   4515         downcast=None,
   4516     ) -> Optional["Series"]:
-> 4517         return super().fillna(
   4518             value=value,
   4519             method=method,
​
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\generic.py in fillna(self, value, method, axis, inplace, limit, downcast)
   6046                     )
   6047 
-> 6048                 new_data = self._mgr.fillna(
   6049                     value=value, limit=limit, inplace=inplace, downcast=downcast
   6050                 )
​
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\internals\managers.py in fillna(self, value, limit, inplace, downcast)
    555 
    556     def fillna(self, value, limit, inplace: bool, downcast) -> "BlockManager":
--> 557         return self.apply(
    558             "fillna", value=value, limit=limit, inplace=inplace, downcast=downcast
    559         )
​
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\internals\managers.py in apply(self, f, align_keys, **kwargs)
    394                 applied = b.apply(f, **kwargs)
    395             else:
--> 396                 applied = getattr(b, f)(**kwargs)
    397             result_blocks = _extend_blocks(applied, result_blocks)
    398 
​
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\internals\blocks.py in fillna(self, value, limit, inplace, downcast)
   1773     def fillna(self, value, limit=None, inplace=False, downcast=None):
   1774         values = self.values if inplace else self.values.copy()
-> 1775         values = values.fillna(value=value, limit=limit)
   1776         return [
   1777             self.make_block_same_class(
​
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\arrays\base.py in fillna(self, value, method, limit)
    615                 # fill with value
    616                 new_values = self.copy()
--> 617                 new_values[mask] = value
    618         else:
    619             new_values = self.copy()
​
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\arrays\masked.py in __setitem__(self, key, value)
    103         if _is_scalar:
    104             value = [value]
--> 105         value, mask = self._coerce_to_array(value)
    106 
    107         if _is_scalar:
​
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\arrays\integer.py in _coerce_to_array(self, value)
    423 
    424     def _coerce_to_array(self, value) -> Tuple[np.ndarray, np.ndarray]:
--> 425         return coerce_to_array(value, dtype=self.dtype)
    426 
    427     def astype(self, dtype, copy: bool = True) -> ArrayLike:
​
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\arrays\integer.py in coerce_to_array(values, dtype, mask, copy)
    241             "mixed-integer-float",
    242         ]:
--> 243             raise TypeError(f"{values.dtype} cannot be converted to an IntegerDtype")
    244 
    245     elif is_bool_dtype(values) and is_integer_dtype(dtype):
​
TypeError: object cannot be converted to an IntegerDtype
​

Answer

Use numpy.where to avoid floats in the output column:

df['new_col'] = np.where(df['B'].isna(), df.A, df.B)
print (df)
      A       B new_col
0  Bb00  100080  100080
1  Aa00    <NA>    Aa00
2  Cc10  450089  450089

JavaScript
 
df['new_col'] = np.where(df['B'].isna(), df.A, df.B)
print (df)
      A       B new_col
0  Bb00  100080  100080
1  Aa00    <NA>    Aa00
2  Cc10  450089  450089
​

print (df['new_col'].apply(type))
0    <class 'int'>
1    <class 'str'>
2    <class 'int'>
Name: new_col, dtype: object

JavaScript
 
print (df['new_col'].apply(type))
0    <class 'int'>
1    <class 'str'>
2    <class 'int'>
Name: new_col, dtype: object
​

If you need strings in the output:

df['new_col'] = np.where(df['B'].isna(), df.A, df.B).astype(str)

print (df)
      A       B new_col
0  Bb00  100080  100080
1  Aa00    <NA>    Aa00
2  Cc10  450089  450089

print (df['new_col'].apply(type))
0    <class 'str'>
1    <class 'str'>
2    <class 'str'>
Name: new_col, dtype: object

JavaScript
 
df['new_col'] = np.where(df['B'].isna(), df.A, df.B).astype(str)
​
print (df)
      A       B new_col
0  Bb00  100080  100080
1  Aa00    <NA>    Aa00
2  Cc10  450089  450089
​
print (df['new_col'].apply(type))
0    <class 'str'>
1    <class 'str'>
2    <class 'str'>
Name: new_col, dtype: object
​

Advertisement

Answer