I want to replace the missing (<NA>) values in column B with the corresponding values from column A in df:
df:
      A       B
0  Bb00  100080
1  Aa00    <NA>
2  Cc10  450089
df data types:
A    object
B     Int64
dtype: object
I tried:
df['new_col'] = df['B'].fillna(df['A'])
and it raised an error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-120-1d24cd9977ed> in <module>
2 print(df.dtypes)
3
----> 4 df['new_col'] = df['B'].fillna(df['A'])
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\series.py in fillna(self, value, method, axis, inplace, limit, downcast)
4515 downcast=None,
4516 ) -> Optional["Series"]:
-> 4517 return super().fillna(
4518 value=value,
4519 method=method,
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\generic.py in fillna(self, value, method, axis, inplace, limit, downcast)
6046 )
6047
-> 6048 new_data = self._mgr.fillna(
6049 value=value, limit=limit, inplace=inplace, downcast=downcast
6050 )
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\internals\managers.py in fillna(self, value, limit, inplace, downcast)
555
556 def fillna(self, value, limit, inplace: bool, downcast) -> "BlockManager":
--> 557 return self.apply(
558 "fillna", value=value, limit=limit, inplace=inplace, downcast=downcast
559 )
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\internals\managers.py in apply(self, f, align_keys, **kwargs)
394 applied = b.apply(f, **kwargs)
395 else:
--> 396 applied = getattr(b, f)(**kwargs)
397 result_blocks = _extend_blocks(applied, result_blocks)
398
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\internals\blocks.py in fillna(self, value, limit, inplace, downcast)
1773 def fillna(self, value, limit=None, inplace=False, downcast=None):
1774 values = self.values if inplace else self.values.copy()
-> 1775 values = values.fillna(value=value, limit=limit)
1776 return [
1777 self.make_block_same_class(
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\arrays\base.py in fillna(self, value, method, limit)
615 # fill with value
616 new_values = self.copy()
--> 617 new_values[mask] = value
618 else:
619 new_values = self.copy()
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\arrays\masked.py in __setitem__(self, key, value)
103 if _is_scalar:
104 value = [value]
--> 105 value, mask = self._coerce_to_array(value)
106
107 if _is_scalar:
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\arrays\integer.py in _coerce_to_array(self, value)
423
424 def _coerce_to_array(self, value) -> Tuple[np.ndarray, np.ndarray]:
--> 425 return coerce_to_array(value, dtype=self.dtype)
426
427 def astype(self, dtype, copy: bool = True) -> ArrayLike:
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\arrays\integer.py in coerce_to_array(values, dtype, mask, copy)
241 "mixed-integer-float",
242 ]:
--> 243 raise TypeError(f"{values.dtype} cannot be converted to an IntegerDtype")
244
245 elif is_bool_dtype(values) and is_integer_dtype(dtype):
TypeError: object cannot be converted to an IntegerDtype
Advertisement
Answer
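fillna fails here because column B has the nullable Int64 dtype, which cannot hold the string values from column A (hence "object cannot be converted to an IntegerDtype"). Use numpy.where instead (with numpy imported as np), which also avoids floats in the output column: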
df['new_col'] = np.where(df['B'].isna(), df.A, df.B)
print (df)
A B new_col
0 Bb00 100080 100080
1 Aa00 <NA> Aa00
2 Cc10 450089 450089
print (df['new_col'].apply(type))
0 <class 'int'>
1 <class 'str'>
2 <class 'int'>
Name: new_col, dtype: object
If you need strings in the output:
df['new_col'] = np.where(df['B'].isna(), df.A, df.B).astype(str)
print (df)
A B new_col
0 Bb00 100080 100080
1 Aa00 <NA> Aa00
2 Cc10 450089 450089
print (df['new_col'].apply(type))
0 <class 'str'>
1 <class 'str'>
2 <class 'str'>
Name: new_col, dtype: object