I want to replace the NaN values in column B with the corresponding values from column A in the DataFrame below.
df:
      A       B
0  Bb00  100080
1  Aa00    <NA>
2  Cc10  450089
df data types:
A    object
B     Int64
dtype: object
I tried:
df['new_col'] = df['B'].fillna(df['A'])
and it raised an error:
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) <ipython-input-120-1d24cd9977ed> in <module> 2 print(df.dtypes) 3 ----> 4 df['new_col'] = df['B'].fillna(df['A']) ~AppDataRoamingPythonPython38site-packagespandascoreseries.py in fillna(self, value, method, axis, inplace, limit, downcast) 4515 downcast=None, 4516 ) -> Optional["Series"]: -> 4517 return super().fillna( 4518 value=value, 4519 method=method, ~AppDataRoamingPythonPython38site-packagespandascoregeneric.py in fillna(self, value, method, axis, inplace, limit, downcast) 6046 ) 6047 -> 6048 new_data = self._mgr.fillna( 6049 value=value, limit=limit, inplace=inplace, downcast=downcast 6050 ) ~AppDataRoamingPythonPython38site-packagespandascoreinternalsmanagers.py in fillna(self, value, limit, inplace, downcast) 555 556 def fillna(self, value, limit, inplace: bool, downcast) -> "BlockManager": --> 557 return self.apply( 558 "fillna", value=value, limit=limit, inplace=inplace, downcast=downcast 559 ) ~AppDataRoamingPythonPython38site-packagespandascoreinternalsmanagers.py in apply(self, f, align_keys, **kwargs) 394 applied = b.apply(f, **kwargs) 395 else: --> 396 applied = getattr(b, f)(**kwargs) 397 result_blocks = _extend_blocks(applied, result_blocks) 398 ~AppDataRoamingPythonPython38site-packagespandascoreinternalsblocks.py in fillna(self, value, limit, inplace, downcast) 1773 def fillna(self, value, limit=None, inplace=False, downcast=None): 1774 values = self.values if inplace else self.values.copy() -> 1775 values = values.fillna(value=value, limit=limit) 1776 return [ 1777 self.make_block_same_class( ~AppDataRoamingPythonPython38site-packagespandascorearraysbase.py in fillna(self, value, method, limit) 615 # fill with value 616 new_values = self.copy() --> 617 new_values[mask] = value 618 else: 619 new_values = self.copy() ~AppDataRoamingPythonPython38site-packagespandascorearraysmasked.py in __setitem__(self, key, value) 103 if 
_is_scalar: 104 value = [value] --> 105 value, mask = self._coerce_to_array(value) 106 107 if _is_scalar: ~AppDataRoamingPythonPython38site-packagespandascorearraysinteger.py in _coerce_to_array(self, value) 423 424 def _coerce_to_array(self, value) -> Tuple[np.ndarray, np.ndarray]: --> 425 return coerce_to_array(value, dtype=self.dtype) 426 427 def astype(self, dtype, copy: bool = True) -> ArrayLike: ~AppDataRoamingPythonPython38site-packagespandascorearraysinteger.py in coerce_to_array(values, dtype, mask, copy) 241 "mixed-integer-float", 242 ]: --> 243 raise TypeError(f"{values.dtype} cannot be converted to an IntegerDtype") 244 245 elif is_bool_dtype(values) and is_integer_dtype(dtype): TypeError: object cannot be converted to an IntegerDtype
Advertisement
Answer
Use numpy.where to avoid floats in the output column:
df['new_col'] = np.where(df['B'].isna(), df.A, df.B) print (df) A B new_col 0 Bb00 100080 100080 1 Aa00 <NA> Aa00 2 Cc10 450089 450089
print (df['new_col'].apply(type)) 0 <class 'int'> 1 <class 'str'> 2 <class 'int'> Name: new_col, dtype: object
If you need strings in the output:
df['new_col'] = np.where(df['B'].isna(), df.A, df.B).astype(str) print (df) A B new_col 0 Bb00 100080 100080 1 Aa00 <NA> Aa00 2 Cc10 450089 450089 print (df['new_col'].apply(type)) 0 <class 'str'> 1 <class 'str'> 2 <class 'str'> Name: new_col, dtype: object