I want to replace the NaN (missing) values in column B with the corresponding values from column A in df:
df:
JavaScript
x
6
1
A B
2
0 Bb00 100080
3
1 Aa00 <NA>
4
2 Cc10 450089
5
6
df data types:
JavaScript
1
4
1
A object
2
B Int64
3
dtype: object
4
I tried:
JavaScript
1
2
1
df['new_col'] = df['B'].fillna(df['A'])
2
and it raised an error:
JavaScript
1
72
72
1
---------------------------------------------------------------------------
2
TypeError Traceback (most recent call last)
3
<ipython-input-120-1d24cd9977ed> in <module>
4
2 print(df.dtypes)
5
3
6
----> 4 df['new_col'] = df['B'].fillna(df['A'])
7
8
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\series.py in fillna(self, value, method, axis, inplace, limit, downcast)
9
4515 downcast=None,
10
4516 ) -> Optional["Series"]:
11
-> 4517 return super().fillna(
12
4518 value=value,
13
4519 method=method,
14
15
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\generic.py in fillna(self, value, method, axis, inplace, limit, downcast)
16
6046 )
17
6047
18
-> 6048 new_data = self._mgr.fillna(
19
6049 value=value, limit=limit, inplace=inplace, downcast=downcast
20
6050 )
21
22
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\internals\managers.py in fillna(self, value, limit, inplace, downcast)
23
555
24
556 def fillna(self, value, limit, inplace: bool, downcast) -> "BlockManager":
25
--> 557 return self.apply(
26
558 "fillna", value=value, limit=limit, inplace=inplace, downcast=downcast
27
559 )
28
29
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\internals\managers.py in apply(self, f, align_keys, **kwargs)
30
394 applied = b.apply(f, **kwargs)
31
395 else:
32
--> 396 applied = getattr(b, f)(**kwargs)
33
397 result_blocks = _extend_blocks(applied, result_blocks)
34
398
35
36
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\internals\blocks.py in fillna(self, value, limit, inplace, downcast)
37
1773 def fillna(self, value, limit=None, inplace=False, downcast=None):
38
1774 values = self.values if inplace else self.values.copy()
39
-> 1775 values = values.fillna(value=value, limit=limit)
40
1776 return [
41
1777 self.make_block_same_class(
42
43
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\arrays\base.py in fillna(self, value, method, limit)
44
615 # fill with value
45
616 new_values = self.copy()
46
--> 617 new_values[mask] = value
47
618 else:
48
619 new_values = self.copy()
49
50
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\arrays\masked.py in __setitem__(self, key, value)
51
103 if _is_scalar:
52
104 value = [value]
53
--> 105 value, mask = self._coerce_to_array(value)
54
106
55
107 if _is_scalar:
56
57
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\arrays\integer.py in _coerce_to_array(self, value)
58
423
59
424 def _coerce_to_array(self, value) -> Tuple[np.ndarray, np.ndarray]:
60
--> 425 return coerce_to_array(value, dtype=self.dtype)
61
426
62
427 def astype(self, dtype, copy: bool = True) -> ArrayLike:
63
64
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\arrays\integer.py in coerce_to_array(values, dtype, mask, copy)
65
241 "mixed-integer-float",
66
242 ]:
67
--> 243 raise TypeError(f"{values.dtype} cannot be converted to an IntegerDtype")
68
244
69
245 elif is_bool_dtype(values) and is_integer_dtype(dtype):
70
71
TypeError: object cannot be converted to an IntegerDtype
72
Advertisement
Answer
Use numpy.where to avoid floats in the output column:
JavaScript
1
7
1
df['new_col'] = np.where(df['B'].isna(), df.A, df.B)
2
print (df)
3
A B new_col
4
0 Bb00 100080 100080
5
1 Aa00 <NA> Aa00
6
2 Cc10 450089 450089
7
JavaScript
1
6
1
print (df['new_col'].apply(type))
2
0 <class 'int'>
3
1 <class 'str'>
4
2 <class 'int'>
5
Name: new_col, dtype: object
6
If you need strings in the output:
JavaScript
1
14
14
1
df['new_col'] = np.where(df['B'].isna(), df.A, df.B).astype(str)
2
3
print (df)
4
A B new_col
5
0 Bb00 100080 100080
6
1 Aa00 <NA> Aa00
7
2 Cc10 450089 450089
8
9
print (df['new_col'].apply(type))
10
0 <class 'str'>
11
1 <class 'str'>
12
2 <class 'str'>
13
Name: new_col, dtype: object
14