I want to add a new column "NUll_Values" to a PySpark DataFrame that lists, for each row, the names of the columns whose value is NULL, as shown below:
JavaScript
x
9
1
=========================================================
ID  | Maths | Science | English | NUll_Values           |
=========================================================
11  | 80    | NULL    | 89      | Science               |
12  | NULL  | NULL    | 89      | Maths,Science         |
13  | 90    | 95      | 85      | NULL                  |
14  | NULL  | NULL    | NULL    | Maths,Science,English |
=========================================================
Advertisement
Answer
Python
1
15
15
1
from pyspark.sql import functions as F

# Build a comma-separated list of the column names that are null in each row.
# `df` is the existing DataFrame from the question (ID, Maths, Science, English).
target = "NUll_Values"

# Exclude the target column itself so that re-running this snippet on a frame
# that already contains it does not list "NUll_Values" among the null columns.
checked_columns = [c for c in df.columns if c != target]

# when() without otherwise() evaluates to null when the condition is false,
# and concat_ws() skips null arguments, so only the names of null columns are
# joined. A row with no nulls therefore gets an empty string (see ID 13 below).
df = df.withColumn(
    target,
    F.concat_ws(
        ",",
        *[F.when(F.col(c).isNull(), F.lit(c)) for c in checked_columns],
    ),
)

df.show(truncate=False)
# +---+-----+-------+-------+---------------------+
# |ID |Maths|Science|English|NUll_Values          |
# +---+-----+-------+-------+---------------------+
# |11 |80   |null   |89     |Science              |
# |12 |null |null   |89     |Maths,Science        |
# |13 |90   |95     |85     |                     |
# |14 |null |null   |null   |Maths,Science,English|
# +---+-----+-------+-------+---------------------+
Or, if rows without any null column should show null instead of an empty string:
Python
1
15
15
1
from pyspark.sql import functions as F

# Build a comma-separated list of the column names that are null in each row,
# and store null (not "") when a row has no null columns.
# `df` is the existing DataFrame from the question (ID, Maths, Science, English).
target = "NUll_Values"

# Exclude the target column itself: after a previous run it can hold null, and
# would otherwise be reported as "NUll_Values" when this snippet is re-run.
checked_columns = [c for c in df.columns if c != target]

# when() without otherwise() evaluates to null when the condition is false,
# and concat_ws() skips null arguments, so only the names of null columns are
# joined; rows with no nulls produce "", which replace() then turns into null.
df = df.withColumn(
    target,
    F.concat_ws(
        ",",
        *[F.when(F.col(c).isNull(), F.lit(c)) for c in checked_columns],
    ),
).replace("", None, subset=[target])

df.show(truncate=False)
# +---+-----+-------+-------+---------------------+
# |ID |Maths|Science|English|NUll_Values          |
# +---+-----+-------+-------+---------------------+
# |11 |80   |null   |89     |Science              |
# |12 |null |null   |89     |Maths,Science        |
# |13 |90   |95     |85     |null                 |
# |14 |null |null   |null   |Maths,Science,English|
# +---+-----+-------+-------+---------------------+