I’m struggling to simplify my irregular nested np.where clauses. Is there a way to make the code more readable?
# Pair checks reused below: each is true only where the first column of the
# pair is non-null and equal to the second.
a1_pair_ok = (df["A1"] == df["B1"]) & (df["A1"].notna())
a_pair_ok = (df["A"] == df["B"]) & (df["A"].notna())

# Rows where A1 == B1: if A1 also equals C, emit "Text1" when the A/B pair
# agrees but differs from A1, otherwise A1 itself; if A1 != C, emit "Text2".
when_a1_pair_ok = np.where(
    (df["A1"] == df["C"]),
    np.where(a_pair_ok & (df["A"] != df["A1"]), "Text1", df["A1"]),
    "Text2",
)

# Rows where A1/B1 do not agree: fall back to the A/B pair (A when it also
# equals C, else "Text1"), then to C when present, and finally to "Text3".
when_a1_pair_bad = np.where(
    a_pair_ok,
    np.where((df["A"] == df["C"]), df["A"], "Text1"),
    np.where((df["C"].notna()), df["C"], "Text3"),
)

df["COL"] = np.where(a1_pair_ok, when_a1_pair_ok, when_a1_pair_bad)
Advertisement
Answer
Using np.select as suggested by @sammywemmy:
# Create boolean masks, one per test in the nested np.where tree.
# The digits in each name give the mask's position in that tree:
# m1 is the outermost test, m11/m12 are the tests inside its true/false
# branches, and so on.
m1 = (df["A1"] == df["B1"]) & (df["A1"].notna())
m11 = (df["A1"] == df["C"])
m12 = (df["A"] == df["B"]) & (df["A"].notna())
m111 = (df["A"] == df["B"]) & df["A"].notna() & (df["A"] != df["A1"])
m121 = (df["A"] == df["C"])
m122 = (df["C"].notna())

# Combine them: one entry per leaf of the tree. The entries are mutually
# exclusive and together cover every row, so np.select's default value is
# never used.
condlist = [
    m1 & m11 & m111,
    m1 & m11 & ~m111,
    m1 & ~m11,
    ~m1 & m12 & m121,
    ~m1 & m12 & ~m121,  # the comma here was missing, which made this a SyntaxError
    ~m1 & ~m12 & m122,
    ~m1 & ~m12 & ~m122,
]

# Values for each combination, in the same order as condlist.
choicelist = ["Text1", df["A1"], "Text2", df["A"], "Text1", df["C"], "Text3"]

# `out` is a NumPy array aligned with df's rows; assign it to df["COL"] to
# get the same column the nested np.where version builds.
out = np.select(condlist, choicelist)