I have some input data and transformation functions t1, t2, t3, t4, t5, t6. Each of them requires some columns as input and outputs some columns.
details = {
    't1': {
        'input_cols': ['C1', 'C2'],
        'output_cols': ['C8', 'C9', 'C10']
    },
    't2': {
        'input_cols': ['C8', 'C10'],
        'output_cols': ['C11', 'C12']
    },
    't3': {
        'input_cols': ['C11', 'C12'],
        'output_cols': ['C13', 'C14', 'C15', 'C16']
    },
    't4': {
        'input_cols': ['C9', 'C3', 'C4', 'C5', 'C6'],
        'output_cols': ['C17', 'C18', 'C19', 'C20']
    },
    't5': {
        'input_cols': ['C20', 'C16'],
        'output_cols': ['C21', 'C22']
    },
    't6': {
        'input_cols': ['C7', 'C16'],
        'output_cols': ['C23', 'C24', 'C25']
    }
}
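To make the dependency structure concrete, the transformation-level edges can be derived mechanically from details (a small sketch assuming the dict-of-dicts shape above; the `producer` and `edges` names are just illustrative):

```python
details = {
    't1': {'input_cols': ['C1', 'C2'], 'output_cols': ['C8', 'C9', 'C10']},
    't2': {'input_cols': ['C8', 'C10'], 'output_cols': ['C11', 'C12']},
    't3': {'input_cols': ['C11', 'C12'], 'output_cols': ['C13', 'C14', 'C15', 'C16']},
    't4': {'input_cols': ['C9', 'C3', 'C4', 'C5', 'C6'], 'output_cols': ['C17', 'C18', 'C19', 'C20']},
    't5': {'input_cols': ['C20', 'C16'], 'output_cols': ['C21', 'C22']},
    't6': {'input_cols': ['C7', 'C16'], 'output_cols': ['C23', 'C24', 'C25']},
}

# Map each column to the transformation that produces it.
# Columns C1..C7 are produced by no transformation, so they are absent here.
producer = {c: t for t, d in details.items() for c in d['output_cols']}

# An edge (u, v) means transformation v consumes a column produced by u.
edges = {
    (producer[c], t)
    for t, d in details.items()
    for c in d['input_cols']
    if c in producer
}
print(sorted(edges))
# [('t1', 't2'), ('t1', 't4'), ('t2', 't3'), ('t3', 't5'), ('t3', 't6'), ('t4', 't5')]
```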
The DAG associated with these transformations has the edges t1 → t2, t2 → t3, t1 → t4, t3 → t5, t4 → t5, and t3 → t6 (an edge u → v means v consumes a column produced by u).
I get as input the columns I want to generate (required_columns) and should return the sub-DAG needed to obtain them. I am not interested only in the DAG nodes; the order also matters. For example, if required_columns is ['C1', 'C2', ..., 'C25'], the output should be ['t1', 't2', 't3', 't4', 't5', 't6'], or another valid order such as ['t1', 't4', 't2', 't3', 't6', 't5'].
If required_columns is ['C8', 'C9', 'C10', 'C11', 'C12', 'C13', 'C14', 'C15', 'C16', 'C20', 'C21', 'C22'], I should output ['t1', 't2', 't3', 't4', 't5'] (C20 requires t4, which in turn requires C9 from t1).
If required_columns is ['C1', 'C3', 'C5', 'C8', 'C9', 'C10', 'C11', 'C12', 'C17', 'C18', 'C19', 'C20'], I should output ['t1', 't2', 't4'] (C11 and C12 are produced by t2).
Note that the presence or absence of 'C1' … 'C7' in my required_columns doesn't influence the desired output, as these columns are not generated by t1, t2, t3, t4, t5, t6.
My approach for solving this problem would be:
- Create a DAG D based on the details dictionary.
- For each leaf in D: if none of the output_cols of the current leaf are in required_columns, delete the node, move upwards to the nodes that point to it, and repeat the step.
- Print the values of the topologically sorted trimmed DAG.
I intuitively think my approach is far from optimal. Given the current input format (details is a dictionary and required_columns is a list), how can I do better?
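For reference, the trimming idea described above can be sketched as follows (a rough sketch, not claimed to be optimal; `trim_and_sort` is a hypothetical helper, and the `sorted()` calls are only there to make the output deterministic):

```python
import collections

details = {
    't1': {'input_cols': ['C1', 'C2'], 'output_cols': ['C8', 'C9', 'C10']},
    't2': {'input_cols': ['C8', 'C10'], 'output_cols': ['C11', 'C12']},
    't3': {'input_cols': ['C11', 'C12'], 'output_cols': ['C13', 'C14', 'C15', 'C16']},
    't4': {'input_cols': ['C9', 'C3', 'C4', 'C5', 'C6'], 'output_cols': ['C17', 'C18', 'C19', 'C20']},
    't5': {'input_cols': ['C20', 'C16'], 'output_cols': ['C21', 'C22']},
    't6': {'input_cols': ['C7', 'C16'], 'output_cols': ['C23', 'C24', 'C25']},
}

def trim_and_sort(required_columns):
    required = set(required_columns)
    producer = {c: t for t, d in details.items() for c in d['output_cols']}
    alive = set(details)
    # Trim step: repeatedly delete transformations that produce no required
    # column and feed no surviving transformation.
    changed = True
    while changed:
        changed = False
        for t in sorted(alive):
            outputs = set(details[t]['output_cols'])
            feeds_someone = any(
                outputs & set(details[u]['input_cols']) for u in alive - {t}
            )
            if not feeds_someone and not outputs & required:
                alive.remove(t)
                changed = True
    # Kahn's algorithm: topologically sort the surviving sub-DAG.
    indegree = {t: 0 for t in alive}
    children = collections.defaultdict(list)
    for t in sorted(alive):
        for c in details[t]['input_cols']:
            u = producer.get(c)
            if u in alive:
                children[u].append(t)  # multi-edges are fine for Kahn
                indegree[t] += 1
    queue = collections.deque(t for t in sorted(alive) if indegree[t] == 0)
    order = []
    while queue:
        u = queue.popleft()
        order.append(u)
        for v in children[u]:
            indegree[v] -= 1
            if indegree[v] == 0:
                queue.append(v)
    return order

print(trim_and_sort(['C1', 'C3', 'C5', 'C8', 'C9', 'C10', 'C11', 'C12',
                     'C17', 'C18', 'C19', 'C20']))
# ['t1', 't2', 't4']
```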
Answer
Build the transpose DAG and do a depth-first search from the required columns:
import collections
Transformation = collections.namedtuple("Transformation", ["input_cols", "output_cols"])
details = {
"t1": Transformation(input_cols=["C1", "C2"], output_cols=["C8", "C9", "C10"]),
"t2": Transformation(input_cols=["C8", "C10"], output_cols=["C11", "C12"]),
"t3": Transformation(
input_cols=["C11", "C12"], output_cols=["C13", "C14", "C15", "C16"]
),
"t4": Transformation(
input_cols=["C9", "C3", "C4", "C5", "C6"],
output_cols=["C17", "C18", "C19", "C20"],
),
"t5": Transformation(input_cols=["C20", "C16"], output_cols=["C21", "C22"]),
"t6": Transformation(input_cols=["C7", "C16"], output_cols=["C23", "C24", "C25"]),
}
def depth_first_search(graph, roots):
    """Iterative post-order DFS: a node is appended to `order` only after
    everything reachable from it, so prerequisites precede their dependents."""
    stack = [(root, True) for root in roots]
    visited = set()
    order = []
    while stack:
        v, entering = stack.pop()
        if entering:
            if v in visited:
                continue
            visited.add(v)
            stack.append((v, False))  # revisit v on the way back out
            stack.extend((w, True) for w in graph[v])
        else:
            order.append(v)
    return order
def sub_dag(required_columns):
    # Transpose DAG: each transformation points to its input columns,
    # and each column points to the transformation that produces it.
    graph = collections.defaultdict(set)
    for k, v in details.items():
        for c in v.input_cols:
            graph[k].add(c)
        for c in v.output_cols:
            graph[c].add(k)
    order = depth_first_search(graph, required_columns)
    # Drop the column nodes; keep only transformation names, in dependency order.
    return [t for t in order if t in details]
print(sub_dag([f"C{i}" for i in range(1, 26)]))  # all columns C1..C25
print(sub_dag(["C8", "C9", "C10", "C11", "C12", "C13",
               "C14", "C15", "C16", "C20", "C21", "C22"]))
print(sub_dag(["C1", "C3", "C5", "C8", "C9", "C10",
               "C11", "C12", "C17", "C18", "C19", "C20"]))
print(sub_dag(["C1", "C3", "C5", "C25", "C22"]))
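Because the adjacency sets are unordered, the exact order printed can vary between runs; any topological order of the required sub-DAG is correct. A quick way to sanity-check a result (`is_valid_order` is a hypothetical helper; C1–C7 are assumed to be given with the input data, since no transformation produces them):

```python
import collections

Transformation = collections.namedtuple("Transformation", ["input_cols", "output_cols"])

details = {
    "t1": Transformation(input_cols=["C1", "C2"], output_cols=["C8", "C9", "C10"]),
    "t2": Transformation(input_cols=["C8", "C10"], output_cols=["C11", "C12"]),
    "t3": Transformation(input_cols=["C11", "C12"], output_cols=["C13", "C14", "C15", "C16"]),
    "t4": Transformation(input_cols=["C9", "C3", "C4", "C5", "C6"],
                         output_cols=["C17", "C18", "C19", "C20"]),
    "t5": Transformation(input_cols=["C20", "C16"], output_cols=["C21", "C22"]),
    "t6": Transformation(input_cols=["C7", "C16"], output_cols=["C23", "C24", "C25"]),
}

def is_valid_order(order, details, source_cols):
    """True iff every transformation runs only after all of its inputs exist."""
    available = set(source_cols)
    for t in order:
        if not set(details[t].input_cols) <= available:
            return False
        available.update(details[t].output_cols)
    return True

source = {f"C{i}" for i in range(1, 8)}  # C1..C7 come with the input data
print(is_valid_order(["t1", "t2", "t3", "t4", "t5", "t6"], details, source))  # True
print(is_valid_order(["t1", "t2", "t3", "t5", "t4", "t6"], details, source))  # False: t5 before t4
```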