Skip to content
Advertisement

Adding dataclass fields dynamically with dacite.from_dict

I am using dacite to transform a Python dictionary into a dataclass. Is there a way to dynamically add fields to a dataclass? For instance, in the example below the dataclass “Parameters” defines only one time series, “timeseriesA”, but the dictionary may provide additional ones that cannot be declared in advance.

from dataclasses import asdict, dataclass
from typing import Dict, List, Optional

from dacite import from_dict

@dataclass(frozen = True)
class TimeSeries:
  """Immutable record for one time series: a name, a unit, and optional values."""
  name: str
  unit: str
  # May be None when the series carries no values.
  data: Optional[List[float]]
  
@dataclass(frozen = True)
class Parameters:
  """Immutable container with one statically declared series field."""
  # Only this series is declared; any other series name has no field here.
  timeseriesA: TimeSeries
  
@dataclass(frozen=True)
class Data:
  """Immutable top-level record wrapping the ``parameters`` section."""
  parameters: Parameters

  @classmethod
  def fromDict(cls, data: Dict) -> 'Data':
    """Build an instance from a nested dict via ``dacite.from_dict``."""
    return from_dict(cls, data)

  def toDict(self) -> Dict:
    """Return this instance as a nested dict via ``dataclasses.asdict``.

    This must be an instance method: ``asdict`` only accepts dataclass
    instances and raises ``TypeError`` when handed the class itself.
    """
    return asdict(self)

  
def main() -> None:
  """Parse a sample payload holding two time series into ``Data``."""
  series_a: Dict = {'name': 'nameA', 'unit': 'USD', 'data': [10, 20, 30, 40]}
  series_b: Dict = {'name': 'nameB', 'unit': 'EUR', 'data': [60, 30, 40, 50]}

  # 'timeseriesB' has no matching field declared on Parameters.
  payload: Dict = {
    'parameters': {
      'timeseriesA': series_a,
      'timeseriesB': series_b,
    },
  }

  parsed: Data = Data.fromDict(payload)


if __name__ == '__main__':
  main()

In this example, “timeseriesB” will be ignored by dacite, but it should be added as a field of the “Parameters” dataclass.

Advertisement

Answer

In general, dynamically adding fields to a dataclass, after the class is defined, is not good practice. However, this does present a good use case for using a dict within a dataclass, due to the dynamic nature of fields in the source dict object.

Here is a straightforward example of using a dict field to handle a dynamic mapping of keys in the source object. It uses dataclass-wizard, a similar JSON (de)serialization library. The approach outlined below handles keys that are not known in advance, such as timeseriesB.

from __future__ import annotations

from dataclasses import dataclass
from dataclass_wizard import JSONWizard


@dataclass(frozen=True)
class Data(JSONWizard):
    """Immutable top-level record whose ``parameters`` maps series names to ``TimeSeries``."""
    # A dict field lets the set of series names vary per payload.
    # ``TimeSeries`` is defined further down; the forward reference relies on
    # ``from __future__ import annotations``.
    parameters: dict[str, TimeSeries]


@dataclass(frozen=True)
class TimeSeries:
    """Immutable record for one time series: a name, a unit, and optional values."""
    name: str
    unit: str
    # May be None when the series carries no values.
    data: list[float] | None


# Sample payload: two series keyed by name under 'parameters'.
data: dict = {
    'parameters': {
        'timeseriesA': {
            'name': 'nameA',
            'unit': 'USD',
            'data': [10, 20, 30, 40]
        },
        'timeseriesB': {
            'name': 'nameB',
            'unit': 'EUR',
            'data': [60, 30, 40, 50]
        }
    }
}


def main():
    """Deserialize the sample payload, then print one unit and the full repr."""
    # deserialize from dict
    parsed = Data.from_dict(data)
    series_b = parsed.parameters['timeseriesB']
    print(series_b.unit)  # EUR

    print(repr(parsed))
    # Data(parameters={'timeseriesA': TimeSeries(name='nameA', unit='USD', data=[10.0, 20.0, 30.0, 40.0]),
    #                  'timeseriesB': TimeSeries(name='nameB', unit='EUR', data=[60.0, 30.0, 40.0, 50.0])})


if __name__ == '__main__':
    main()

Admittedly, dataclass-wizard doesn’t perform strict type checking like dacite; instead it performs implicit type coercion where possible, such as converting a str to an int when the field is annotated as int. Perhaps as a result, it’s much faster overall; the other nice thing is that serialization is even slightly faster than the builtin dataclasses.asdict too :-)

Here are some quick tests:

from dataclasses import asdict, dataclass
from typing import Dict, List, Optional

from dacite import from_dict
from dataclass_wizard import JSONWizard
from timeit import timeit


@dataclass(frozen=True)
class TimeSeries:
    """Immutable record for one time series: a name, a unit, and optional values."""
    name: str
    unit: str
    # May be None when the series carries no values.
    data: Optional[List[float]]


@dataclass(frozen=True)
class Parameters:
    """Immutable container with one statically declared series field (dacite variant)."""
    timeseriesA: TimeSeries


@dataclass(frozen=True)
class Data:
    """Immutable top-level record converted with dacite and ``dataclasses.asdict``."""
    parameters: Parameters

    @classmethod
    def fromDict(cls, data: Dict) -> 'Data':
        """Build an instance from a nested dict via ``dacite.from_dict``."""
        return from_dict(cls, data)

    def toDict(self) -> Dict:
        """Return this instance as a nested dict via ``dataclasses.asdict``."""
        return asdict(self)


@dataclass(frozen=True)
class ParametersWizard:
    """Counterpart of ``Parameters`` used by the dataclass-wizard variant."""
    # renamed because default key transform is `camelCase` -> `snake_case`
    timeseries_a: TimeSeries


@dataclass(frozen=True)
class DataWizard(JSONWizard):
    """Counterpart of ``Data`` that gets its dict conversion from ``JSONWizard``."""
    # enable debug mode in case of incorrect types etc.
    class _(JSONWizard.Meta):
        debug_enabled = True

    parameters: ParametersWizard


# Sample payload shared by both benchmark variants: two series keyed by name
# under 'parameters'.
data: Dict = {
    'parameters': {
        'timeseriesA': {
            'name': 'nameA',
            'unit': 'USD',
            'data': [10, 20, 30, 40]
        },
        'timeseriesB': {
            'name': 'nameB',
            'unit': 'EUR',
            'data': [60, 30, 40, 50]
        }
    }
}


def main():
    """Time dict -> dataclass and dataclass -> dict for dacite vs dataclass-wizard.

    Each statement is executed ``n`` times with ``timeit``; every printed
    number is the total time in seconds for all ``n`` executions.
    """
    n = 10_000

    print(f"From Dict:        {timeit('Data.fromDict(data)', globals=globals(), number=n):.3f}")
    print(f"From Dict (Wiz):  {timeit('DataWizard.from_dict(data)', globals=globals(), number=n):.3f}")

    data_1: Data = Data.fromDict(data)
    # Annotated as DataWizard (not Data): this instance comes from DataWizard.from_dict.
    data_wiz: DataWizard = DataWizard.from_dict(data)

    # timeit resolves names in the namespace it is given, so expose the two
    # local instances alongside the module globals.
    g = {**globals(), 'data_1': data_1, 'data_wiz': data_wiz}

    print(f"To Dict:        {timeit('data_1.toDict()', globals=g, number=n):.3f}")
    print(f"To Dict (Wiz):  {timeit('data_wiz.to_dict()', globals=g, number=n):.3f}")


if __name__ == '__main__':
    main()

Results, on my PC (Windows):

From Dict:        1.663
From Dict (Wiz):  0.059
To Dict:        0.105
To Dict (Wiz):  0.057
User contributions licensed under: CC BY-SA
7 people found this helpful
Advertisement