I am using dacite to transform a Python dictionary into a dataclass. Is there a way to dynamically add fields to a dataclass? For instance, in the example below, the dataclass “Parameters” defines only one time series, “timeseriesA”, but there might be additional ones (provided through the dictionary) that cannot be declared in advance.
from dataclasses import asdict, dataclass
from typing import Dict, List, Optional
from dacite import from_dict
@dataclass(frozen=True)
class TimeSeries:
  """Immutable record for one named time series and its unit."""

  name: str
  unit: str
  # Sample values of the series; None is accepted in place of a list.
  data: Optional[List[float]]
  
@dataclass(frozen=True)
class Parameters:
  """Immutable container for the time series that are declared up front."""

  # The only series declared as a field on this class.
  timeseriesA: TimeSeries
  
@dataclass(frozen=True)
class Data:
  """Top-level immutable payload wrapping a ``Parameters`` instance."""

  parameters: Parameters

  @classmethod
  def fromDict(cls, data: Dict) -> 'Data':
    """Build an instance from a plain dictionary via dacite's ``from_dict``."""
    return from_dict(cls, data)

  def toDict(self) -> Dict:
    """Return this instance as a plain dictionary.

    This must be an instance method: ``dataclasses.asdict`` only accepts
    dataclass instances and raises ``TypeError`` when handed the class.
    """
    return asdict(self)
  
def main() -> None:
  """Build a ``Data`` instance from a dictionary holding two time series.

  The dictionary carries both ``timeseriesA`` and ``timeseriesB`` under
  ``parameters`` and is passed to ``Data.fromDict``.
  """
  series_a: Dict = {'name': 'nameA', 'unit': 'USD', 'data': [10, 20, 30, 40]}
  series_b: Dict = {'name': 'nameB', 'unit': 'EUR', 'data': [60, 30, 40, 50]}
  payload: Dict = {
    'parameters': {
      'timeseriesA': series_a,
      'timeseriesB': series_b,
    },
  }
  data: Data = Data.fromDict(payload)


# Run the example only when this file is executed as a script.
if __name__ == '__main__':
  main()
In this example, “timeseriesB” will be ignored by dacite, but it should be added as a field of the “Parameters” dataclass.
Advertisement
Answer
In general, dynamically adding fields to a dataclass, after the class is defined, is not good practice. However, this does present a good use case for using a dict within a dataclass, due to the dynamic nature of fields in the source dict object.
Here is a straightforward example of using a dict field to handle a dynamic mapping of keys in the source object. It uses dataclass-wizard, a similar JSON serialization library. The approach outlined below handles extraneous data in the dict object, such as timeseriesB.
from __future__ import annotations
from dataclasses import dataclass
from dataclass_wizard import JSONWizard
@dataclass(frozen=True)
class Data(JSONWizard):
    """Immutable payload that keeps its time series in a name-keyed mapping."""

    # Series names map to TimeSeries objects, so no per-series field is declared.
    parameters: dict[str, TimeSeries]
@dataclass(frozen=True)
class TimeSeries:
    """Immutable record for one named time series and its unit."""

    name: str
    unit: str
    # Sample values of the series; None is accepted in place of a list.
    data: list[float] | None
# Sample payload: two time series, keyed by name, nested under 'parameters'.
data: dict = {
    'parameters': {
        'timeseriesA': {'name': 'nameA', 'unit': 'USD', 'data': [10, 20, 30, 40]},
        'timeseriesB': {'name': 'nameB', 'unit': 'EUR', 'data': [60, 30, 40, 50]},
    },
}
def main():
    """Deserialize the module-level ``data`` dict and print what was parsed."""
    parsed = Data.from_dict(data)

    series_b = parsed.parameters['timeseriesB']
    print(series_b.unit)  # EUR

    print(repr(parsed))
    # Data(parameters={'timeseriesA': TimeSeries(name='nameA', unit='USD', data=[10.0, 20.0, 30.0, 40.0]),
    #                  'timeseriesB': TimeSeries(name='nameB', unit='EUR', data=[60.0, 30.0, 40.0, 50.0])})


# Run the example only when this file is executed as a script.
if __name__ == '__main__':
    main()
Admittedly, dataclass-wizard doesn’t perform strict type checking like dacite does; instead, it performs implicit type coercion where possible, such as converting a str to an annotated int. Perhaps as a result, it’s much faster overall; another nice thing is that serialization is even slightly faster than the built-in dataclasses.asdict :-)
Here are some quick tests:
from dataclasses import asdict, dataclass
from typing import Dict, List, Optional
from dacite import from_dict
from dataclass_wizard import JSONWizard
from timeit import timeit
@dataclass(frozen=True)
class TimeSeries:
    """Immutable record for one named time series and its unit."""

    name: str
    unit: str
    # Sample values of the series; None is accepted in place of a list.
    data: Optional[List[float]]
@dataclass(frozen=True)
class Parameters:
    """Immutable container for the single declared time series."""

    # The only series declared as a field on this class.
    timeseriesA: TimeSeries
@dataclass(frozen=True)
class Data:
    """dacite-backed payload used as the baseline in the timing comparison."""

    parameters: Parameters

    @classmethod
    def fromDict(cls, data: Dict) -> 'Data':
        """Create an instance from a plain dictionary using dacite."""
        instance = from_dict(data_class=cls, data=data)
        return instance

    def toDict(self) -> Dict:
        """Convert this instance to a plain dictionary with ``asdict``."""
        as_plain_dict = asdict(self)
        return as_plain_dict
@dataclass(frozen=True)
class ParametersWizard:
    """Counterpart of ``Parameters`` for the dataclass-wizard model."""

    # Field is spelled in snake_case because the default key transform
    # maps `camelCase` input keys to `snake_case` field names.
    timeseries_a: TimeSeries
@dataclass(frozen=True)
class DataWizard(JSONWizard):
    """dataclass-wizard counterpart of ``Data`` for the timing comparison."""

    # Meta configuration: turn on debug mode so incorrect types etc. are reported.
    class _(JSONWizard.Meta):
        debug_enabled = True

    parameters: ParametersWizard
# Benchmark input: two time series, keyed by name, nested under 'parameters'.
data: Dict = {
    'parameters': {
        'timeseriesA': {'name': 'nameA', 'unit': 'USD', 'data': [10, 20, 30, 40]},
        'timeseriesB': {'name': 'nameB', 'unit': 'EUR', 'data': [60, 30, 40, 50]},
    },
}
def main():
    """Time dict -> object and object -> dict conversion for both models.

    Each statement is executed ``n`` times with ``timeit`` and the total
    elapsed seconds are printed, first for the dacite-based ``Data`` and
    then for the dataclass-wizard-based ``DataWizard``.
    """
    n = 10_000

    print(f"From Dict:        {timeit('Data.fromDict(data)', globals=globals(), number=n):.3f}")
    print(f"From Dict (Wiz):  {timeit('DataWizard.from_dict(data)', globals=globals(), number=n):.3f}")

    data_1: Data = Data.fromDict(data)
    # Annotated with the class actually returned (was mistakenly `Data`).
    data_wiz: DataWizard = DataWizard.from_dict(data)

    # The timed statements only need the two instances on top of the module
    # globals, so expose exactly those instead of merging in all of locals().
    g = {**globals(), 'data_1': data_1, 'data_wiz': data_wiz}

    print(f"To Dict:        {timeit('data_1.toDict()', globals=g, number=n):.3f}")
    print(f"To Dict (Wiz):  {timeit('data_wiz.to_dict()', globals=g, number=n):.3f}")


# Run the benchmark only when this file is executed as a script.
if __name__ == '__main__':
    main()
Results, on my PC (Windows):
From Dict:        1.663
From Dict (Wiz):  0.059
To Dict:        0.105
To Dict (Wiz):  0.057
