Data class members

Data class members hold the specific pieces of information that define an individual instance of that class.

Let’s say we have a data class called “Car”:

 1class Car:
 2    def __init__(self, color: str, model: str, engine_size: int = 0):
 3        self.color = color
 4        self.model = model
 5        self.engine_size = engine_size
 6
 7    # prints out car object
 8    def __repr__(self):
 9        return f"Car {self.model=} {self.color=} {self.engine_size=}"
10
11    # compares object
12    def __eq__(self, other):
13        if other.__class__ is self.__class__:
14            return (self.color, self.model, self.engine_size) == (other.color, other.model, other.engine_size)
15        else:
16            return NotImplemented
17    

With the dataclass decorator, we can simplify it as follows:

1from dataclasses import dataclass
2
3@dataclass
4class Car:
5  color: str
6  model: str
7  engine_size: int = 0 # assigns a default value 

The dataclass decorator automatically generates these three methods: __init__, __repr__, and __eq__.

1# this method will be added automatically to the class 
2def __init__(self, color: str, model: str, engine_size: int = 0):
3    self.color = color
4    self.model = model
5    self.engine_size = engine_size

Here, color, model, and engine_size are the data class members. They act as blueprints for the information we want to store about each car.

Now, let’s create two instances of our Car class:

 1from dataclasses import dataclass
 2
 3@dataclass
 4class Car:
 5    color: str
 6    model: str
 7    engine_size: int = 0 # assigns a default value 
 8    
 9my_car = Car(color="Blue", model="Toyota Tacoma", engine_size=2500) 
10your_car = Car(color="Red", model="Honda Civic", engine_size=1800)
11print(your_car)
12# Car(color='Red', model='Honda Civic', engine_size=1800)
13print(my_car)
14# Car(color='Blue', model='Toyota Tacoma', engine_size=2500)

In these instances:

  • my_car.color would be “Blue”, my_car.model would be “Toyota Tacoma”, and my_car.engine_size would be 2500.
  • Similarly, your_car has its own set of values for color, model, and engine size.

To see what the dataclass decorator has actually implemented for us, we can use the inspect module:

 1from dataclasses import dataclass
 2
 3@dataclass
 4class Car:
 5    color: str
 6    model: str
 7    engine_size: int = 0 # assigns a default value 
 8
 9import inspect
10
11print(inspect.getmembers(Car, inspect.isfunction))
12
13#Output: [('__eq__', <...>), ('__init__', <...>), ('__repr__', <...>)]

Three functions were created for us by the dataclass decorator. (On Python 3.13 and later the list also includes __replace__.)

Data Classes Args

@dataclass(init=True, repr=True, eq=True, order=False, unsafe_hash=False, frozen=False, match_args=True, kw_only=False, slots=False, weakref_slot=False)

arg description
init Automatically creates a “constructor”, __init__ that takes the initial values of your class’s fields.
repr Generates a nice string representation of your class objects.
eq Creates a way to compare if two objects of your class are equal to each other based on their fields.
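order Generates the comparison methods __lt__, __le__, __gt__, and __ge__, so instances can be compared and sorted as if they were tuples of their fields, in the order the fields are declared.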
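unsafe_hash Forces generation of a __hash__ method so instances are hashable and can be used as dictionary keys or set members; it is “unsafe” because hashing an object whose fields later change breaks dictionary and set lookups.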
frozen Makes your class objects immutable – you won’t be able to reassign field values after the object is created.
match_args Generates the __match_args__ tuple from the fields, which lets match/case patterns match the fields positionally.
kw_only The generated __init__ method will only accept keyword arguments.
slots Optimizes memory usage by using slots instead of dictionaries to store object attributes
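weakref_slot Adds a __weakref__ slot so instances can be weakly referenced (only valid together with slots=True). Weak references allow you to refer to an object without preventing it from being garbage collected.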

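The match_args=True argument (the default) generates a __match_args__ tuple, so instances can also be matched positionally, e.g. case Car("Tesla", "Model 3"). The example below uses keyword patterns, which keep the code readable and easy to maintain.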

 1from dataclasses import dataclass
 2
 3
 4@dataclass(match_args=True)
 5class Car:
 6    make: str
 7    model: str
 8    engine: str
 9
10
11def get_car_details(car: Car):
12    match car:
13        case Car(make="Tesla", model="Model 3"):
14            return "EV Car"
15        case Car(make="Ford", model="Mustang", engine="V8"):
16            return "American muscle car"
17        case Car(make="Toyota", model="Tacoma"):
18            return "Reliable and Fun truck to drive"
19        case _:
20            return f"{car.make=} {car.engine=} {car.model=}. Not on the system."
21
22
23my_car = Car("Toyota", "Tacoma", "Internal Engine Combustion")
24unknown_car = Car("idk", "X", "unknown")
25
26print(get_car_details(my_car))  
27# Reliable and Fun truck to drive
28print(get_car_details(unknown_car))  
29# car.make='idk' car.engine='unknown' car.model='X'. Not on the system.

Dataclasses Default Values

Python evaluates default values only once, when the class definition is executed. A mutable default such as [] would therefore be a single list object shared by every instance of Car, which is problematic.

1from dataclasses import dataclass
2
3@dataclass
4class Car:
5    color: str
6    model: str
7    engine_parts: list[str] = [] # this won't work

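On line 7 we get a ValueError if we try to set an empty list as the default value, because dataclasses rejects mutable defaults. To get around this, dataclasses provides the field() function, whose default_factory argument is called to build a fresh default value for each new instance: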

1from dataclasses import dataclass, field
2
3@dataclass
4class Car:
5    color: str
6    model: str
7    engine_parts: list[str] = field(default_factory=list)

Another Example of the field function:

 1import string
 2import random
 3from dataclasses import dataclass, field
 4
 5
 6def generate_vim_number() -> str:
 7    return f"".join(random.choices(string.ascii_uppercase, k=10))
 8
 9def generate_engine_parts()-> list:
10    engine_parts = ["Pistons", "Crankshaft", "Camshaft", "Cylinder Head", "Timing Belt/Chain"]
11    return random.choices(engine_parts, k=3)
12
13@dataclass
14class Car:
15    color: str
16    model: str
17    engine_parts: list[str] = field(default_factory=generate_engine_parts)
18    vim_number: str = field(default_factory=generate_vim_number)
19
20my_car = Car("Blue", "Tacoma")
21print(my_car)
22# Car(color='Blue', model='Tacoma', engine_parts=['Cylinder Head', 'Crankshaft', 'Camshaft'], vim_number='WQALLIAUCV')
23my_second_car = Car("Red", "Volvo")
24print(my_second_car)
25# Car(color='Red', model='Volvo', engine_parts=['Camshaft', 'Pistons', 'Crankshaft'], vim_number='HWAUNFSSGH')

Manually Setting Default Values To The Fields

If we want to override the default values when creating a Car, we can pass the values explicitly:

 1import string
 2import random
 3from dataclasses import dataclass, field
 4
 5
 6def generate_vim_number() -> str:
 7    return f"".join(random.choices(string.ascii_uppercase, k=10))
 8
 9def generate_engine_parts()-> list:
10    engine_parts = ["Pistons", "Crankshaft", "Camshaft", "Cylinder Head", "Timing Belt/Chain"]
11    return random.choices(engine_parts, k=3)
12
13@dataclass
14class Car:
15    color: str
16    model: str
17    engine_parts: list[str] = field(default_factory=generate_engine_parts)
18    vim_number: str = field(default_factory=generate_vim_number)
19
20my_car = Car(color="Blue", model="Tacoma", engine_parts=[], vim_number="VIM NOT AVAILABLE")
21print(my_car)
22# Car(color='Blue', model='Tacoma', engine_parts=[], vim_number='VIM NOT AVAILABLE')

How To Prevent Setting Default Values In The Fields

We can also prevent users from passing a value for a field to the class initializer by setting init=False on that field (see line 18):

 1import string
 2import random
 3from dataclasses import dataclass, field
 4
 5
 6def generate_vim_number() -> str:
 7    return f"".join(random.choices(string.ascii_uppercase, k=10))
 8
 9def generate_engine_parts()-> list:
10    engine_parts = ["Pistons", "Crankshaft", "Camshaft", "Cylinder Head", "Timing Belt/Chain"]
11    return random.choices(engine_parts, k=3)
12
13@dataclass
14class Car:
15    color: str
16    model: str
17    engine_parts: list[str] = field(default_factory=generate_engine_parts)
18    vim_number: str = field(init=False, default_factory=generate_vim_number)
19
20my_car = Car(color="Blue", model="Tacoma", engine_parts=["Pistons", "Crankshaft", "Camshaft"], vim_number="KEKVYNJMGJ")
21print(my_car)
22
23# my_car = Car(color="Blue", model="Tacoma", engine_parts=["Pistons", "Crankshaft", "Camshaft"], vim_number="KEKVYNJMGJ")
24# TypeError: __init__() got an unexpected keyword argument 'vim_number'

Do not display the field in the string generated by repr

By setting the repr argument of field() to False, the field will not be included in the string automatically generated by __repr__.

 1import string
 2import random
 3from dataclasses import dataclass, field
 4
 5
 6def generate_vim_number() -> str:
 7    return f"".join(random.choices(string.ascii_uppercase, k=10))
 8
 9def generate_engine_parts()-> list:
10    engine_parts = ["Pistons", "Crankshaft", "Camshaft", "Cylinder Head", "Timing Belt/Chain"]
11    return random.choices(engine_parts, k=3)
12
13@dataclass
14class Car:
15    color: str
16    model: str
17    engine_parts: list[str] = field(default_factory=generate_engine_parts)
18    vim_number: str = field(init=False, default_factory=generate_vim_number)
19    _manager_notes: str = field(default_factory=str, repr=False)
20
21my_car = Car(color="Blue", model="Tacoma", _manager_notes="Car Had An Accident")
22print(my_car)
23
24# Car(color='Blue', model='Tacoma', engine_parts=['Crankshaft', 'Crankshaft', 'Pistons'], vim_number='SIIIVRRCID')
25# it doesn't display the field `_manager_notes`

Read: Even if your script is simple, include if __name__