Data class members
Data class members hold the specific pieces of information that define an individual instance of that class.
Let’s say we have a data class called “Car”:
class Car:
    """Hand-written class that stores a car's color, model and engine size.

    It defines by hand the three methods discussed in this article:
    ``__init__``, ``__repr__`` and ``__eq__``.
    """

    def __init__(self, color: str, model: str, engine_size: int = 0):
        self.color = color
        self.model = model
        self.engine_size = engine_size

    # prints out car object
    def __repr__(self):
        return f"Car {self.model=} {self.color=} {self.engine_size=}"

    # compares object
    def __eq__(self, other):
        # Only compare against objects of exactly the same class; returning
        # NotImplemented lets Python fall back to its default comparison.
        if other.__class__ is not self.__class__:
            return NotImplemented
        return (self.color, self.model, self.engine_size) == (other.color, other.model, other.engine_size)
With the `dataclass` decorator, we can simplify it as follows:
from dataclasses import dataclass


@dataclass
class Car:
    """A car described by its color, model and engine size."""

    color: str
    model: str
    engine_size: int = 0  # assigns a default value
The `dataclass` decorator automatically generates these three methods: `__init__`, `__repr__`, and `__eq__`.
# this method will be added automatically to the class
def __init__(self, color: str, model: str, engine_size: int = 0):
    # Each declared field becomes a parameter (with its default, if any)
    # and is stored as an attribute of the same name.
    self.color = color
    self.model = model
    self.engine_size = engine_size
Here, `color`, `model`, and `engine_size` are the data class members.
They act as blueprints for the information we want to store about each car.
Now, let’s create two instances of our `Car` class:
from dataclasses import dataclass


@dataclass
class Car:
    """A car described by its color, model and engine size."""

    color: str
    model: str
    engine_size: int = 0  # assigns a default value


my_car = Car(color="Blue", model="Toyota Tacoma", engine_size=2500)
your_car = Car(color="Red", model="Honda Civic", engine_size=1800)
print(your_car)
# Car(color='Red', model='Honda Civic', engine_size=1800)
print(my_car)
# Car(color='Blue', model='Toyota Tacoma', engine_size=2500)
In these instances:
- `my_car.color` would be “Blue”, `my_car.model` would be “Toyota Tacoma”, and `my_car.engine_size` would be 2500.
- Similarly, `your_car` has its own set of values for color, model, and engine size.
To see what the `dataclass` decorator has actually implemented for us, we can use the `inspect` module:
import inspect
from dataclasses import dataclass


@dataclass
class Car:
    """A car described by its color, model and engine size."""

    color: str
    model: str
    engine_size: int = 0  # assigns a default value


# List every plain function found on the class as (name, function) pairs.
print(inspect.getmembers(Car, inspect.isfunction))

# Output: [('__eq__', <...>), ('__init__', <...>), ('__repr__', <...>)]
# NOTE: newer Python versions may list additional generated functions.
Three functions were created for us using the dataclass decorator
Data Classes Args
@dataclass(init=True, repr=True, eq=True, order=False, unsafe_hash=False, frozen=False, match_args=True, kw_only=False, slots=False, weakref_slot=False)
| arg | description |
|---|---|
| init | Automatically creates a “constructor”, `__init__`, that takes the initial values of your class’s fields. |
| repr | Generates a nice string representation of your class objects. |
| eq | Creates a way to compare whether two objects of your class are equal to each other based on their fields. |
| order | Generates the ordering methods (`__lt__`, `__le__`, `__gt__`, `__ge__`) so your class objects can be compared and sorted; objects are compared as if they were tuples of their fields, in field order. |
| unsafe_hash | Forces generation of a `__hash__` method, which makes your class hashable; this is needed if you want to use objects as keys in dictionaries. It is “unsafe” because hashing objects whose fields can still change is error-prone. |
| frozen | Makes your class objects immutable – you won’t be able to reassign values. |
| match_args | Generates the `__match_args__` tuple from the fields, which simplifies pattern matching with dataclasses. |
| kw_only | The generated `__init__` method will only accept keyword arguments. |
| slots | Optimizes memory usage by using `__slots__` instead of dictionaries to store object attributes. |
| weakref_slot | Adds a `__weakref__` slot so instances can be weakly referenced (only valid together with `slots=True`). Weak references allow you to refer to an object without preventing it from being garbage collected. |
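With `match_args=True` (the default), the dataclass gets a `__match_args__` attribute, so its instances can also be matched positionally in `match` statements. Pattern matching makes the code more readable and easier to maintain.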
1from dataclasses import dataclass
2
3
4@dataclass(match_args=True)
5class Car:
6 make: str
7 model: str
8 engine: str
9
10
11def get_car_details(car: Car):
12 match car:
13 case Car(make="Tesla", model="Model 3"):
14 return "EV Car"
15 case Car(make="Ford", model="Mustang", engine="V8"):
16 return "American muscle car"
17 case Car(make="Toyota", model="Tacoma"):
18 return "Reliable and Fun truck to drive"
19 case _:
20 return f"{car.make=} {car.engine=} {car.model=}. Not on the system."
21
22
23my_car = Car("Toyota", "Tacoma", "Internal Engine Combustion")
24unknown_car = Car("idk", "X", "unknown")
25
26print(get_car_details(my_car))
27# Reliable and Fun truck to drive
28print(get_car_details(unknown_car))
29# car.make='idk' car.engine='unknown' car.model='X' Not on the system
Dataclasses Default Values
Python evaluates default values only once, when the class is defined,
so every instance of the class `Car` would share a reference to the same list `[]`,
which is problematic.
1from dataclasses import dataclass
2
3@dataclass
4class Car:
5 color: str
6 model: str
7 engine_parts: list[str] = [] # this won't work: dataclass raises ValueError for a mutable default
In line 7 we get an error (a `ValueError`) if we try to set an empty list as the default value. To prevent this,
dataclasses provides a function, `field`, whose `default_factory` argument we can use instead:
from dataclasses import dataclass, field


@dataclass
class Car:
    """A car whose list of engine parts defaults to a fresh empty list."""

    color: str
    model: str
    # default_factory is called once per instance, so no list is shared.
    engine_parts: list[str] = field(default_factory=list)
Another example of the `field` function:
import random
import string
from dataclasses import dataclass, field


def generate_vim_number() -> str:
    """Return a random identifier made of 10 uppercase ASCII letters."""
    return "".join(random.choices(string.ascii_uppercase, k=10))


def generate_engine_parts() -> list[str]:
    """Return three engine part names picked at random (repeats are possible)."""
    engine_parts = ["Pistons", "Crankshaft", "Camshaft", "Cylinder Head", "Timing Belt/Chain"]
    return random.choices(engine_parts, k=3)


@dataclass
class Car:
    """A car whose engine parts and identifier are generated when omitted."""

    color: str
    model: str
    # Each factory runs once per instance that does not supply the value.
    engine_parts: list[str] = field(default_factory=generate_engine_parts)
    vim_number: str = field(default_factory=generate_vim_number)


# The generated values are random, so the output differs on every run.
my_car = Car("Blue", "Tacoma")
print(my_car)
# Car(color='Blue', model='Tacoma', engine_parts=['Cylinder Head', 'Crankshaft', 'Camshaft'], vim_number='WQALLIAUCV')
my_second_car = Car("Red", "Volvo")
print(my_second_car)
# Car(color='Red', model='Volvo', engine_parts=['Camshaft', 'Pistons', 'Crankshaft'], vim_number='HWAUNFSSGH')
Manually Setting Default Values To The Fields
If we want to set these values ourselves instead of relying on the defaults of the class `Car`, we can pass them explicitly:
import random
import string
from dataclasses import dataclass, field


def generate_vim_number() -> str:
    """Return a random identifier made of 10 uppercase ASCII letters."""
    return "".join(random.choices(string.ascii_uppercase, k=10))


def generate_engine_parts() -> list[str]:
    """Return three engine part names picked at random (repeats are possible)."""
    engine_parts = ["Pistons", "Crankshaft", "Camshaft", "Cylinder Head", "Timing Belt/Chain"]
    return random.choices(engine_parts, k=3)


@dataclass
class Car:
    """A car whose engine parts and identifier are generated when omitted."""

    color: str
    model: str
    engine_parts: list[str] = field(default_factory=generate_engine_parts)
    vim_number: str = field(default_factory=generate_vim_number)


# Explicit arguments take precedence: the factories are not called for them.
my_car = Car(color="Blue", model="Tacoma", engine_parts=[], vim_number="VIM NOT AVAILABLE")
print(my_car)
# Car(color='Blue', model='Tacoma', engine_parts=[], vim_number='VIM NOT AVAILABLE')
How To Prevent Setting Default Values In The Fields
We can also prevent users from setting a field through the class initializer by passing `init=False` to `field` (see line 18):
import random
import string
from dataclasses import dataclass, field


def generate_vim_number() -> str:  # 10 random uppercase ASCII letters
    return "".join(random.choices(string.ascii_uppercase, k=10))

def generate_engine_parts() -> list[str]:  # 3 random part names, repeats possible
    engine_parts = ["Pistons", "Crankshaft", "Camshaft", "Cylinder Head", "Timing Belt/Chain"]
    return random.choices(engine_parts, k=3)

@dataclass
class Car:
    color: str
    model: str
    engine_parts: list[str] = field(default_factory=generate_engine_parts)
    vim_number: str = field(init=False, default_factory=generate_vim_number)  # init=False: not an __init__ parameter

# vim_number cannot be passed in; it always comes from generate_vim_number().
my_car = Car(color="Blue", model="Tacoma", engine_parts=["Pistons", "Crankshaft", "Camshaft"])
print(my_car)
# e.g. Car(color='Blue', model='Tacoma', engine_parts=['Pistons', 'Crankshaft', 'Camshaft'], vim_number='KEKVYNJMGJ')

# Passing vim_number to the initializer is rejected:
# my_car = Car(color="Blue", model="Tacoma", engine_parts=["Pistons", "Crankshaft", "Camshaft"], vim_number="KEKVYNJMGJ")
# TypeError: __init__() got an unexpected keyword argument 'vim_number'
Do not display the field in the string generated by repr
By setting the `repr` argument of `field` to `False`, the field will not be included in the string automatically generated by `__repr__`.
import random
import string
from dataclasses import dataclass, field


def generate_vim_number() -> str:
    """Return a random identifier made of 10 uppercase ASCII letters."""
    return "".join(random.choices(string.ascii_uppercase, k=10))


def generate_engine_parts() -> list[str]:
    """Return three engine part names picked at random (repeats are possible)."""
    engine_parts = ["Pistons", "Crankshaft", "Camshaft", "Cylinder Head", "Timing Belt/Chain"]
    return random.choices(engine_parts, k=3)


@dataclass
class Car:
    """A car with generated defaults and a notes field hidden from its repr."""

    color: str
    model: str
    engine_parts: list[str] = field(default_factory=generate_engine_parts)
    vim_number: str = field(init=False, default_factory=generate_vim_number)
    # repr=False keeps this field out of the generated __repr__ string;
    # it is still stored on the instance and accepted by __init__.
    _manager_notes: str = field(default_factory=str, repr=False)


my_car = Car(color="Blue", model="Tacoma", _manager_notes="Car Had An Accident")
print(my_car)

# Example output (engine_parts and vim_number are random):
# Car(color='Blue', model='Tacoma', engine_parts=['Crankshaft', 'Crankshaft', 'Pistons'], vim_number='SIIIVRRCID')
# it doesn't display the field `_manager_notes`