02-5. 序列化與反序列化

⏱️ 閱讀時間: 12 分鐘 🎯 難度: ⭐⭐ (基礎)


🤔 一句話解釋

序列化是把 Python 物件轉成字典或 JSON 字串,反序列化是把字典或 JSON 字串驗證後轉回 Python 物件。

Python 物件  ──序列化──▶  JSON/Dict  ──反序列化──▶  Python 物件
  (User)      model_dump()  {"name":...}  model_validate()  (User)

📤 序列化(Python → JSON)

model_dump() - 轉換為字典

from pydantic import BaseModel
from datetime import datetime

# Sample model with four required fields. `created_at` is a datetime so the
# dict dump and the JSON dump can show how a non-JSON-native type is rendered.
class User(BaseModel):
    name: str
    email: str
    age: int
    created_at: datetime

# Build an instance; every field is passed explicitly.
user = User(
    name="John",
    email="john@example.com",
    age=25,
    created_at=datetime(2025, 1, 1, 12, 0, 0)
)

# Basic conversion: model_dump() returns a plain dict and keeps `created_at`
# as a datetime object (it is not converted to a string here).
print(user.model_dump())
# {
#     'name': 'John',
#     'email': 'john@example.com',
#     'age': 25,
#     'created_at': datetime.datetime(2025, 1, 1, 12, 0)
# }

model_dump_json() - 轉換為 JSON 字串

# JSON string: model_dump_json() returns a compact str, and the datetime is
# written in ISO 8601 form.
print(user.model_dump_json())
# {"name":"John","email":"john@example.com","age":25,"created_at":"2025-01-01T12:00:00"}

# Pretty-printed output
print(user.model_dump_json(indent=2))
# {
#   "name": "John",
#   "email": "john@example.com",
#   "age": 25,
#   "created_at": "2025-01-01T12:00:00"
# }

控制輸出內容

from pydantic import BaseModel
from typing import Optional

# Sample model mixing required fields (name, email) with defaulted ones
# (age, bio, is_active), used to show the filtering options of model_dump().
class User(BaseModel):
    name: str
    email: str
    age: int = 0
    bio: Optional[str] = None
    is_active: bool = True

# Only name and email are passed; age, bio and is_active keep their defaults.
user = User(name="John", email="john@example.com")

# Exclude specific fields
user.model_dump(exclude={"email"})
# {'name': 'John', 'age': 0, 'bio': None, 'is_active': True}

# Include only specific fields
user.model_dump(include={"name", "email"})
# {'name': 'John', 'email': 'john@example.com'}

# Exclude fields whose value equals the default
user.model_dump(exclude_defaults=True)
# {'name': 'John', 'email': 'john@example.com'}

# Exclude unset fields (those not explicitly passed in)
user.model_dump(exclude_unset=True)
# {'name': 'John', 'email': 'john@example.com'}

# Exclude fields whose value is None
user.model_dump(exclude_none=True)
# {'name': 'John', 'email': 'john@example.com', 'age': 0, 'is_active': True}

巢狀排除

from pydantic import BaseModel

# Nested model: embedded in User below to demonstrate nested exclusion.
class Address(BaseModel):
    city: str
    street: str
    zip_code: str

# Outer model holding an Address as a nested field.
class User(BaseModel):
    name: str
    email: str
    address: Address

user = User(
    name="John",
    email="john@example.com",
    address=Address(city="Taipei", street="Main St", zip_code="100")
)

# Exclude a nested field: map the outer field name to the set of inner
# field names to drop.
user.model_dump(exclude={"address": {"zip_code"}})
# {
#     'name': 'John',
#     'email': 'john@example.com',
#     'address': {'city': 'Taipei', 'street': 'Main St'}
# }

# Exclude the whole nested model: map the field name to True.
user.model_dump(exclude={"address": True})
# {'name': 'John', 'email': 'john@example.com'}

📥 反序列化(JSON → Python)

model_validate() - 從字典建立

from pydantic import BaseModel

# Minimal model with three required fields, used as the target of
# model_validate().
class User(BaseModel):
    name: str
    email: str
    age: int

# Build from a dict
data = {"name": "John", "email": "john@example.com", "age": 25}
user = User.model_validate(data)

# For this input, equivalent to unpacking the dict as keyword arguments:
user = User(**data)

model_validate_json() - 從 JSON 字串建立

from pydantic import BaseModel

# Minimal model with three required fields, used as the target of
# model_validate_json().
class User(BaseModel):
    name: str
    email: str
    age: int

# Build from a JSON string: the JSON text is passed in directly, with no
# separate json.loads() step.
json_str = '{"name": "John", "email": "john@example.com", "age": 25}'
user = User.model_validate_json(json_str)

嚴格模式

from pydantic import BaseModel

# Two-field model used to contrast default (lax) and strict validation.
class User(BaseModel):
    name: str
    age: int

# Default mode: types are coerced automatically
data = {"name": "John", "age": "25"}  # age is a string
user = User.model_validate(data)      # ✅ age is coerced to int

# Strict mode: no automatic coercion
user = User.model_validate(data, strict=True)  # ❌ ValidationError

🔧 自訂序列化

使用 field_serializer

from pydantic import BaseModel, field_serializer
from datetime import datetime

# User model whose timestamp and balance are dumped as display strings
# rather than as their native types.
class User(BaseModel):
    name: str
    created_at: datetime
    balance: float

    @field_serializer('created_at')
    def serialize_datetime(self, value: datetime) -> str:
        # format() on a datetime with a non-empty spec delegates to strftime().
        return format(value, "%Y/%m/%d %H:%M")

    @field_serializer('balance')
    def serialize_balance(self, value: float) -> str:
        # Dollar sign, thousands separators, two decimal places.
        return "${:,.2f}".format(value)

user = User(
    name="John",
    created_at=datetime(2025, 1, 1, 12, 30),
    balance=1234.5
)

# Both customised fields come out as strings produced by the field
# serializers; `name` is untouched.
print(user.model_dump())
# {
#     'name': 'John',
#     'created_at': '2025/01/01 12:30',
#     'balance': '$1,234.50'
# }

條件序列化

from pydantic import BaseModel, field_serializer, SerializationInfo

# User model whose email and phone can be masked at dump time by passing
# context={'mask_sensitive': True} to model_dump(). Without that context
# flag both fields are emitted unchanged.
class User(BaseModel):
    name: str
    email: str
    phone: str

    @field_serializer('email')
    def mask_email(self, value: str, info: SerializationInfo) -> str:
        # Return the email as-is unless the serialization context asks for masking.
        if not (info.context and info.context.get('mask_sensitive')):
            return value
        # Split on the LAST '@' so the final segment is always treated as the domain.
        local, at_sign, domain = value.rpartition('@')
        if not at_sign:
            # Not email-shaped: hide it entirely instead of leaking the raw value.
            return "***"
        # Keep at most the first two characters of the local part.
        return f"{local[:2]}***@{domain}"

    @field_serializer('phone')
    def mask_phone(self, value: str, info: SerializationInfo) -> str:
        # Return the phone as-is unless the serialization context asks for masking.
        if not (info.context and info.context.get('mask_sensitive')):
            return value
        # The visible prefix (4 chars) and suffix (2 chars) would cover the
        # whole value when it has 6 or fewer characters, so mask all of it.
        if len(value) <= 6:
            return "*" * len(value)
        return f"{value[:4]}****{value[-2:]}"

user = User(name="John", email="john@example.com", phone="0912345678")

# Normal output: no context is passed, so nothing is masked
print(user.model_dump())
# {'name': 'John', 'email': 'john@example.com', 'phone': '0912345678'}

# Mask sensitive data: the context dict is what the serializers read
# through info.context
print(user.model_dump(context={'mask_sensitive': True}))
# {'name': 'John', 'email': 'jo***@example.com', 'phone': '0912****78'}

計算欄位(computed fields)

from pydantic import BaseModel, computed_field
from typing import List

# One line of an order. `subtotal` is not stored; it is derived on access and
# included in dumps because it is declared as a computed field.
class OrderItem(BaseModel):
    name: str
    price: float
    quantity: int

    @computed_field
    @property
    def subtotal(self) -> float:
        # Unit price multiplied by the number of units.
        unit_price, count = self.price, self.quantity
        return unit_price * count

# An order made of OrderItem lines plus an optional shipping fee. `total` is a
# computed field and therefore shows up in dumps.
class Order(BaseModel):
    items: List[OrderItem]
    shipping_fee: float = 0

    @computed_field
    @property
    def total(self) -> float:
        # Accumulate the line subtotals in order, then add shipping.
        running = 0
        for line in self.items:
            running = running + line.subtotal
        return running + self.shipping_fee

# Prices and the shipping fee are passed as ints; the dump below shows them
# as floats because the fields are annotated as float.
order = Order(
    items=[
        OrderItem(name="iPhone", price=999, quantity=1),
        OrderItem(name="Case", price=29, quantity=2),
    ],
    shipping_fee=10
)

# Computed fields (`subtotal`, `total`) appear after the regular fields.
print(order.model_dump())
# {
#     'items': [
#         {'name': 'iPhone', 'price': 999.0, 'quantity': 1, 'subtotal': 999.0},
#         {'name': 'Case', 'price': 29.0, 'quantity': 2, 'subtotal': 58.0}
#     ],
#     'shipping_fee': 10.0,
#     'total': 1067.0
# }

🏷️ 欄位別名

使用 alias(序列化和反序列化都用)

from pydantic import BaseModel, Field

# Snake_case attribute names with camelCase aliases; `alias` applies to both
# validation input and (with by_alias=True) dump output.
class User(BaseModel):
    user_name: str = Field(alias="userName")
    email_address: str = Field(alias="emailAddress")

# Deserialization uses the alias as the input key
data = {"userName": "John", "emailAddress": "john@example.com"}
user = User.model_validate(data)

# Serialization uses the alias too, when by_alias=True is passed
print(user.model_dump(by_alias=True))
# {'userName': 'John', 'emailAddress': 'john@example.com'}

# Without by_alias the attribute names are used
print(user.model_dump())
# {'user_name': 'John', 'email_address': 'john@example.com'}

使用 serialization_alias

from pydantic import BaseModel, Field

# `serialization_alias` only renames keys on output; the instance below is
# still constructed with the attribute names `name` and `email`.
class User(BaseModel):
    name: str = Field(serialization_alias="userName")
    email: str = Field(serialization_alias="userEmail")

user = User(name="John", email="john@example.com")

# With by_alias=True the serialization_alias is used as the output key
print(user.model_dump(by_alias=True))
# {'userName': 'John', 'userEmail': 'john@example.com'}

使用 validation_alias

from pydantic import BaseModel, Field, AliasChoices

# `validation_alias` only affects input: AliasChoices lists every key that is
# accepted for the `name` field.
class User(BaseModel):
    # Accept several input key names
    name: str = Field(validation_alias=AliasChoices("name", "userName", "user_name"))

# All of the following are accepted: each key is one of the AliasChoices
User.model_validate({"name": "John"})
User.model_validate({"userName": "John"})
User.model_validate({"user_name": "John"})

📝 實戰範例:API 回應格式化

from pydantic import BaseModel, Field, computed_field, field_serializer
from typing import List, Optional
from datetime import datetime
from decimal import Decimal
from enum import Enum

# Order lifecycle states. The `str` mixin makes each member a string whose
# value is the lowercase code used in API payloads.
class OrderStatus(str, Enum):
    PENDING = "pending"
    PAID = "paid"
    SHIPPED = "shipped"
    DELIVERED = "delivered"

# Product as exposed in an API response; `price` is dumped as a formatted
# "NT$" string instead of a Decimal.
class ProductResponse(BaseModel):
    id: int
    name: str
    price: Decimal
    image_url: Optional[str] = None

    @field_serializer('price')
    def serialize_price(self, value: Decimal) -> str:
        # Thousands separators, no decimal places.
        return "NT${:,.0f}".format(value)

# One order line in an API response: the product, how many units, and the
# unit price charged. Both money values are dumped as "NT$" strings.
class OrderItemResponse(BaseModel):
    product: ProductResponse
    quantity: int
    unit_price: Decimal

    @computed_field
    @property
    def subtotal(self) -> Decimal:
        # Line total: unit price times quantity.
        line_total = self.unit_price * self.quantity
        return line_total

    @field_serializer('unit_price', 'subtotal')
    def serialize_money(self, value: Decimal) -> str:
        # One serializer shared by the stored field and the computed field.
        return "NT${:,.0f}".format(value)

# Full order as exposed in an API response.
#
# Multi-word top-level fields carry camelCase serialization aliases, which are
# applied when dumping with by_alias=True. Money values are dumped as "NT$"
# strings, datetimes as "YYYY-MM-DD HH:MM:SS" strings, and the status as a
# {"code", "display"} dict.
class OrderResponse(BaseModel):
    id: int
    order_number: str = Field(serialization_alias="orderNumber")
    status: OrderStatus
    items: List[OrderItemResponse]
    shipping_fee: Decimal = Field(serialization_alias="shippingFee")
    discount: Decimal = Decimal("0")
    created_at: datetime = Field(serialization_alias="createdAt")
    paid_at: Optional[datetime] = Field(None, serialization_alias="paidAt")

    @computed_field
    @property
    def subtotal(self) -> Decimal:
        # Sum of the line subtotals. The explicit Decimal("0") start value keeps
        # the result a Decimal even when `items` is empty (a bare sum() would
        # return the int 0), and reusing item.subtotal keeps the line-total
        # formula in one place.
        return sum((item.subtotal for item in self.items), Decimal("0"))

    @computed_field
    @property
    def total(self) -> Decimal:
        # Amount payable: items plus shipping, minus discount.
        return self.subtotal + self.shipping_fee - self.discount

    @field_serializer('shipping_fee', 'discount', 'subtotal', 'total')
    def serialize_money(self, value: Decimal) -> str:
        # Thousands separators, no decimal places.
        return f"NT${value:,.0f}"

    @field_serializer('created_at', 'paid_at')
    def serialize_datetime(self, value: Optional[datetime]) -> Optional[str]:
        # paid_at is optional, so None must pass through unchanged.
        if value is None:
            return None
        return value.strftime("%Y-%m-%d %H:%M:%S")

    @field_serializer('status')
    def serialize_status(self, value: OrderStatus) -> dict:
        # Emit both the machine-readable code and a human-readable label.
        status_display = {
            OrderStatus.PENDING: "待付款",
            OrderStatus.PAID: "已付款",
            OrderStatus.SHIPPED: "已出貨",
            OrderStatus.DELIVERED: "已送達",
        }
        return {
            "code": value.value,
            # Fall back to the raw code if a status has no label.
            "display": status_display.get(value, value.value)
        }

# Usage example: one paid order with a single line, a shipping fee of 100 and
# a discount of 500.
order = OrderResponse(
    id=1,
    order_number="ORD-20251217-001",
    status=OrderStatus.PAID,
    items=[
        OrderItemResponse(
            product=ProductResponse(id=1, name="iPhone 15", price=Decimal("35900")),
            quantity=1,
            unit_price=Decimal("35900")
        )
    ],
    shipping_fee=Decimal("100"),
    discount=Decimal("500"),
    created_at=datetime(2025, 12, 17, 10, 30, 0),
    paid_at=datetime(2025, 12, 17, 10, 35, 0)
)

# json is used here only to pretty-print the dumped dict; ensure_ascii=False
# keeps the Chinese status label readable instead of \u-escaped.
import json
print(json.dumps(order.model_dump(by_alias=True), indent=2, ensure_ascii=False))

輸出:

{
  "id": 1,
  "orderNumber": "ORD-20251217-001",
  "status": {
    "code": "paid",
    "display": "已付款"
  },
  "items": [
    {
      "product": {
        "id": 1,
        "name": "iPhone 15",
        "price": "NT$35,900",
        "image_url": null
      },
      "quantity": 1,
      "unit_price": "NT$35,900",
      "subtotal": "NT$35,900"
    }
  ],
  "shippingFee": "NT$100",
  "discount": "NT$500",
  "createdAt": "2025-12-17 10:30:00",
  "paidAt": "2025-12-17 10:35:00",
  "subtotal": "NT$35,900",
  "total": "NT$35,500"
}

✅ 重點總結

序列化方法

| 方法 | 輸出 | 用途 |
|------|------|------|
| model_dump() | dict | 轉成字典 |
| model_dump_json() | str | 轉成 JSON 字串 |
| model_dump(by_alias=True) | dict | 使用別名 |
| model_dump(exclude_unset=True) | dict | 排除未設定的欄位 |

反序列化方法

| 方法 | 輸入 | 用途 |
|------|------|------|
| Model(**data) | dict | 從字典建立 |
| Model.model_validate(data) | dict | 從字典建立(推薦) |
| Model.model_validate_json(s) | str | 從 JSON 建立 |

自訂序列化

# Field serializer (summary fragment: it takes `self`, so it is meant to sit
# inside a model class body that has a `price` field)
@field_serializer('price')
def serialize_price(self, value):
    # Dollar sign followed by the value with two decimal places.
    return f"${value:.2f}"

# Computed field (summary fragment for a model class body; it reads
# self.price and self.quantity)
@computed_field
@property
def total(self) -> float:
    return self.price * self.quantity

🎤 面試這樣答

Q: model_dump(exclude_unset=True) 和 exclude_defaults=True 有什麼區別?

答案:

  • exclude_unset=True:排除沒有明確傳入的欄位,即使有預設值
  • exclude_defaults=True:排除值等於預設值的欄位
# `age` has a default of 0, which is what makes the two exclude options
# behave differently in the calls that follow.
class User(BaseModel):
    name: str
    age: int = 0

# age is passed explicitly, and the value passed happens to equal the default.
user = User(name="John", age=0)

user.model_dump(exclude_unset=True)
# {'name': 'John', 'age': 0}  # age was explicitly passed, so it is kept

user.model_dump(exclude_defaults=True)
# {'name': 'John'}  # age equals its default, so it is dropped

上一篇: 02-4. 巢狀模型與繼承 下一篇: 02-6. Settings 管理與環境變數


最後更新:2025-12-17

0%