Django 面試準備 10-2:select_related vs Prefetch_related

深入理解 Django ORM 兩大查詢優化利器

10-2. select_related vs prefetch_related

📌 核心區別一覽

特性select_relatedprefetch_related
適用關係ForeignKey, OneToOneFieldManyToManyField, 反向 ForeignKey
SQL 策略JOIN(單次查詢)分別查詢後 Python 組合
查詢次數1 次通常 2 次
適合數據少量關聯大量關聯
SQL 複雜度中等簡單
內存使用較低較高

工作原理

核心: 使用 SQL JOIN 在一次查詢中獲取關聯對象

# models.py
class Author(models.Model):
    name = models.CharField(max_length=100)

class Post(models.Model):
    title = models.CharField(max_length=200)
    author = models.ForeignKey(Author, on_delete=models.CASCADE)

基本用法

# ❌ 沒有優化:N+1 查詢
posts = Post.objects.all()
for post in posts:
    print(post.author.name)  # 每次都查詢數據庫

# SQL:
# SELECT * FROM post;
# SELECT * FROM author WHERE id = 1;
# SELECT * FROM author WHERE id = 2;
# ... (N 次)

# ✅ 使用 select_related
posts = Post.objects.select_related('author').all()
for post in posts:
    print(post.author.name)  # 不再查詢!

# SQL:
# SELECT post.*, author.*
# FROM post
# INNER JOIN author ON (post.author_id = author.id);

結果: 100 條數據,從 101 次查詢 → 1 次查詢


多層關聯

# models.py
class Country(models.Model):
    name = models.CharField(max_length=50)

class City(models.Model):
    name = models.CharField(max_length=50)
    country = models.ForeignKey(Country, on_delete=models.CASCADE)

class Author(models.Model):
    name = models.CharField(max_length=100)
    city = models.ForeignKey(City, on_delete=models.CASCADE)

class Post(models.Model):
    title = models.CharField(max_length=200)
    author = models.ForeignKey(Author, on_delete=models.CASCADE)

# ✅ 使用雙底線跨多層關聯
posts = Post.objects.select_related(
    'author',                  # 第 1 層
    'author__city',           # 第 2 層
    'author__city__country'   # 第 3 層
).all()

for post in posts:
    print(f"{post.title} by {post.author.name} from {post.author.city.country.name}")

生成的 SQL:

SELECT
    post.*,
    author.*,
    city.*,
    country.*
FROM post
INNER JOIN author ON (post.author_id = author.id)
INNER JOIN city ON (author.city_id = city.id)
INNER JOIN country ON (city.country_id = country.id);

OneToOneField 示例

# models.py
class User(models.Model):
    username = models.CharField(max_length=50)

class Profile(models.Model):
    user = models.OneToOneField(User, on_delete=models.CASCADE)
    bio = models.TextField()
    avatar = models.ImageField()

# ✅ 使用 select_related
users = User.objects.select_related('profile').all()

for user in users:
    print(f"{user.username}: {user.profile.bio}")

# ❌ 不能用於 ManyToManyField
class Post(models.Model):
    title = models.CharField(max_length=200)
    tags = models.ManyToManyField('Tag')

# 這樣會報錯!
posts = Post.objects.select_related('tags').all()
# Error: Field 'tags' is a many-to-many field

# ✅ 應該使用 prefetch_related
posts = Post.objects.prefetch_related('tags').all()

工作原理

核心: 執行兩次查詢,然後在 Python 中組合數據

# models.py
class Post(models.Model):
    title = models.CharField(max_length=200)
    author = models.ForeignKey(Author, on_delete=models.CASCADE)

class Comment(models.Model):
    post = models.ForeignKey(Post, on_delete=models.CASCADE, related_name='comments')
    content = models.TextField()

基本用法

# ❌ 沒有優化:N+1 查詢
posts = Post.objects.all()
for post in posts:
    for comment in post.comments.all():  # 每次都查詢!
        print(comment.content)

# SQL:
# SELECT * FROM post;
# SELECT * FROM comment WHERE post_id = 1;
# SELECT * FROM comment WHERE post_id = 2;
# ... (N 次)

# ✅ 使用 prefetch_related
posts = Post.objects.prefetch_related('comments').all()
for post in posts:
    for comment in post.comments.all():  # 不再查詢!
        print(comment.content)

# SQL:
# SELECT * FROM post;
# SELECT * FROM comment WHERE post_id IN (1, 2, 3, ..., 100);

結果: 100 篇文章,從 101 次查詢 → 2 次查詢


Python 中的組合過程

# prefetch_related 的內部邏輯(簡化版)

# 第 1 步:查詢主對象
posts = list(Post.objects.all())  # SELECT * FROM post

# 第 2 步:收集所有 post_id
post_ids = [post.id for post in posts]  # [1, 2, 3, ..., 100]

# 第 3 步:批量查詢評論
comments = Comment.objects.filter(post_id__in=post_ids)
# SELECT * FROM comment WHERE post_id IN (1, 2, 3, ..., 100)

# 第 4 步:在 Python 中組合
comments_by_post = defaultdict(list)
for comment in comments:
    comments_by_post[comment.post_id].append(comment)

# 第 5 步:賦值給每個 post
for post in posts:
    post._prefetched_objects_cache['comments'] = comments_by_post[post.id]

ManyToManyField 示例

# models.py
class Tag(models.Model):
    name = models.CharField(max_length=50)

class Post(models.Model):
    title = models.CharField(max_length=200)
    tags = models.ManyToManyField(Tag, related_name='posts')

# ✅ 使用 prefetch_related
posts = Post.objects.prefetch_related('tags').all()

for post in posts:
    tag_names = ', '.join([tag.name for tag in post.tags.all()])
    print(f"{post.title} - Tags: {tag_names}")

# SQL:
# SELECT * FROM post;
# SELECT * FROM tag
# INNER JOIN post_tags ON (tag.id = post_tags.tag_id)
# WHERE post_tags.post_id IN (1, 2, 3, ..., 100);

反向 ForeignKey 示例

# models.py
class Author(models.Model):
    name = models.CharField(max_length=100)

class Post(models.Model):
    title = models.CharField(max_length=200)
    author = models.ForeignKey(Author, on_delete=models.CASCADE, related_name='posts')

# ✅ 查詢作者及其所有文章
authors = Author.objects.prefetch_related('posts').all()

for author in authors:
    print(f"{author.name} ({author.posts.count()} posts):")
    for post in author.posts.all():
        print(f"  - {post.title}")

# SQL:
# SELECT * FROM author;
# SELECT * FROM post WHERE author_id IN (1, 2, 3, ...);

🔥 進階:Prefetch 對象

自定義預取查詢

from django.db.models import Prefetch

# 只預取已發布的評論
published_comments = Comment.objects.filter(is_published=True).order_by('-created_at')

posts = Post.objects.prefetch_related(
    Prefetch('comments', queryset=published_comments)
).all()

# SQL:
# SELECT * FROM post;
# SELECT * FROM comment
# WHERE post_id IN (1, 2, ...) AND is_published = TRUE
# ORDER BY created_at DESC;

# models.py
class Post(models.Model):
    title = models.CharField(max_length=200)

class Comment(models.Model):
    post = models.ForeignKey(Post, on_delete=models.CASCADE, related_name='comments')
    content = models.TextField()
    user = models.ForeignKey(User, on_delete=models.CASCADE)

# ✅ 預取評論和評論的用戶
posts = Post.objects.prefetch_related(
    Prefetch(
        'comments',
        queryset=Comment.objects.select_related('user')
    )
).all()

for post in posts:
    for comment in post.comments.all():
        print(f"{comment.user.username}: {comment.content}")

# SQL:
# SELECT * FROM post;
# SELECT comment.*, user.*
# FROM comment
# INNER JOIN user ON (comment.user_id = user.id)
# WHERE post_id IN (1, 2, ...);

限制預取數量

# 只預取最近 5 條評論
recent_comments = Comment.objects.order_by('-created_at')[:5]

posts = Post.objects.prefetch_related(
    Prefetch('comments', queryset=recent_comments, to_attr='recent_comments')
).all()

for post in posts:
    print(f"{post.title}:")
    for comment in post.recent_comments:  # 使用自定義屬性
        print(f"  - {comment.content}")

📊 性能對比

場景:100 篇文章,每篇 10 條評論

方法查詢次數查詢時間內存使用數據傳輸
無優化1011.2s15MB大量
prefetch_related20.15s18MB中等

場景:100 篇文章,每篇 1 個作者(ForeignKey)

方法查詢次數查詢時間內存使用SQL 複雜度
無優化1011.0s10MB簡單
select_related10.05s10MB中等(JOIN)
prefetch_related20.08s11MB簡單

結論:

  • ForeignKey 用 select_related 更快(1 次查詢)
  • ManyToMany/反向 FK 只能用 prefetch_related

🎯 組合使用

場景:文章 + 作者 + 評論 + 標籤

# models.py
class Post(models.Model):
    title = models.CharField(max_length=200)
    author = models.ForeignKey(Author, on_delete=models.CASCADE)  # ForeignKey
    tags = models.ManyToManyField(Tag)  # ManyToMany

class Comment(models.Model):
    post = models.ForeignKey(Post, on_delete=models.CASCADE, related_name='comments')
    user = models.ForeignKey(User, on_delete=models.CASCADE)  # ForeignKey

# ✅ 組合使用
posts = Post.objects.select_related(
    'author'  # ForeignKey 用 select_related
).prefetch_related(
    'tags',   # ManyToMany 用 prefetch_related
    Prefetch(
        'comments',
        queryset=Comment.objects.select_related('user')  # 評論的用戶也用 select_related
    )
).all()

# SQL:
# SELECT post.*, author.*
# FROM post
# INNER JOIN author ON (post.author_id = author.id);  -- select_related

# SELECT * FROM tag
# INNER JOIN post_tags ON (tag.id = post_tags.tag_id)
# WHERE post_tags.post_id IN (1, 2, ...);  -- prefetch_related

# SELECT comment.*, user.*
# FROM comment
# INNER JOIN user ON (comment.user_id = user.id)
# WHERE comment.post_id IN (1, 2, ...);  -- prefetch_related

結果: 3 次查詢完成所有數據加載!


💡 何時選擇哪個?

決策樹

需要訪問關聯對象嗎?
    ├─ 否 → 不需要優化
    └─ 是 ↓

什麼類型的關係?
    ├─ ForeignKey / OneToOneField
    │   └─ 使用 select_related ✅
    │
    └─ ManyToManyField / 反向 ForeignKey
        └─ 使用 prefetch_related ✅

需要過濾或排序關聯對象?
    └─ 使用 Prefetch() 對象自定義 ✅

有多層關聯?
    ├─ 都是 ForeignKey → select_related('a__b__c')
    ├─ 混合類型 → 組合使用
    └─ 都是反向/多對多 → prefetch_related + Prefetch

🔍 常見陷阱

陷阱 1:在錯誤的關係上使用

# ❌ 在 ManyToMany 上用 select_related
posts = Post.objects.select_related('tags').all()
# Error: tags is a many-to-many field

# ✅ 正確
posts = Post.objects.prefetch_related('tags').all()

陷阱 2:prefetch 後再過濾

# ❌ prefetch 後再過濾會觸發新查詢
posts = Post.objects.prefetch_related('comments').all()
for post in posts:
    # 觸發新查詢!
    active_comments = post.comments.filter(is_active=True).all()

# ✅ 正確:在 Prefetch 中過濾
active_comments_qs = Comment.objects.filter(is_active=True)
posts = Post.objects.prefetch_related(
    Prefetch('comments', queryset=active_comments_qs)
).all()

陷阱 3:過度 JOIN 導致性能下降

# ⚠️ JOIN 太多會降低性能
posts = Post.objects.select_related(
    'author',
    'author__profile',
    'author__profile__avatar',
    'category',
    'category__parent',
    'category__parent__parent'
).all()

# ✅ 改進:只 JOIN 必要的
posts = Post.objects.select_related(
    'author',
    'category'
).all()

陷阱 4:忘記在序列化器中優化

# serializers.py (DRF)
class PostSerializer(serializers.ModelSerializer):
    author_name = serializers.CharField(source='author.name')  # N+1!
    comments_count = serializers.IntegerField(source='comments.count')  # N+1!

    class Meta:
        model = Post
        fields = ['title', 'author_name', 'comments_count']

# views.py
# ❌ 沒有優化
queryset = Post.objects.all()

# ✅ 正確
queryset = Post.objects.select_related('author').prefetch_related('comments').all()

🎯 實戰案例

案例 1:電商訂單詳情

# models.py
class Order(models.Model):
    user = models.ForeignKey(User, on_delete=models.CASCADE)
    created_at = models.DateTimeField(auto_now_add=True)

class OrderItem(models.Model):
    order = models.ForeignKey(Order, on_delete=models.CASCADE, related_name='items')
    product = models.ForeignKey(Product, on_delete=models.CASCADE)
    quantity = models.IntegerField()

# ✅ 優化查詢
orders = Order.objects.select_related('user').prefetch_related(
    Prefetch(
        'items',
        queryset=OrderItem.objects.select_related('product')
    )
).filter(user=request.user).order_by('-created_at')[:10]

# 使用
for order in orders:
    print(f"Order #{order.id} by {order.user.username}")
    for item in order.items.all():
        print(f"  - {item.product.name} x {item.quantity}")

查詢次數: 2 次


案例 2:社交媒體動態

# models.py
class Post(models.Model):
    user = models.ForeignKey(User, on_delete=models.CASCADE)
    content = models.TextField()

class Like(models.Model):
    post = models.ForeignKey(Post, on_delete=models.CASCADE, related_name='likes')
    user = models.ForeignKey(User, on_delete=models.CASCADE)

class Comment(models.Model):
    post = models.ForeignKey(Post, on_delete=models.CASCADE, related_name='comments')
    user = models.ForeignKey(User, on_delete=models.CASCADE)
    content = models.TextField()

# ✅ 優化查詢
posts = Post.objects.select_related('user').prefetch_related(
    Prefetch('likes', queryset=Like.objects.select_related('user')),
    Prefetch('comments', queryset=Comment.objects.select_related('user').order_by('-created_at')[:5])
).order_by('-created_at')[:20]

# 使用
for post in posts:
    print(f"{post.user.username}: {post.content}")
    print(f"Likes: {post.likes.count()}")
    print("Recent comments:")
    for comment in post.comments.all():
        print(f"  {comment.user.username}: {comment.content}")

查詢次數: 4 次


💡 最佳實踐

1. 在 ORM 層優化,不在模板層

# ❌ 不好
def view(request):
    posts = Post.objects.all()
    return render(request, 'posts.html', {'posts': posts})

# template
{% for post in posts %}
    {{ post.author.name }}  <!-- N+1 -->
{% endfor %}

# ✅ 好
def view(request):
    posts = Post.objects.select_related('author').all()
    return render(request, 'posts.html', {'posts': posts})

2. 使用 only() 和 defer() 配合

# 只獲取必要字段
posts = Post.objects.select_related('author').only(
    'id', 'title',
    'author__name', 'author__email'
).all()

3. 監控查詢性能

from django.test.utils import override_settings
from django.db import connection

@override_settings(DEBUG=True)
def test_view():
    response = client.get('/posts/')

    print(f"查詢次數: {len(connection.queries)}")
    for query in connection.queries:
        print(query['sql'])

💡 面試要點

答:

  • select_related:用 JOIN,1 次查詢,適合 ForeignKey/OneToOne
  • prefetch_related:分別查詢後組合,2+ 次查詢,適合 ManyToMany/反向 FK

答:

  • JOIN 多對多會產生笛卡爾積,數據重複
  • 例如:100 篇文章,每篇 10 個標籤 → JOIN 結果 1000 行
  • prefetch_related 分別查詢更高效

Q3: 什麼時候需要用 Prefetch 對象?

答:

  1. 需要過濾或排序預取的數據
  2. 需要限制預取數量
  3. 需要使用自定義屬性名(to_attr)
  4. 預取的關聯對象本身也有關聯(多層優化)

答:

  • 適度使用不會,但過度 JOIN 會降低性能
  • JOIN 太多表會產生巨大的臨時結果集
  • 建議:只 JOIN 必要的關聯,不要超過 3-4 層

🔗 下一篇

在下一篇文章中,我們將學習 索引優化技巧,了解如何通過合理使用索引提升查詢性能。

閱讀時間:12 分鐘

0%