数据库设计理论 - 规范化与反规范化

person 落叶    watch_later 2026-05-02 20:26:28
visibility 4    class PostgreSQL    bookmark 专栏

一、数据库设计概述

1. 数据库设计生命周期

flowchart LR A[需求分析] --> B[概念设计<br/>ER图] B --> C[逻辑设计<br/>关系模式] C --> D[规范化] D --> E[物理设计<br/>索引/分区] E --> F[实现与优化]

2. 良好数据库设计的原则

  • 数据完整性:确保数据的准确性和一致性
  • 减少冗余:避免数据重复存储
  • 易于维护:结构清晰,便于修改和扩展
  • 查询性能:在规范化和性能之间取得平衡

二、规范化理论

1. 什么是规范化?

规范化是将数据组织成无冗余、结构清晰的关系模式的过程,目的是:

  • 消除数据冗余
  • 避免插入、更新、删除异常
  • 确保数据依赖合理

2. 未规范化的问题示例

-- 反例:未规范化的订单表
CREATE TABLE denormalized_orders (
    order_id INTEGER,
    customer_name VARCHAR(100),
    customer_email VARCHAR(100),
    customer_address TEXT,
    product_name VARCHAR(100),
    product_price DECIMAL(10,2),
    quantity INTEGER,
    order_date DATE
);

-- 存在的问题:
-- 1. 重复存储客户信息(每个订单都存储客户姓名、邮箱、地址)
-- 2. 重复存储产品信息(产品名称、价格)
-- 3. 更新异常:客户改名需要更新所有历史订单
-- 4. 插入异常:无法存储没有订单的客户
-- 5. 删除异常:删除订单时会丢失客户信息

三、范式详解

1. 第一范式(1NF)

定义:表中的每个列都应该是原子的、不可再分的。

-- ❌ 违反1NF:列中包含多个值
CREATE TABLE students_1nf_bad (
    id SERIAL PRIMARY KEY,
    name VARCHAR(100),
    phone_numbers VARCHAR(200)  -- '13800138000, 13800138001'
);

-- ✅ 符合1NF:每个列都是原子值
CREATE TABLE students_1nf_good (
    id SERIAL PRIMARY KEY,
    name VARCHAR(100)
);

CREATE TABLE student_phones (
    student_id INTEGER REFERENCES students(id),
    phone_number VARCHAR(20),
    PRIMARY KEY (student_id, phone_number)
);

-- ❌ 违反1NF:重复列组
CREATE TABLE orders_1nf_bad (
    order_id SERIAL PRIMARY KEY,
    product_1_name VARCHAR(100),
    product_1_price DECIMAL(10,2),
    product_2_name VARCHAR(100),
    product_2_price DECIMAL(10,2),
    product_3_name VARCHAR(100),
    product_3_price DECIMAL(10,2)
);

-- ✅ 符合1NF:使用行而不是列
CREATE TABLE orders_1nf_good (
    order_id INTEGER REFERENCES orders(id),
    product_id INTEGER REFERENCES products(id),
    quantity INTEGER,
    PRIMARY KEY (order_id, product_id)
);

2. 第二范式(2NF)

定义:满足1NF,且所有非主属性完全依赖于主键(消除部分依赖)。

-- ❌ 违反2NF:复合主键,部分依赖
CREATE TABLE order_items_2nf_bad (
    order_id INTEGER,
    product_id INTEGER,
    product_name VARCHAR(100),    -- 只依赖于 product_id,不依赖于 order_id
    product_price DECIMAL(10,2),  -- 只依赖于 product_id
    quantity INTEGER,
    PRIMARY KEY (order_id, product_id)
);

-- ✅ 符合2NF:拆分表
CREATE TABLE products_2nf (
    product_id SERIAL PRIMARY KEY,
    product_name VARCHAR(100) NOT NULL,
    product_price DECIMAL(10,2) NOT NULL
);

CREATE TABLE order_items_2nf (
    order_id INTEGER,
    product_id INTEGER REFERENCES products_2nf(product_id),
    quantity INTEGER NOT NULL,
    PRIMARY KEY (order_id, product_id)
);

3. 第三范式(3NF)

定义:满足2NF,且消除传递依赖(非主属性不依赖于其他非主属性)。

-- ❌ 违反3NF:传递依赖
CREATE TABLE employees_3nf_bad (
    employee_id SERIAL PRIMARY KEY,
    employee_name VARCHAR(100),
    department_id INTEGER,
    department_name VARCHAR(100),  -- 依赖于 department_id,而 department_id 依赖于 employee_id
    department_location VARCHAR(100)
);

-- ✅ 符合3NF:拆分表
CREATE TABLE departments_3nf (
    department_id SERIAL PRIMARY KEY,
    department_name VARCHAR(100) NOT NULL,
    department_location VARCHAR(100)
);

CREATE TABLE employees_3nf (
    employee_id SERIAL PRIMARY KEY,
    employee_name VARCHAR(100) NOT NULL,
    department_id INTEGER REFERENCES departments_3nf(department_id)
);

4. BC范式(BCNF)

定义:满足3NF,且每个决定因素都包含候选键(消除主属性对非主属性的依赖)。

-- ❌ 违反BCNF:存在非候选键决定另一个属性
CREATE TABLE course_enrollment_bcnf_bad (
    student_id INTEGER,
    course_id INTEGER,
    instructor_id INTEGER,
    instructor_office VARCHAR(100),  -- 依赖于 instructor_id,但 instructor_id 不是候选键
    PRIMARY KEY (student_id, course_id)
);

-- 函数依赖:
-- (student_id, course_id) → instructor_id
-- instructor_id → instructor_office

-- ✅ 符合BCNF:拆分表
CREATE TABLE instructors_bcnf (
    instructor_id SERIAL PRIMARY KEY,
    instructor_name VARCHAR(100),
    instructor_office VARCHAR(100)
);

CREATE TABLE courses_bcnf (
    course_id SERIAL PRIMARY KEY,
    course_name VARCHAR(100),
    instructor_id INTEGER REFERENCES instructors_bcnf(instructor_id)
);

CREATE TABLE enrollments_bcnf (
    student_id INTEGER,
    course_id INTEGER REFERENCES courses_bcnf(course_id),
    PRIMARY KEY (student_id, course_id)
);

5. 第四范式(4NF)与第五范式(5NF)

-- 4NF:消除多值依赖
-- ❌ 违反4NF:一个学生有多个爱好和多个课程(独立的多值属性)
CREATE TABLE student_info_4nf_bad (
    student_id INTEGER,
    hobby VARCHAR(100),
    course VARCHAR(100),
    PRIMARY KEY (student_id, hobby, course)
);

-- ✅ 符合4NF:拆分为两个独立的关系
CREATE TABLE student_hobbies_4nf (
    student_id INTEGER,
    hobby VARCHAR(100),
    PRIMARY KEY (student_id, hobby)
);

CREATE TABLE student_courses_4nf (
    student_id INTEGER,
    course VARCHAR(100),
    PRIMARY KEY (student_id, course)
);

-- 5NF(投影连接范式):消除连接依赖
-- 通常在实际设计中较少遇到,这里仅作概念说明

四、范式选择指南

范式 数据冗余 查询性能 更新性能 适用场景
1NF 几乎不使用
2NF 简单系统
3NF 大多数OLTP系统
BCNF 很低 复杂业务逻辑
4NF/5NF 极低 高度规范化需求

实际建议

  • OLTP系统:一般达到3NF即可
  • 数据仓库:通常使用星型/雪花型模型(反规范化)
  • 混合系统:3NF + 适度反规范化

五、反规范化

1. 什么是反规范化?

反规范化是有意引入冗余数据来提升查询性能的策略。

2. 反规范化的常见技术

-- 技术1:预计算汇总列
-- 规范化设计
CREATE TABLE orders_norm (
    order_id SERIAL PRIMARY KEY,
    customer_id INTEGER,
    order_date DATE
);

CREATE TABLE order_items_norm (
    order_id INTEGER REFERENCES orders_norm(order_id),
    product_id INTEGER,
    quantity INTEGER,
    price DECIMAL(10,2)
);

-- 反规范化:在订单表中添加订单总额
CREATE TABLE orders_denorm (
    order_id SERIAL PRIMARY KEY,
    customer_id INTEGER,
    order_date DATE,
    total_amount DECIMAL(12,2)  -- 冗余字段,减少JOIN计算
);
-- 技术2:路径枚举(树形结构)
-- 规范化设计:递归查询
CREATE TABLE categories_norm (
    id SERIAL PRIMARY KEY,
    name VARCHAR(100),
    parent_id INTEGER REFERENCES categories_norm(id)
);

-- 反规范化:添加路径字段
CREATE TABLE categories_denorm (
    id SERIAL PRIMARY KEY,
    name VARCHAR(100),
    parent_id INTEGER,
    path VARCHAR(1000),  -- 如:'1/5/12/'
    level INTEGER
);
-- 技术3:垂直分区(拆分宽表)
-- 反规范化:原始宽表
CREATE TABLE user_profile_wide (
    user_id SERIAL PRIMARY KEY,
    username VARCHAR(50),
    password_hash VARCHAR(255),
    email VARCHAR(100),
    full_name VARCHAR(100),
    bio TEXT,
    avatar BYTEA,
    last_login TIMESTAMP,
    login_count INTEGER,
    preferences JSONB
);

-- 物理反规范化:分离热数据和冷数据
CREATE TABLE user_auth (
    user_id SERIAL PRIMARY KEY,
    username VARCHAR(50),
    password_hash VARCHAR(255)
);

CREATE TABLE user_profile (
    user_id INTEGER REFERENCES user_auth(user_id),
    full_name VARCHAR(100),
    bio TEXT,
    avatar BYTEA
);

CREATE TABLE user_stats (
    user_id INTEGER REFERENCES user_auth(user_id),
    last_login TIMESTAMP,
    login_count INTEGER DEFAULT 0,
    preferences JSONB
);
-- 技术4:水平分区(分表)
-- 按时间范围分区
CREATE TABLE orders_2024_01 PARTITION OF orders
FOR VALUES FROM ('2024-01-01') TO ('2024-02-01');

CREATE TABLE orders_2024_02 PARTITION OF orders
FOR VALUES FROM ('2024-02-01') TO ('2024-03-01');

3. 反规范化决策矩阵

场景 是否反规范化 推荐策略
读写比例 > 100:1 预计算汇总、冗余字段
实时性要求高 谨慎 使用物化视图
数据一致性要求极高 保持3NF
报表/分析查询 星型模型
数据量极大(>1TB) 分区、分片

六、实体关系模型(ER模型)

1. ER图基本元素

-- 实体:用户、订单、产品等
-- 关系:一对多、多对多、一对一

-- 一对多关系:一个用户有多个订单
CREATE TABLE users (
    user_id SERIAL PRIMARY KEY,
    username VARCHAR(50) NOT NULL
);

CREATE TABLE orders (
    order_id SERIAL PRIMARY KEY,
    user_id INTEGER REFERENCES users(user_id),
    amount DECIMAL(10,2)
);

-- 多对多关系:学生和课程
CREATE TABLE students (
    student_id SERIAL PRIMARY KEY,
    name VARCHAR(100)
);

CREATE TABLE courses (
    course_id SERIAL PRIMARY KEY,
    name VARCHAR(100)
);

CREATE TABLE enrollments (
    student_id INTEGER REFERENCES students(student_id),
    course_id INTEGER REFERENCES courses(course_id),
    enrollment_date DATE,
    PRIMARY KEY (student_id, course_id)
);

-- 一对一关系:用户和认证信息
CREATE TABLE user_auth (
    user_id INTEGER PRIMARY KEY REFERENCES users(user_id),
    password_hash VARCHAR(255),
    last_login TIMESTAMP
);

2. 设计模式示例

-- 模式1:类型/子类型(继承关系)
CREATE TABLE persons (
    person_id SERIAL PRIMARY KEY,
    name VARCHAR(100),
    birth_date DATE,
    person_type VARCHAR(20) CHECK (person_type IN ('employee', 'customer'))
);

CREATE TABLE employees (
    person_id INTEGER PRIMARY KEY REFERENCES persons(person_id),
    employee_number VARCHAR(20),
    hire_date DATE,
    salary DECIMAL(10,2)
);

CREATE TABLE customers (
    person_id INTEGER PRIMARY KEY REFERENCES persons(person_id),
    customer_number VARCHAR(20),
    credit_limit DECIMAL(10,2)
);

-- 模式2:自引用(树形结构)
CREATE TABLE org_hierarchy (
    node_id SERIAL PRIMARY KEY,
    node_name VARCHAR(100),
    parent_id INTEGER REFERENCES org_hierarchy(node_id),
    path LTREE  -- 使用 ltree 扩展
);

CREATE EXTENSION IF NOT EXISTS ltree;

七、实际设计案例

1. 电商系统设计

-- 完整的电商数据库设计(3NF + 适度反规范化)

-- 用户模块
CREATE TABLE users (
    user_id BIGSERIAL PRIMARY KEY,
    username VARCHAR(50) UNIQUE NOT NULL,
    email VARCHAR(100) UNIQUE NOT NULL,
    phone VARCHAR(20),
    password_hash VARCHAR(255) NOT NULL,
    status VARCHAR(20) DEFAULT 'active',
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- 用户地址(一对多)
CREATE TABLE user_addresses (
    address_id BIGSERIAL PRIMARY KEY,
    user_id BIGINT NOT NULL REFERENCES users(user_id) ON DELETE CASCADE,
    receiver_name VARCHAR(100) NOT NULL,
    phone VARCHAR(20) NOT NULL,
    province VARCHAR(50),
    city VARCHAR(50),
    district VARCHAR(50),
    detail_address TEXT NOT NULL,
    is_default BOOLEAN DEFAULT FALSE,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- 产品模块
CREATE TABLE categories (
    category_id SERIAL PRIMARY KEY,
    name VARCHAR(100) NOT NULL,
    parent_id INTEGER REFERENCES categories(category_id),
    level SMALLINT DEFAULT 0,
    sort_order INT DEFAULT 0,
    path LTREE
);

CREATE TABLE products (
    product_id BIGSERIAL PRIMARY KEY,
    sku VARCHAR(50) UNIQUE NOT NULL,
    name VARCHAR(200) NOT NULL,
    description TEXT,
    category_id INTEGER REFERENCES categories(category_id),
    price DECIMAL(12,2) NOT NULL CHECK (price >= 0),
    stock INT DEFAULT 0 CHECK (stock >= 0),
    status VARCHAR(20) DEFAULT 'active',
    attributes JSONB,  -- 产品属性(反规范化)
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- 订单模块
CREATE TABLE orders (
    order_id BIGSERIAL PRIMARY KEY,
    order_number VARCHAR(50) UNIQUE NOT NULL,
    user_id BIGINT NOT NULL REFERENCES users(user_id),
    address_id BIGINT NOT NULL REFERENCES user_addresses(address_id),
    total_amount DECIMAL(12,2) NOT NULL,
    discount_amount DECIMAL(12,2) DEFAULT 0,
    actual_amount DECIMAL(12,2) NOT NULL,
    status VARCHAR(30) DEFAULT 'pending',
    payment_method VARCHAR(30),
    payment_time TIMESTAMP,
    shipping_time TIMESTAMP,
    completion_time TIMESTAMP,
    remark TEXT,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- 订单明细(没有订单总额冗余,因为总额会经常变化)
CREATE TABLE order_items (
    item_id BIGSERIAL PRIMARY KEY,
    order_id BIGINT NOT NULL REFERENCES orders(order_id) ON DELETE CASCADE,
    product_id BIGINT NOT NULL REFERENCES products(product_id),
    product_name VARCHAR(200) NOT NULL,  -- 快照产品名称(反规范化)
    product_price DECIMAL(12,2) NOT NULL, -- 快照价格
    quantity INT NOT NULL CHECK (quantity > 0),
    subtotal DECIMAL(12,2) GENERATED ALWAYS AS (product_price * quantity) STORED
);

-- 购物车(反规范化,临时数据)
CREATE TABLE shopping_cart (
    cart_id BIGSERIAL PRIMARY KEY,
    user_id BIGINT NOT NULL REFERENCES users(user_id),
    product_id BIGINT NOT NULL REFERENCES products(product_id),
    quantity INT NOT NULL CHECK (quantity > 0),
    added_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    UNIQUE(user_id, product_id)
);

-- 评论模块
CREATE TABLE reviews (
    review_id BIGSERIAL PRIMARY KEY,
    product_id BIGINT NOT NULL REFERENCES products(product_id),
    user_id BIGINT NOT NULL REFERENCES users(user_id),
    rating SMALLINT NOT NULL CHECK (rating BETWEEN 1 AND 5),
    title VARCHAR(200),
    content TEXT,
    is_verified BOOLEAN DEFAULT FALSE,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    UNIQUE(product_id, user_id)  -- 一用户一产品只能评论一次
);

-- 产品平均评分(物化视图,反规范化)
CREATE MATERIALIZED VIEW product_ratings AS
SELECT 
    product_id,
    COUNT(*) AS review_count,
    AVG(rating) AS avg_rating
FROM reviews
GROUP BY product_id;

CREATE UNIQUE INDEX idx_product_ratings_id ON product_ratings(product_id);

2. 社交网络设计

-- 用户关系(多对多)
CREATE TABLE user_follows (
    follower_id BIGINT NOT NULL REFERENCES users(user_id),
    followee_id BIGINT NOT NULL REFERENCES users(user_id),
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    status VARCHAR(20) DEFAULT 'accepted',
    PRIMARY KEY (follower_id, followee_id),
    CHECK (follower_id != followee_id)
);

-- 帖子
CREATE TABLE posts (
    post_id BIGSERIAL PRIMARY KEY,
    user_id BIGINT NOT NULL REFERENCES users(user_id),
    content TEXT,
    images TEXT[],
    likes_count INT DEFAULT 0,        -- 反规范化
    comments_count INT DEFAULT 0,     -- 反规范化
    shares_count INT DEFAULT 0,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- 点赞(使用软删除)
CREATE TABLE post_likes (
    post_id BIGINT NOT NULL REFERENCES posts(post_id),
    user_id BIGINT NOT NULL REFERENCES users(user_id),
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    deleted_at TIMESTAMP,
    PRIMARY KEY (post_id, user_id)
);

-- 评论
CREATE TABLE comments (
    comment_id BIGSERIAL PRIMARY KEY,
    post_id BIGINT NOT NULL REFERENCES posts(post_id),
    user_id BIGINT NOT NULL REFERENCES users(user_id),
    parent_comment_id BIGINT REFERENCES comments(comment_id),  -- 嵌套评论
    content TEXT NOT NULL,
    likes_count INT DEFAULT 0,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- 动态计数更新触发器
CREATE OR REPLACE FUNCTION update_post_like_count()
RETURNS TRIGGER AS $$
BEGIN
    IF TG_OP = 'INSERT' THEN
        UPDATE posts SET likes_count = likes_count + 1 
        WHERE post_id = NEW.post_id;
    ELSIF TG_OP = 'DELETE' THEN
        UPDATE posts SET likes_count = likes_count - 1 
        WHERE post_id = OLD.post_id;
    END IF;
    RETURN NULL;
END;
$$ LANGUAGE plpgsql;

CREATE TRIGGER trigger_update_like_count
    AFTER INSERT OR DELETE ON post_likes
    FOR EACH ROW EXECUTE FUNCTION update_post_like_count();

八、数据库设计检查清单

-- 设计完成后需要检查的项目

-- 1. 是否每个表都有主键?
SELECT tablename FROM pg_tables 
WHERE tablename NOT IN (SELECT tablename FROM pg_indexes WHERE indexdef LIKE '%PRIMARY%');

-- 2. 是否有孤儿外键?
SELECT 
    tc.table_name,
    tc.constraint_name,
    kcu.column_name
FROM information_schema.table_constraints tc
JOIN information_schema.key_column_usage kcu 
    ON tc.constraint_name = kcu.constraint_name
WHERE tc.constraint_type = 'FOREIGN KEY';

-- 3. 是否有未索引的外键?
SELECT 
    con.conrelid::regclass AS table_name,
    con.conname AS fk_name,
    array_agg(a.attname) AS fk_columns
FROM pg_constraint con
JOIN pg_attribute a ON a.attnum = ANY(con.conkey) AND a.attrelid = con.conrelid
WHERE con.contype = 'f'
  AND NOT EXISTS (
      SELECT 1 FROM pg_index i
      WHERE i.indrelid = con.conrelid
        AND con.conkey <@ i.indkey
  )
GROUP BY con.conrelid, con.conname;

-- 4. 是否有未使用的索引?
SELECT 
    schemaname,
    tablename,
    indexname,
    idx_scan,
    idx_tup_read,
    idx_tup_fetch
FROM pg_stat_user_indexes
WHERE idx_scan = 0
ORDER BY pg_relation_size(indexname::regclass) DESC;
评论区
评论列表
menu