规范化是将数据组织成无冗余、结构清晰的关系模式的过程,目的是:
-- 反例:未规范化的订单表
CREATE TABLE denormalized_orders (
order_id INTEGER,
customer_name VARCHAR(100),
customer_email VARCHAR(100),
customer_address TEXT,
product_name VARCHAR(100),
product_price DECIMAL(10,2),
quantity INTEGER,
order_date DATE
);
-- 存在的问题:
-- 1. 重复存储客户信息(每个订单都存储客户姓名、邮箱、地址)
-- 2. 重复存储产品信息(产品名称、价格)
-- 3. 更新异常:客户改名需要更新所有历史订单
-- 4. 插入异常:无法存储没有订单的客户
-- 5. 删除异常:删除订单时会丢失客户信息
定义:表中的每个列都应该是原子的、不可再分的。
-- ❌ 违反1NF:列中包含多个值
CREATE TABLE students_1nf_bad (
id SERIAL PRIMARY KEY,
name VARCHAR(100),
phone_numbers VARCHAR(200) -- '13800138000, 13800138001'
);
-- ✅ 符合1NF:每个列都是原子值
CREATE TABLE students_1nf_good (
id SERIAL PRIMARY KEY,
name VARCHAR(100)
);
CREATE TABLE student_phones (
student_id INTEGER REFERENCES students(id),
phone_number VARCHAR(20),
PRIMARY KEY (student_id, phone_number)
);
-- ❌ 违反1NF:重复列组
CREATE TABLE orders_1nf_bad (
order_id SERIAL PRIMARY KEY,
product_1_name VARCHAR(100),
product_1_price DECIMAL(10,2),
product_2_name VARCHAR(100),
product_2_price DECIMAL(10,2),
product_3_name VARCHAR(100),
product_3_price DECIMAL(10,2)
);
-- ✅ 符合1NF:使用行而不是列
CREATE TABLE orders_1nf_good (
order_id INTEGER REFERENCES orders(id),
product_id INTEGER REFERENCES products(id),
quantity INTEGER,
PRIMARY KEY (order_id, product_id)
);
定义:满足1NF,且所有非主属性完全依赖于主键(消除部分依赖)。
-- ❌ 违反2NF:复合主键,部分依赖
CREATE TABLE order_items_2nf_bad (
order_id INTEGER,
product_id INTEGER,
product_name VARCHAR(100), -- 只依赖于 product_id,不依赖于 order_id
product_price DECIMAL(10,2), -- 只依赖于 product_id
quantity INTEGER,
PRIMARY KEY (order_id, product_id)
);
-- ✅ 符合2NF:拆分表
CREATE TABLE products_2nf (
product_id SERIAL PRIMARY KEY,
product_name VARCHAR(100) NOT NULL,
product_price DECIMAL(10,2) NOT NULL
);
CREATE TABLE order_items_2nf (
order_id INTEGER,
product_id INTEGER REFERENCES products_2nf(product_id),
quantity INTEGER NOT NULL,
PRIMARY KEY (order_id, product_id)
);
定义:满足2NF,且消除传递依赖(非主属性不依赖于其他非主属性)。
-- ❌ 违反3NF:传递依赖
CREATE TABLE employees_3nf_bad (
employee_id SERIAL PRIMARY KEY,
employee_name VARCHAR(100),
department_id INTEGER,
department_name VARCHAR(100), -- 依赖于 department_id,而 department_id 依赖于 employee_id
department_location VARCHAR(100)
);
-- ✅ 符合3NF:拆分表
CREATE TABLE departments_3nf (
department_id SERIAL PRIMARY KEY,
department_name VARCHAR(100) NOT NULL,
department_location VARCHAR(100)
);
CREATE TABLE employees_3nf (
employee_id SERIAL PRIMARY KEY,
employee_name VARCHAR(100) NOT NULL,
department_id INTEGER REFERENCES departments_3nf(department_id)
);
定义:满足3NF,且每个决定因素都包含候选键(消除主属性对非主属性的依赖)。
-- ❌ 违反BCNF:存在非候选键决定另一个属性
CREATE TABLE course_enrollment_bcnf_bad (
student_id INTEGER,
course_id INTEGER,
instructor_id INTEGER,
instructor_office VARCHAR(100), -- 依赖于 instructor_id,但 instructor_id 不是候选键
PRIMARY KEY (student_id, course_id)
);
-- 函数依赖:
-- (student_id, course_id) → instructor_id
-- instructor_id → instructor_office
-- ✅ 符合BCNF:拆分表
CREATE TABLE instructors_bcnf (
instructor_id SERIAL PRIMARY KEY,
instructor_name VARCHAR(100),
instructor_office VARCHAR(100)
);
CREATE TABLE courses_bcnf (
course_id SERIAL PRIMARY KEY,
course_name VARCHAR(100),
instructor_id INTEGER REFERENCES instructors_bcnf(instructor_id)
);
CREATE TABLE enrollments_bcnf (
student_id INTEGER,
course_id INTEGER REFERENCES courses_bcnf(course_id),
PRIMARY KEY (student_id, course_id)
);
-- 4NF:消除多值依赖
-- ❌ 违反4NF:一个学生有多个爱好和多个课程(独立的多值属性)
CREATE TABLE student_info_4nf_bad (
student_id INTEGER,
hobby VARCHAR(100),
course VARCHAR(100),
PRIMARY KEY (student_id, hobby, course)
);
-- ✅ 符合4NF:拆分为两个独立的关系
CREATE TABLE student_hobbies_4nf (
student_id INTEGER,
hobby VARCHAR(100),
PRIMARY KEY (student_id, hobby)
);
CREATE TABLE student_courses_4nf (
student_id INTEGER,
course VARCHAR(100),
PRIMARY KEY (student_id, course)
);
-- 5NF(投影连接范式):消除连接依赖
-- 通常在实际设计中较少遇到,这里仅作概念说明
| 范式 | 数据冗余 | 查询性能 | 更新性能 | 适用场景 |
|---|---|---|---|---|
| 1NF | 高 | 差 | 差 | 几乎不使用 |
| 2NF | 中 | 中 | 中 | 简单系统 |
| 3NF | 低 | 良 | 良 | 大多数OLTP系统 |
| BCNF | 很低 | 良 | 优 | 复杂业务逻辑 |
| 4NF/5NF | 极低 | 良 | 优 | 高度规范化需求 |
实际建议:
反规范化是有意引入冗余数据来提升查询性能的策略。
-- 技术1:预计算汇总列
-- 规范化设计
CREATE TABLE orders_norm (
order_id SERIAL PRIMARY KEY,
customer_id INTEGER,
order_date DATE
);
CREATE TABLE order_items_norm (
order_id INTEGER REFERENCES orders_norm(order_id),
product_id INTEGER,
quantity INTEGER,
price DECIMAL(10,2)
);
-- 反规范化:在订单表中添加订单总额
CREATE TABLE orders_denorm (
order_id SERIAL PRIMARY KEY,
customer_id INTEGER,
order_date DATE,
total_amount DECIMAL(12,2) -- 冗余字段,减少JOIN计算
);
-- 技术2:路径枚举(树形结构)
-- 规范化设计:递归查询
CREATE TABLE categories_norm (
id SERIAL PRIMARY KEY,
name VARCHAR(100),
parent_id INTEGER REFERENCES categories_norm(id)
);
-- 反规范化:添加路径字段
CREATE TABLE categories_denorm (
id SERIAL PRIMARY KEY,
name VARCHAR(100),
parent_id INTEGER,
path VARCHAR(1000), -- 如:'1/5/12/'
level INTEGER
);
-- 技术3:垂直分区(拆分宽表)
-- 反规范化:原始宽表
CREATE TABLE user_profile_wide (
user_id SERIAL PRIMARY KEY,
username VARCHAR(50),
password_hash VARCHAR(255),
email VARCHAR(100),
full_name VARCHAR(100),
bio TEXT,
avatar BYTEA,
last_login TIMESTAMP,
login_count INTEGER,
preferences JSONB
);
-- 物理反规范化:分离热数据和冷数据
CREATE TABLE user_auth (
user_id SERIAL PRIMARY KEY,
username VARCHAR(50),
password_hash VARCHAR(255)
);
CREATE TABLE user_profile (
user_id INTEGER REFERENCES user_auth(user_id),
full_name VARCHAR(100),
bio TEXT,
avatar BYTEA
);
CREATE TABLE user_stats (
user_id INTEGER REFERENCES user_auth(user_id),
last_login TIMESTAMP,
login_count INTEGER DEFAULT 0,
preferences JSONB
);
-- 技术4:水平分区(分表)
-- 按时间范围分区
CREATE TABLE orders_2024_01 PARTITION OF orders
FOR VALUES FROM ('2024-01-01') TO ('2024-02-01');
CREATE TABLE orders_2024_02 PARTITION OF orders
FOR VALUES FROM ('2024-02-01') TO ('2024-03-01');
| 场景 | 是否反规范化 | 推荐策略 |
|---|---|---|
| 读写比例 > 100:1 | 是 | 预计算汇总、冗余字段 |
| 实时性要求高 | 谨慎 | 使用物化视图 |
| 数据一致性要求极高 | 否 | 保持3NF |
| 报表/分析查询 | 是 | 星型模型 |
| 数据量极大(>1TB) | 是 | 分区、分片 |
-- 实体:用户、订单、产品等
-- 关系:一对多、多对多、一对一
-- 一对多关系:一个用户有多个订单
CREATE TABLE users (
user_id SERIAL PRIMARY KEY,
username VARCHAR(50) NOT NULL
);
CREATE TABLE orders (
order_id SERIAL PRIMARY KEY,
user_id INTEGER REFERENCES users(user_id),
amount DECIMAL(10,2)
);
-- 多对多关系:学生和课程
CREATE TABLE students (
student_id SERIAL PRIMARY KEY,
name VARCHAR(100)
);
CREATE TABLE courses (
course_id SERIAL PRIMARY KEY,
name VARCHAR(100)
);
CREATE TABLE enrollments (
student_id INTEGER REFERENCES students(student_id),
course_id INTEGER REFERENCES courses(course_id),
enrollment_date DATE,
PRIMARY KEY (student_id, course_id)
);
-- 一对一关系:用户和认证信息
CREATE TABLE user_auth (
user_id INTEGER PRIMARY KEY REFERENCES users(user_id),
password_hash VARCHAR(255),
last_login TIMESTAMP
);
-- 模式1:类型/子类型(继承关系)
CREATE TABLE persons (
person_id SERIAL PRIMARY KEY,
name VARCHAR(100),
birth_date DATE,
person_type VARCHAR(20) CHECK (person_type IN ('employee', 'customer'))
);
CREATE TABLE employees (
person_id INTEGER PRIMARY KEY REFERENCES persons(person_id),
employee_number VARCHAR(20),
hire_date DATE,
salary DECIMAL(10,2)
);
CREATE TABLE customers (
person_id INTEGER PRIMARY KEY REFERENCES persons(person_id),
customer_number VARCHAR(20),
credit_limit DECIMAL(10,2)
);
-- 模式2:自引用(树形结构)
CREATE TABLE org_hierarchy (
node_id SERIAL PRIMARY KEY,
node_name VARCHAR(100),
parent_id INTEGER REFERENCES org_hierarchy(node_id),
path LTREE -- 使用 ltree 扩展
);
CREATE EXTENSION IF NOT EXISTS ltree;
-- 完整的电商数据库设计(3NF + 适度反规范化)
-- 用户模块
CREATE TABLE users (
user_id BIGSERIAL PRIMARY KEY,
username VARCHAR(50) UNIQUE NOT NULL,
email VARCHAR(100) UNIQUE NOT NULL,
phone VARCHAR(20),
password_hash VARCHAR(255) NOT NULL,
status VARCHAR(20) DEFAULT 'active',
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- 用户地址(一对多)
CREATE TABLE user_addresses (
address_id BIGSERIAL PRIMARY KEY,
user_id BIGINT NOT NULL REFERENCES users(user_id) ON DELETE CASCADE,
receiver_name VARCHAR(100) NOT NULL,
phone VARCHAR(20) NOT NULL,
province VARCHAR(50),
city VARCHAR(50),
district VARCHAR(50),
detail_address TEXT NOT NULL,
is_default BOOLEAN DEFAULT FALSE,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- 产品模块
CREATE TABLE categories (
category_id SERIAL PRIMARY KEY,
name VARCHAR(100) NOT NULL,
parent_id INTEGER REFERENCES categories(category_id),
level SMALLINT DEFAULT 0,
sort_order INT DEFAULT 0,
path LTREE
);
CREATE TABLE products (
product_id BIGSERIAL PRIMARY KEY,
sku VARCHAR(50) UNIQUE NOT NULL,
name VARCHAR(200) NOT NULL,
description TEXT,
category_id INTEGER REFERENCES categories(category_id),
price DECIMAL(12,2) NOT NULL CHECK (price >= 0),
stock INT DEFAULT 0 CHECK (stock >= 0),
status VARCHAR(20) DEFAULT 'active',
attributes JSONB, -- 产品属性(反规范化)
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- 订单模块
CREATE TABLE orders (
order_id BIGSERIAL PRIMARY KEY,
order_number VARCHAR(50) UNIQUE NOT NULL,
user_id BIGINT NOT NULL REFERENCES users(user_id),
address_id BIGINT NOT NULL REFERENCES user_addresses(address_id),
total_amount DECIMAL(12,2) NOT NULL,
discount_amount DECIMAL(12,2) DEFAULT 0,
actual_amount DECIMAL(12,2) NOT NULL,
status VARCHAR(30) DEFAULT 'pending',
payment_method VARCHAR(30),
payment_time TIMESTAMP,
shipping_time TIMESTAMP,
completion_time TIMESTAMP,
remark TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- 订单明细(没有订单总额冗余,因为总额会经常变化)
CREATE TABLE order_items (
item_id BIGSERIAL PRIMARY KEY,
order_id BIGINT NOT NULL REFERENCES orders(order_id) ON DELETE CASCADE,
product_id BIGINT NOT NULL REFERENCES products(product_id),
product_name VARCHAR(200) NOT NULL, -- 快照产品名称(反规范化)
product_price DECIMAL(12,2) NOT NULL, -- 快照价格
quantity INT NOT NULL CHECK (quantity > 0),
subtotal DECIMAL(12,2) GENERATED ALWAYS AS (product_price * quantity) STORED
);
-- 购物车(反规范化,临时数据)
CREATE TABLE shopping_cart (
cart_id BIGSERIAL PRIMARY KEY,
user_id BIGINT NOT NULL REFERENCES users(user_id),
product_id BIGINT NOT NULL REFERENCES products(product_id),
quantity INT NOT NULL CHECK (quantity > 0),
added_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
UNIQUE(user_id, product_id)
);
-- 评论模块
CREATE TABLE reviews (
review_id BIGSERIAL PRIMARY KEY,
product_id BIGINT NOT NULL REFERENCES products(product_id),
user_id BIGINT NOT NULL REFERENCES users(user_id),
rating SMALLINT NOT NULL CHECK (rating BETWEEN 1 AND 5),
title VARCHAR(200),
content TEXT,
is_verified BOOLEAN DEFAULT FALSE,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
UNIQUE(product_id, user_id) -- 一用户一产品只能评论一次
);
-- 产品平均评分(物化视图,反规范化)
CREATE MATERIALIZED VIEW product_ratings AS
SELECT
product_id,
COUNT(*) AS review_count,
AVG(rating) AS avg_rating
FROM reviews
GROUP BY product_id;
CREATE UNIQUE INDEX idx_product_ratings_id ON product_ratings(product_id);
-- 用户关系(多对多)
CREATE TABLE user_follows (
follower_id BIGINT NOT NULL REFERENCES users(user_id),
followee_id BIGINT NOT NULL REFERENCES users(user_id),
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
status VARCHAR(20) DEFAULT 'accepted',
PRIMARY KEY (follower_id, followee_id),
CHECK (follower_id != followee_id)
);
-- 帖子
CREATE TABLE posts (
post_id BIGSERIAL PRIMARY KEY,
user_id BIGINT NOT NULL REFERENCES users(user_id),
content TEXT,
images TEXT[],
likes_count INT DEFAULT 0, -- 反规范化
comments_count INT DEFAULT 0, -- 反规范化
shares_count INT DEFAULT 0,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- 点赞(使用软删除)
CREATE TABLE post_likes (
post_id BIGINT NOT NULL REFERENCES posts(post_id),
user_id BIGINT NOT NULL REFERENCES users(user_id),
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
deleted_at TIMESTAMP,
PRIMARY KEY (post_id, user_id)
);
-- 评论
CREATE TABLE comments (
comment_id BIGSERIAL PRIMARY KEY,
post_id BIGINT NOT NULL REFERENCES posts(post_id),
user_id BIGINT NOT NULL REFERENCES users(user_id),
parent_comment_id BIGINT REFERENCES comments(comment_id), -- 嵌套评论
content TEXT NOT NULL,
likes_count INT DEFAULT 0,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- 动态计数更新触发器
CREATE OR REPLACE FUNCTION update_post_like_count()
RETURNS TRIGGER AS $$
BEGIN
IF TG_OP = 'INSERT' THEN
UPDATE posts SET likes_count = likes_count + 1
WHERE post_id = NEW.post_id;
ELSIF TG_OP = 'DELETE' THEN
UPDATE posts SET likes_count = likes_count - 1
WHERE post_id = OLD.post_id;
END IF;
RETURN NULL;
END;
$$ LANGUAGE plpgsql;
CREATE TRIGGER trigger_update_like_count
AFTER INSERT OR DELETE ON post_likes
FOR EACH ROW EXECUTE FUNCTION update_post_like_count();
-- 设计完成后需要检查的项目
-- 1. 是否每个表都有主键?
SELECT tablename FROM pg_tables
WHERE tablename NOT IN (SELECT tablename FROM pg_indexes WHERE indexdef LIKE '%PRIMARY%');
-- 2. 是否有孤儿外键?
SELECT
tc.table_name,
tc.constraint_name,
kcu.column_name
FROM information_schema.table_constraints tc
JOIN information_schema.key_column_usage kcu
ON tc.constraint_name = kcu.constraint_name
WHERE tc.constraint_type = 'FOREIGN KEY';
-- 3. 是否有未索引的外键?
SELECT
con.conrelid::regclass AS table_name,
con.conname AS fk_name,
array_agg(a.attname) AS fk_columns
FROM pg_constraint con
JOIN pg_attribute a ON a.attnum = ANY(con.conkey) AND a.attrelid = con.conrelid
WHERE con.contype = 'f'
AND NOT EXISTS (
SELECT 1 FROM pg_index i
WHERE i.indrelid = con.conrelid
AND con.conkey <@ i.indkey
)
GROUP BY con.conrelid, con.conname;
-- 4. 是否有未使用的索引?
SELECT
schemaname,
tablename,
indexname,
idx_scan,
idx_tup_read,
idx_tup_fetch
FROM pg_stat_user_indexes
WHERE idx_scan = 0
ORDER BY pg_relation_size(indexname::regclass) DESC;