索引是数据库中用于加速数据检索的数据结构,类似于书籍的目录。PostgreSQL 提供了多种索引类型,每种都有其特定的使用场景。
-- 查看表是否有索引
SELECT
tablename,
indexname,
indexdef
FROM pg_indexes
WHERE tablename = 'users';
-- 查看索引使用情况统计
SELECT
schemaname,
tablename,
indexname,
idx_scan AS times_used,
idx_tup_read AS tuples_read,
idx_tup_fetch AS tuples_fetched
FROM pg_stat_user_indexes
ORDER BY idx_scan DESC;
-- 索引的代价体现在三个方面:
-- 1. 存储空间:索引通常占用表大小的 10%-50%
-- 2. 写入性能:INSERT/UPDATE/DELETE 需要同步更新索引
-- 3. 维护成本:VACUUM 和 ANALYZE 需要处理索引
-- 查看索引占用空间
SELECT
tablename,
indexname,
pg_size_pretty(pg_relation_size(indexname::regclass)) AS index_size
FROM pg_indexes
WHERE tablename = 'large_table'
ORDER BY pg_relation_size(indexname::regclass) DESC;
B-Tree 是最通用的索引类型,适用于等值查询和范围查询。
-- 创建 B-Tree 索引
CREATE INDEX idx_users_email ON users(email);
CREATE INDEX idx_users_age ON users(age);
-- 适用的查询模式
-- 1. 等值查询
SELECT * FROM users WHERE email = 'user@example.com';
-- 2. 范围查询
SELECT * FROM users WHERE age BETWEEN 20 AND 30;
SELECT * FROM users WHERE age > 18;
SELECT * FROM users WHERE created_at > '2024-01-01';
-- 3. 排序查询(避免单独排序)
SELECT * FROM users ORDER BY email LIMIT 10;
-- 4. 前缀模糊匹配(仅限左匹配)
SELECT * FROM users WHERE email LIKE 'user@%';
-- B-Tree 索引与排序方向
CREATE INDEX idx_users_age_email ON users(age ASC, email DESC);
Hash 索引仅支持等值查询,但理论上比 B-Tree 更快。
-- 创建 Hash 索引(PostgreSQL 10+ 已支持 WAL 日志)
CREATE INDEX idx_users_phone_hash ON users USING hash(phone);
-- 适用场景:非常明确的等值查询
SELECT * FROM users WHERE phone = '13800138000';
-- Hash 索引限制
-- ❌ 不支持范围查询
-- ❌ 不支持排序
-- ❌ 不支持部分索引
-- ❌ 不支持多列索引
-- 查看 Hash 索引是否被使用
EXPLAIN SELECT * FROM users WHERE phone = '13800138000';
GiST(Generalized Search Tree)是一种平衡树结构,适用于复杂数据类型。
-- 创建 GiST 索引
CREATE EXTENSION IF NOT EXISTS btree_gist;
-- 1. 范围类型索引
CREATE INDEX idx_reservations_time ON reservations USING gist(time_range);
-- 查询重叠的时间段
SELECT * FROM reservations
WHERE time_range && tstzrange('2024-06-01 09:00', '2024-06-01 17:00');
-- 2. 几何类型索引
CREATE INDEX idx_locations_point ON locations USING gist(geo_point);
-- 查询附近的地点
SELECT * FROM locations
WHERE geo_point <-> point(40.7128, -74.0060) < 10000; -- 10公里内
-- 3. 全文搜索(通常用 GIN,GiST 也支持)
CREATE INDEX idx_articles_search_gist ON articles USING gist(search_vector);
-- 4. 数组重叠查询(需要 btree_gist 扩展)
CREATE INDEX idx_tags_gist ON articles USING gist(tags);
SELECT * FROM articles WHERE tags && ARRAY['postgresql', 'database'];
GIN(Generalized Inverted Index)是倒排索引,适用于包含多个值的复合数据类型。
-- GIN 索引适用场景
-- 1. JSONB 查询
CREATE INDEX idx_products_data ON products USING gin(product_data);
SELECT * FROM products WHERE product_data @> '{"category": "Electronics"}';
SELECT * FROM products WHERE product_data ? 'warranty';
-- 2. 数组查询
CREATE INDEX idx_articles_tags ON articles USING gin(tags);
SELECT * FROM articles WHERE tags @> ARRAY['postgresql', 'database'];
SELECT * FROM articles WHERE tags && ARRAY['postgresql'];
-- 3. 全文搜索
CREATE INDEX idx_articles_search ON articles USING gin(search_vector);
SELECT * FROM articles
WHERE search_vector @@ to_tsquery('english', 'postgresql & performance');
-- 4. 文本相似度(pg_trgm)
CREATE EXTENSION IF NOT EXISTS pg_trgm;
CREATE INDEX idx_users_name_trgm ON users USING gin(name gin_trgm_ops);
SELECT * FROM users WHERE name % 'Jonh'; -- 模糊匹配
-- GIN 索引优化参数
CREATE INDEX idx_gin_optimized ON large_table USING gin(data)
WITH (fastupdate = on, gin_pending_list_limit = 4096);
BRIN(Block Range Index)是一种块范围索引,适用于超大表中自然排序的列。
-- BRIN 索引特点:极小、但可能扫描更多块
-- 适用条件:数据物理顺序与逻辑顺序一致
-- 创建 BRIN 索引
CREATE INDEX idx_logs_created_at ON logs USING brin(created_at);
-- 每页块数量(pages_per_range)
CREATE INDEX idx_brin_custom ON huge_table USING brin(id)
WITH (pages_per_range = 128); -- 默认 128
-- 查看 BRIN 索引摘要
SELECT
brinrelid::regclass AS table_name,
brinindexid::regclass AS index_name,
brinallvisible
FROM pg_brin_index_stats;
-- BRIN 最佳实践
-- ✅ 适用:时序数据(日志、监控)、自增 ID
-- ❌ 不适用:随机分布的数据、频繁更新的列
-- 维护 BRIN 索引
VACUUM huge_table; -- 更新 BRIN 摘要信息
SP-GiST(Space-Partitioned GiST)适用于空间分区的数据。
-- SP-GiST 适用场景:点、线、圆等空间数据
CREATE INDEX idx_points_spgist ON points USING spgist(geom);
-- 相比 GiST,SP-GiST 在某些空间查询中性能更好
-- 适用于四叉树、k-d 树等空间分区算法
部分索引只索引表的一部分行,极大地减少了索引大小。
-- 只索引活跃用户
CREATE INDEX idx_active_users_email ON users(email)
WHERE status = 'active';
-- 查询会自动使用部分索引
SELECT * FROM users WHERE status = 'active' AND email = 'alice@example.com';
-- 部分索引的好处
-- 1. 索引更小
-- 2. 维护成本更低
-- 3. 查询更快
-- 多条件部分索引
CREATE INDEX idx_high_value_orders ON orders(created_at)
WHERE total_amount > 10000;
-- 排除 NULL 值的部分索引
CREATE INDEX idx_users_phone_notnull ON users(phone)
WHERE phone IS NOT NULL;
表达式索引基于函数或表达式创建,而非直接基于列。
-- 基于函数的索引
CREATE INDEX idx_users_lower_email ON users(LOWER(email));
-- 查询时使用相同的表达式
SELECT * FROM users WHERE LOWER(email) = 'alice@example.com';
-- 基于 JSONB 字段的索引
CREATE INDEX idx_products_price ON products(((product_data->>'price')::NUMERIC));
SELECT * FROM products
WHERE (product_data->>'price')::NUMERIC > 100;
-- 基于日期函数的索引
CREATE INDEX idx_orders_year_month ON orders(EXTRACT(YEAR FROM created_at), EXTRACT(MONTH FROM created_at));
SELECT * FROM orders
WHERE EXTRACT(YEAR FROM created_at) = 2024
AND EXTRACT(MONTH FROM created_at) = 6;
-- 多个表达式的复合索引
CREATE INDEX idx_users_name_age ON users(LOWER(name), age);
覆盖索引在索引中额外存储非搜索列,实现索引只读查询。
-- 覆盖索引语法(PostgreSQL 11+)
CREATE INDEX idx_users_covering ON users(id) INCLUDE (name, email, status);
-- 查询可以完全不访问表
SELECT id, name, email, status FROM users WHERE id = 1;
-- 所有数据都从索引获取
-- 查看是否为覆盖索引
EXPLAIN (ANALYZE, BUFFERS)
SELECT id, name, email FROM users WHERE id = 1;
-- 应该显示 "Heap Fetches: 0"
-- 覆盖索引的最佳实践
-- ✅ 常用的大表查询
-- ✅ 高并发只读查询
-- ❌ 频繁更新的列
-- ❌ 太大的列(TEXT、JSONB、BYTEA)
-- 复合索引的列顺序至关重要
CREATE INDEX idx_composite ON orders(user_id, status, created_at);
-- 列顺序决策原则:
-- 1. 等值条件列优先
-- 2. 范围条件列次之
-- 3. 排序列最后
-- ✅ 能使用的查询
SELECT * FROM orders WHERE user_id = 1; -- 使用第一列
SELECT * FROM orders WHERE user_id = 1 AND status = 'active'; -- 使用前两列
SELECT * FROM orders WHERE user_id = 1 AND status = 'active'
AND created_at > '2024-01-01'; -- 使用全部
-- ❌ 不能使用的查询
SELECT * FROM orders WHERE status = 'active'; -- 不使用索引
SELECT * FROM orders WHERE created_at > '2024-01-01'; -- 不使用索引
-- 选择性 = 不同值数量 / 总行数
-- 选择性越高(接近 1),索引效率越好
-- 查看列的选择性
SELECT
COUNT(DISTINCT email) AS distinct_count,
COUNT(*) AS total_count,
ROUND(COUNT(DISTINCT email) * 100.0 / COUNT(*), 2) AS selectivity_pct
FROM users;
-- 低选择性列不合适单独作为索引
-- 示例:status 列只有几个值(active, pending, deleted)
-- 选择性很低,B-Tree 索引效果差
-- 低选择性列的替代方案
-- 1. 部分索引
CREATE INDEX idx_active_users ON users(id) WHERE status = 'active';
-- 2. 复合索引(放在后面)
CREATE INDEX idx_users_status_created ON users(status, created_at);
-- 3. BRIN 索引(如果数据有序)
CREATE INDEX idx_users_status_brin ON users USING brin(status);
-- 模式1:等值+范围
CREATE INDEX idx_user_created ON orders(user_id, created_at);
-- 适用于:特定用户的时间范围查询
-- 模式2:多等值+排序
CREATE INDEX idx_user_status_date ON orders(user_id, status, created_at DESC);
-- 适用于:特定用户的特定状态,按时间倒序
-- 模式3:覆盖索引
CREATE INDEX idx_order_covering ON orders(user_id, status, created_at)
INCLUDE (total_amount, order_number);
-- 适用于:高频查询,避免回表
-- 模式4:部分复合索引
CREATE INDEX idx_active_user_orders ON orders(user_id, created_at)
WHERE status != 'cancelled';
-- 使用 EXPLAIN 分析索引使用情况
EXPLAIN (ANALYZE, BUFFERS, VERBOSE)
SELECT * FROM users WHERE email = 'alice@example.com';
-- 关键输出解读:
-- Index Scan: 使用了索引
-- Seq Scan: 全表扫描(可能需要优化)
-- Bitmap Index Scan + Bitmap Heap Scan: 组合索引扫描
-- Index Only Scan: 覆盖索引,无需回表
-- 查看索引扫描效率
SELECT
relname AS table_name,
indexrelname AS index_name,
idx_scan AS index_scans,
idx_tup_read AS tuples_read,
idx_tup_fetch AS tuples_fetched
FROM pg_stat_user_indexes
WHERE idx_scan > 0
ORDER BY idx_tup_fetch / NULLIF(idx_scan, 0) DESC;
-- 查找从未使用的索引
SELECT
schemaname,
tablename,
indexname,
idx_scan,
pg_size_pretty(pg_relation_size(indexname::regclass)) AS size
FROM pg_stat_user_indexes
WHERE idx_scan = 0
ORDER BY pg_relation_size(indexname::regclass) DESC;
-- 查找重复索引
SELECT
pg_get_indexdef(indexrelid) AS index_definition,
array_agg(indexrelid::regclass) AS duplicate_indexes
FROM pg_index
WHERE indrelid = 'users'::regclass
GROUP BY pg_get_indexdef(indexrelid)
HAVING COUNT(*) > 1;
-- 查找相似索引(部分重合)
SELECT
a.indexrelid::regclass AS index1,
b.indexrelid::regclass AS index2,
a.indkey AS columns1,
b.indkey AS columns2
FROM pg_index a, pg_index b
WHERE a.indrelid = b.indrelid
AND a.indexrelid < b.indexrelid
AND a.indkey @> b.indkey;
-- 计算索引命中率
SELECT
schemaname,
tablename,
idx_scan,
seq_scan,
ROUND(100.0 * idx_scan / NULLIF(idx_scan + seq_scan, 0), 2) AS index_usage_pct
FROM pg_stat_user_tables
WHERE idx_scan + seq_scan > 0
ORDER BY index_usage_pct;
-- 缓存命中率(与索引相关)
SELECT
'index hit rate' AS name,
SUM(idx_blks_hit) * 100.0 / NULLIF(SUM(idx_blks_hit) + SUM(idx_blks_read), 0) AS hit_ratio
FROM pg_statio_user_indexes;
-- 重建索引(消除膨胀)
REINDEX INDEX idx_users_email;
REINDEX TABLE users; -- 重建表上所有索引
REINDEX DATABASE mydb; -- 重建整个数据库的索引
-- 在线重建(不阻塞写入)
REINDEX INDEX CONCURRENTLY idx_users_email;
-- 分析索引统计信息
ANALYZE VERBOSE users;
-- 重置索引统计
SELECT pg_stat_reset();
SELECT pg_stat_reset_single_table_counters('users'::regclass);
-- 设置索引的填充因子(减少页分裂)
CREATE INDEX idx_fillfactor ON large_table(id) WITH (fillfactor = 90);
ALTER INDEX idx_fillfactor SET (fillfactor = 85);
-- 检测索引膨胀
CREATE EXTENSION IF NOT EXISTS pgstattuple;
SELECT
indexrelid::regclass AS index_name,
pg_size_pretty(pg_relation_size(indexrelid)) AS size,
version,
leaf_fragmentation AS fragmentation_pct,
dead_tuple_count AS dead_tuples
FROM pgstatindex('idx_users_email');
-- 查找需要重建的索引(膨胀率 > 20%)
SELECT
schemaname,
tablename,
indexname,
pg_size_pretty(pg_relation_size(indexname::regclass)) AS size,
idx_scan,
idx_tup_read,
idx_tup_fetch
FROM pg_stat_user_indexes
WHERE pg_relation_size(indexname::regclass) > 10 * 1024 * 1024 -- > 10MB
AND idx_scan < 100 -- 使用较少
ORDER BY pg_relation_size(indexname::regclass) DESC;
-- PostgreSQL 自身不提供自动索引推荐,但可以手动分析
-- 1. 分析慢查询中的过滤条件
-- 查看 pg_stat_statements 中最耗时的查询
SELECT
query,
calls,
mean_exec_time,
stddev_exec_time,
rows
FROM pg_stat_statements
WHERE query NOT LIKE '%pg_%'
ORDER BY mean_exec_time DESC
LIMIT 20;
-- 2. 分析缺少索引的表
SELECT
schemaname,
tablename,
seq_scan,
seq_tup_read,
idx_scan,
ROUND(seq_tup_read / NULLIF(seq_scan, 0)) AS avg_rows_per_seq_scan
FROM pg_stat_user_tables
WHERE seq_scan > 100
ORDER BY seq_scan DESC;
-- 3. 分析 WHERE 条件中的列
-- 需要开启 track_activities 和 track_counts
SELECT
schemaname,
tablename,
attname,
n_distinct,
correlation
FROM pg_stats
WHERE tablename = 'orders'
ORDER BY correlation;
-- 传统分页(OFFSET 效率低)
SELECT * FROM users ORDER BY id LIMIT 10 OFFSET 100000;
-- OFFSET 会扫描前 100010 行
-- 优化方案1:游标分页(Seek Method)
SELECT * FROM users
WHERE id > 100000
ORDER BY id LIMIT 10;
-- 优化方案2:延迟关联
SELECT u.*
FROM users u
INNER JOIN (
SELECT id FROM users
ORDER BY id
LIMIT 10 OFFSET 100000
) AS sub ON u.id = sub.id;
-- 需要对应的索引
CREATE INDEX idx_users_id ON users(id);
-- 时间戳分页
SELECT * FROM events
WHERE created_at < '2024-01-01 00:00:00'
ORDER BY created_at DESC LIMIT 20;
-- 需要索引:CREATE INDEX idx_events_created ON events(created_at);
-- 避免文件排序(filesort)
-- 创建与 ORDER BY 匹配的索引
-- ✅ 索引顺序与排序一致
CREATE INDEX idx_created_at ON orders(created_at);
SELECT * FROM orders ORDER BY created_at;
-- ✅ 多列排序
CREATE INDEX idx_user_date ON orders(user_id, created_at DESC);
SELECT * FROM orders WHERE user_id = 1 ORDER BY created_at DESC;
-- ❌ 无法使用索引的排序
CREATE INDEX idx_date ON orders(created_at);
SELECT * FROM orders ORDER BY created_at DESC; -- 可以使用反向扫描
SELECT * FROM orders ORDER BY created_at NULLS LAST; -- 需要匹配 NULLS 选项
-- 表达式排序
CREATE INDEX idx_lower_name ON users(LOWER(name));
SELECT * FROM users ORDER BY LOWER(name);
-- 全文搜索索引参数调优
CREATE INDEX idx_articles_search ON articles USING gin(search_vector)
WITH (fastupdate = off, gin_pending_list_limit = 8192);
-- 字典优化
CREATE TEXT SEARCH CONFIGURATION custom_english (COPY = english);
ALTER TEXT SEARCH CONFIGURATION custom_english
ALTER MAPPING FOR word, asciiword WITH english_stem, thesaurus_english;
-- 权重优化
CREATE INDEX idx_weighted_search ON articles USING gin(
setweight(to_tsvector('english', title), 'A') ||
setweight(to_tsvector('english', content), 'B')
);
-- tsvector 预计算(生成列)
ALTER TABLE articles ADD COLUMN search_vector tsvector
GENERATED ALWAYS AS (
setweight(to_tsvector('english', title), 'A') ||
setweight(to_tsvector('english', content), 'B')
) STORED;
-- JSONB 索引策略
-- 1. 默认 GIN 索引
CREATE INDEX idx_jsonb_gin ON products USING gin(product_data);
-- 2. 指定操作符的 GIN 索引(更小更快)
CREATE INDEX idx_jsonb_gin_ops ON products USING gin(product_data jsonb_path_ops);
-- 3. 针对特定路径的索引
CREATE INDEX idx_product_category ON products((product_data->>'category'));
CREATE INDEX idx_product_price ON products(((product_data->>'price')::NUMERIC));
-- 4. 表达式索引
CREATE INDEX idx_product_category_price ON products(
(product_data->>'category'),
((product_data->>'price')::NUMERIC)
);
-- JSONB 索引使用示例
EXPLAIN SELECT * FROM products
WHERE product_data @> '{"category": "Electronics", "price": 500}';
-- 使用 jsonb_path_ops 的查询
SELECT * FROM products
WHERE product_data @@ '$.category == "Electronics"';
-- ✅ 应该创建索引的场景
-- 1. WHERE 条件中频繁使用的列
-- 2. JOIN 的关联列(外键)
-- 3. ORDER BY 和 GROUP BY 的列
-- 4. 唯一性约束的列
-- 5. 覆盖索引用于高频查询
-- ❌ 不建议创建索引的场景
-- 1. 小表(< 1000 行)
-- 2. 频繁更新的列
-- 3. 低选择性的列(如 gender、status)
-- 4. 很少使用的查询
-- 5. BLOB/TEXT 长字段(除非使用全文搜索)
-- 索引创建决策矩阵
SELECT
'是否需要索引' AS question,
CASE
WHEN seq_scan > 1000 AND seq_tup_read / seq_scan > 100 THEN '是'
WHEN idx_scan > 0 AND idx_tup_fetch / idx_scan < 10 THEN '可能不需要'
ELSE '分析具体查询'
END AS suggestion
FROM pg_stat_user_tables
WHERE tablename = 'your_table';
-- 推荐命名规范
-- 普通索引:idx_表名_列名
CREATE INDEX idx_users_email ON users(email);
-- 唯一索引:uq_表名_列名
CREATE UNIQUE INDEX uq_users_email ON users(email);
-- 复合索引:idx_表名_列1_列2
CREATE INDEX idx_orders_user_status ON orders(user_id, status);
-- 表达式索引:idx_表名_表达式
CREATE INDEX idx_users_lower_email ON users(LOWER(email));
-- 部分索引:idx_表名_列名_条件
CREATE INDEX idx_active_users_email ON users(email) WHERE status = 'active';
-- 每个表的索引数量建议
-- 小表(< 10万行):最多 3-5 个索引
-- 中表(10万-1000万行):最多 5-8 个索引
-- 大表(>1000万行):最多 8-12 个索引
-- 查看表的索引数量
SELECT
tablename,
COUNT(*) AS index_count,
SUM(pg_relation_size(indexname::regclass)) AS total_index_size
FROM pg_indexes
GROUP BY tablename
ORDER BY index_count DESC;
-- 索引维护成本评估
SELECT
idx_scan AS reads,
pg_relation_size(indexrelid) AS size,
idx_tup_fetch AS rows_fetched,
idx_scan + idx_tup_fetch AS total_work
FROM pg_stat_user_indexes;
-- 安全的索引变更流程(零停机时间)
-- 步骤1:创建索引(CONCURRENTLY 不阻塞写入)
CREATE INDEX CONCURRENTLY idx_new_column ON large_table(new_column);
-- 步骤2:验证索引
SELECT * FROM pg_indexes WHERE indexname = 'idx_new_column';
-- 步骤3:测试查询性能
EXPLAIN (ANALYZE, BUFFERS)
SELECT * FROM large_table WHERE new_column = 'value';
-- 步骤4:(可选)删除旧索引
DROP INDEX CONCURRENTLY IF EXISTS idx_old_column;
-- 批量索引创建
DO $$
DECLARE
idx_name TEXT;
BEGIN
FOR idx_name IN
SELECT indexname FROM pg_indexes
WHERE tablename = 'large_table' AND indexname NOT LIKE '%_concurrently'
LOOP
EXECUTE format('DROP INDEX CONCURRENTLY %I', idx_name);
EXECUTE format('CREATE INDEX CONCURRENTLY %I ON large_table USING btree(new_column)', idx_name);
END LOOP;
END;
$$ LANGUAGE plpgsql;