2026/2/22 7:12:46
网站建设
项目流程
嘉兴网站制作优化,品牌推广,广东做网站公司有哪些,2021东莞封城最新消息案例1、lazyagg#xff1a;延迟聚合运算目的#xff1a;消除子查询中的聚合运算。应用场景#xff1a;当子查询中有GROUP BY#xff0c;子查询中的表很大#xff0c;子查询与外面的表(比较小/过滤完之后数据量少)#xff0c;进行关联之后还有GROUP BY#xff0c;就可以开…案例1、lazyagg延迟聚合运算目的消除子查询中的聚合运算。应用场景当子查询中有GROUP BY子查询中的表很大子查询与外面的表(比较小/过滤完之后数据量少)进行关联之后还有GROUP BY就可以开启lazyagg特性加快SQL性能。准备工作建表插入数据语句-- 创建两个测试表 -- 销售订单表大表 CREATE TABLE sales_orders ( order_id SERIAL PRIMARY KEY, customer_id INT NOT NULL, product_id INT NOT NULL, quantity INT NOT NULL, unit_price DECIMAL(10,2) NOT NULL, order_date DATE NOT NULL, region VARCHAR(50) ); -- 客户信息表小表 CREATE TABLE customers ( customer_id INT PRIMARY KEY, customer_name VARCHAR(100) NOT NULL, customer_type VARCHAR(50) NOT NULL, registration_date DATE NOT NULL, country VARCHAR(50) ); -- 创建索引以优化查询性能 CREATE INDEX idx_sales_customer ON sales_orders(customer_id); CREATE INDEX idx_customers_type ON customers(customer_type); CREATE INDEX idx_sales_order_date ON sales_orders(order_date); -- 插入客户数据小表1000条记录 INSERT INTO customers (customer_id, customer_name, customer_type, registration_date, country) SELECT i, Customer_ || i, CASE WHEN i % 5 0 THEN VIP WHEN i % 5 1 THEN Regular WHEN i % 5 2 THEN Wholesale WHEN i % 5 3 THEN Enterprise ELSE Retail END, CURRENT_DATE - (i % 1000), CASE WHEN i % 3 0 THEN USA WHEN i % 3 1 THEN UK ELSE Germany END FROM generate_series(1, 1000) i; -- 插入销售订单数据大表500000条记录 INSERT INTO sales_orders (customer_id, product_id, quantity, unit_price, order_date, region) SELECT (random() * 9999)::int 1, -- 客户ID在1-10000之间 (random() * 999)::int 1, -- 产品ID在1-1000之间 (random() * 10)::int 1, -- 数量1-10 (random() * 1000)::decimal 50, -- 单价50-1050 CURRENT_DATE - (random() * 365)::int, -- 过去一年内的日期 CASE WHEN (random() * 100)::int 30 THEN North WHEN (random() * 100)::int 60 THEN South WHEN (random() * 100)::int 80 THEN East ELSE West END FROM generate_series(1, 500000); -- 收集统计信息 ANALYZE sales_orders; ANALYZE customers; --1、不开启lazyagg EXPLAIN ANALYZE SELECT c.customer_type, SUM(order_summary.total_sales) as total_revenue FROM customers c, (SELECT customer_id, SUM(quantity * unit_price) as total_sales FROM sales_orders GROUP BY customer_id) order_summary WHERE c.customer_id order_summary.customer_id AND c.customer_type VIP GROUP BY c.customer_type;-- 2、开启lazyagg优化 EXPLAIN ANALYZE SELECT /* set(rewrite_rule lazyagg) */ c.customer_type, SUM(order_summary.total_sales) as total_revenue FROM customers c, (SELECT customer_id, SUM(quantity * unit_price) as total_sales FROM sales_orders GROUP BY customer_id) order_summary WHERE c.customer_id order_summary.customer_id AND c.customer_type VIP GROUP BY c.customer_type;使用lazyagg注意点想要lazyagg查询改写规则生效必须满足三个条件1.子查询中有GROUP BY2.子查询与外面的表关联之后还有GROUP BY3. 能确保转换前后语句是等价的如果子查询与外面的表关联之后没有GROUP BYlazyagg查询改写规则不会生效这个时候请使用谓词推入如果子查询中的GROUP BY和外面的表关联之后GROUP BY无法消除为1个lazyagg查询改写规则不会生效请使用谓词推入如果子查询包含union请使用谓词推入lazyagg查询改写规则不会生效有union alllazyagg查询改写规则可以生效。总结外层查询有过滤条件过滤后数据量少,内层子查询需要聚合大表数据,最终结果只需要聚合少量数据延迟子查询的聚合先让子查询表与外表join避免了先聚合全部数据再连接的浪费,延迟聚合到连接之后只聚合需要的数据,减少了中间结果集的大小和内存使用从而提升性能。案例2、magicset主查询表中的where条件下推到子查询目的将主查询中的表放到where子查询多关联一次。应用场景当主查询的表通过谓词过滤后的数据放在子查询中能快速缩小子查询的数据量时。-- 创建两个测试表 -- 创建大表orders_big CREATE TABLE orders_big ( order_id SERIAL PRIMARY KEY, customer_id INT NOT NULL, order_date DATE NOT NULL, amount DECIMAL(10,2) NOT NULL, status VARCHAR(20) ); -- 创建小表customers_small CREATE TABLE customers_small ( customer_id INT PRIMARY KEY, customer_name VARCHAR(100) NOT NULL, customer_type VARCHAR(50), country VARCHAR(50), registration_date DATE ); INSERT INTO customers_small (customer_id, customer_name, customer_type, country, registration_date) SELECT i, Customer_ || i, CASE WHEN i % 20 0 THEN VIP ELSE Regular END, CASE WHEN i % 10 0 THEN USA WHEN i % 10 1 THEN UK ELSE Germany END, CURRENT_DATE - (i % 365 * 2) FROM generate_series(1, 1000) i; INSERT INTO orders_big (customer_id, order_date, amount, status) SELECT i, CURRENT_DATE - (random() * 365 * 3)::int, (random() * 1000) 100, CASE WHEN random() 0.8 THEN Completed ELSE Processing END FROM generate_series(1, 100000) i; -- 创建索引 CREATE INDEX idx_orders_big_customer_id ON orders_big(customer_id); CREATE INDEX idx_orders_big_date ON orders_big(customer_id, order_date DESC); CREATE INDEX idx_customers_type_country1 ON customers_small(customer_type, country); -- 收集统计信息 ANALYZE orders_big; ANALYZE customers_small; --1、不开启magicset EXPLAIN ANALYZE SELECT /* set(rewrite_rule none) */ o.order_id, o.amount, o.order_date, c.customer_name, c.customer_type FROM orders_big o JOIN customers_small c ON o.customer_id c.customer_id WHERE o.order_date ( SELECT MAX(o2.order_date) FROM orders_big o2 WHERE o2.customer_id c.customer_id ) AND c.customer_type VIP AND c.country USA;--2、开启magicset EXPLAIN ANALYZE SELECT /* set(rewrite_rule magicset) */ o.order_id, o.amount, o.order_date, c.customer_name, c.customer_type FROM orders_big o JOIN customers_small c ON o.customer_id c.customer_id WHERE o.order_date ( SELECT MAX(o2.order_date) FROM orders_big o2 WHERE o2.customer_id c.customer_id ) AND c.customer_type VIP AND c.country USA;-- 3、不开启magicset对子查询使用no_expand优化 EXPLAIN ANALYZE SELECT /* set(rewrite_rule none) */ o.order_id, o.amount, o.order_date, c.customer_name, c.customer_type FROM orders_big o JOIN customers_small c ON o.customer_id c.customer_id WHERE o.order_date ( SELECT /* no_expand */ MAX(o2.order_date) FROM orders_big o2 WHERE o2.customer_id c.customer_id ) AND c.customer_type VIP AND c.country USA;添加HINT /* no_expand */ 禁止子查询展开,从执行计划发现多了limit原sql没有limit原因是优化器对sql进行了改写将子查询SELECT MAX(o2.order_date) FROM orders_big o2 WHERE o2.customer_id c.customer_id;转换成了SELECT order_date FROM orders_big o2 WHERE o2.customer_id c.customer_id ORDER BY order_date DESC LIMIT 1;走了orders_big表上idx_orders_big_date索引。magicset相当于对原sql进行了等价改写在子查询中多join了一次customers表用来减少子查询的数据量。 SELECT o.*, c.customer_name FROM orders o JOIN customers c ON o.customer_id c.customer_id WHERE o.order_date ( SELECT MAX(o2.order_date) FROM orders o2 JOIN customers c2 ON o2.customer_id c2.customer_id -- 多关联一次 WHERE o2.customer_id c.customer_id AND c2.type VIP -- 下推的条件 AND c2.country USA -- 下推的条件 ) AND c.type VIP AND c.country USA;总结数据特征主查询过滤后结果集较大子查询表数据量大但推入条件选择性高能过滤掉大部分数据。计划变化执行计划从对子查询结果集进行全量扫描和连接如Hash Join变为将条件推入子查询内部提前进行高效过滤如使用索引。案例3、enable_sublink_pullup_enhanced 子查询上拉目的将子查询嵌套调整成与外表连接进行连接。应用场景主表数据量较大且连接前缺少高效过滤子查询结果集较小或子查询表在关联字段上有高效索引子查询聚合后的条件对主表的数据刷选比较好。CREATE TABLE products_large ( product_id SERIAL PRIMARY KEY, category_id INT NOT NULL, price DECIMAL(10,2) NOT NULL, stock_quantity INT NOT NULL, last_updated DATE NOT NULL, product_name VARCHAR(200), description TEXT ); CREATE TABLE categories_small ( category_id INT PRIMARY KEY, category_name VARCHAR(100) NOT NULL, min_price_threshold DECIMAL(10,2), max_discount_rate DECIMAL(5,2), is_active BOOLEAN ); -- 插入 categories_small 数据10000个分类 INSERT INTO categories_small (category_id, category_name, min_price_threshold, max_discount_rate, is_active) SELECT i, Category_ || i, random() * 1000, random() * 1000, CASE WHEN random() 0.8 THEN true ELSE false END FROM generate_series(1, 10000) i; INSERT INTO products_large (category_id, price, stock_quantity, last_updated, product_name, description) SELECT (random() * 1000)::int 1, (random() * 1000)::int 1, (random() * 1000)::int, CURRENT_DATE - (random() * 365 * 2)::int, Product_ || i, Description for product || i FROM generate_series(1, 1000000) i; -- 创建索引 CREATE INDEX idx_products_category ON products_large(category_id); CREATE INDEX idx_products_price ON products_large(price); CREATE INDEX idx_categories_active ON categories_small(is_active); -- 收集统计信息 ANALYZE products_large; ANALYZE categories_small; EXPLAIN ANALYZE SELECT /* set(rewrite_rule none) */ Pl.* FROM products_large pl WHERE pl.price ( SELECT max(cs2.min_price_threshold) FROM categories_small cs2 WHERE cs2.min_price_threshold );EXPLAIN ANALYZE SELECT /* set(rewrite_rule enable_sublink_pullup_enhanced) */ pl.* FROM products_large pl WHERE pl.price ( SELECT max(cs2.min_price_threshold) FROM categories_small cs2 WHERE cs2.min_price_threshold );总结此规则将为外层每一行执行一次子查询的模式转变为先执行子查询一次再与外表整体连接的模式并非总是有效如果子查询本身非常庞大且无法有效缩减例如需要全表扫描聚合上拉后强制进行 Hash Join 或 Nested Loop 的成本可能会高于原执行计划中高效的多次索引扫描SubPlan反而导致性能下降的典型案例有兴趣的话可以把上面的max改成min对比会发现enable_sublink_pullup_enhanced开启后性能反而会劣化不能和disable_pullup_expr_sublink一起使用。案例4、 intargetlist 将标量子查询改写为left join目的将 SELECT 目标列中的相关标量子查询即每行返回一个值的子查询改写为 LEFT JOIN其根本目的是避免为外层查询的每一行都重复执行一次子查询N次循环从而将执行复杂度从 O(N * M) 降为 O(M)实现性能的指数级提升。应用场景当外层结果集很大N大而内层表相对较小或聚合后结果集很小M小 时收益最显著。优化消除了N次循环只需一次扫描和连接。-- 1. 创建表 CREATE TABLE test_outer_large ( id BIGINT PRIMARY KEY, category TEXT, flag INT ); CREATE TABLE test_inner_small ( id BIGINT PRIMARY KEY, category TEXT UNIQUE, -- 假设类别是唯一的这样聚合后行数很少 data TEXT ); INSERT INTO test_inner_small SELECT i, CAT_ || i, md5(i::text) FROM generate_series(1, 100) i; INSERT INTO test_outer_large SELECT i, CAT_ || ((i % 100) 1), CASE WHEN i % 10 7 THEN 1 ELSE 999 END -- 让约70%的行7万行的 flag1 FROM generate_series(1, 100000) i; CREATE INDEX idx_inner_cat ON test_inner_small(category); ANALYZE test_outer_large; ANALYZE test_inner_small;EXPLAIN (ANALYZE, COSTS OFF, TIMING ON) SELECT /* set(rewrite_rule none) */ o.category, (SELECT COUNT(*) FROM test_inner_small i WHERE i.category o.category) FROM test_outer_large o WHERE o.flag 1;如下图所示如果外层结果集极小N很小而内层表极其庞大M巨大且关联列有高效索引时原始执行计划SubPlan可能更优。因为 SubPlan 会进行N次快速的索引扫描而改写后需要一次性全量扫描内层大表进行聚合成本可能远超N次小范围查询。总结标量子查询本质就是每查询一次主表子查询就要循环一次当主表数据量为N时子查询就要执行N次intargetlist消除了循环遍历仔细观察执行计划就会发现开启intargetlis时loops从主表的数据量N改变成1避免了循环引起的性能劣化。缺点把原来两张表都读到内存中进行HashAggregate。比较依赖work_mem内存大小如果work_mem无法存放大表的所有数据此时数据会落盘产生临时文件导致性能下降。