MySQL聚合查询与联合查询及优化实践
聚合查询的核心技术与应用场景
1. 基础聚合函数实践
五个核心函数构成数据处理基石:
-- 统计注册用户总数
SELECT COUNT(*) AS total_users FROM users WHERE status=1;
-- 计算商品销售总额
SELECT SUM(price * quantity) AS total_sales FROM order_details;
-- 获取最高客单价记录
SELECT MAX(total_amount) AS max_order FROM orders;
-- 分析用户年龄分布
SELECT AVG(age) AS avg_age, MIN(age) AS youngest, MAX(age) AS oldest FROM members;
 
  分组统计时常见的陷阱:
-- 错误示例:未聚合字段直接查询
SELECT department, employee_name, AVG(salary) 
FROM employees
GROUP BY department;
-- 正确做法(MySQL 8.0+)
SELECT department, 
       ANY_VALUE(employee_name) AS sample_name,
       AVG(salary) AS avg_salary
FROM employees
GROUP BY department;
 
  2. 多维度分组技巧
WITH ROLLUP实现数据透视:
SELECT YEAR(order_date) AS order_year,
       MONTH(order_date) AS order_month,
       product_category,
       SUM(amount) AS total_sales
FROM sales_records
GROUP BY order_year, order_month, product_category
WITH ROLLUP;
 
  分级统计结果示例:
+------------+-------------+------------------+-------------+
| order_year | order_month | product_category | total_sales |
+------------+-------------+------------------+-------------+
| 2023       | 01          | Electronics      | 150000      |
| 2023       | 01          | Furniture        | 89000       |
| 2023       | 01          | NULL             | 239000      | -- 月度小计
| 2023       | NULL        | NULL             | 750000      | -- 年度合计
| NULL       | NULL        | NULL             | 1850000     | -- 全局总计
+------------+-------------+------------------+-------------+
 
  3. 条件筛选的进阶用法
HAVING与WHERE的差异对比:
-- 筛选总销售额超过10万的部门
SELECT department_id, SUM(sales) AS total
FROM monthly_sales
WHERE quarter = 2  -- 先过滤季度
GROUP BY department_id
HAVING total > 100000  -- 后筛选分组结果
ORDER BY total DESC;
 
  复杂条件组合示例:
SELECT product_id,
       COUNT(*) AS order_count,
       AVG(quantity) AS avg_qty
FROM order_items
WHERE discount_rate < 0.2  -- 基础筛选
GROUP BY product_id
HAVING order_count > 50 AND avg_qty >= 3  -- 聚合后筛选
       AND MAX(quantity) < 10;  -- 混合非聚合条件
 
  联合查询的深度解析
1. 表连接类型全解析
可视化连接示意图:
[INNER JOIN]        [LEFT JOIN]         [RIGHT JOIN]        [FULL JOIN]
 A   B              A   B               A   B               A   B
●-∩-●             ●--∩-●              ●-∩--●             ●---∩---●
 
  实际业务场景应用:
-- 获取客户完整订单信息
SELECT c.customer_id, c.name, o.order_date, o.total_amount
FROM customers c
LEFT JOIN orders o ON c.customer_id = o.customer_id
WHERE o.order_date BETWEEN '2023-01-01' AND '2023-12-31'
   OR o.order_id IS NULL;  -- 包含未下单客户
-- 多表级联查询
SELECT e.emp_name, d.dept_name, p.project_name
FROM employees e
INNER JOIN departments d ON e.dept_id = d.dept_id
LEFT JOIN projects p ON e.emp_id = p.leader_id;
 
  2. 集合操作的特殊场景
UNION去重机制示例:
-- 合并不同来源的客户数据
(SELECT name, phone FROM web_contacts WHERE reg_date > '2023-01-01')
UNION
(SELECT contact_name, mobile FROM offline_clients WHERE status=1)
ORDER BY name;
 
  UNION ALL性能优化案例:
-- 合并日分区表数据(已知无重复)
SELECT * FROM sales_2023_q1
UNION ALL
SELECT * FROM sales_2023_q2
UNION ALL
SELECT * FROM sales_2023_q3;
 
  3. 子查询的进阶模式
相关子查询示例:
-- 查找各部门薪资高于平均的员工
SELECT e1.emp_name, e1.salary, e1.department
FROM employees e1
WHERE salary > (
    SELECT AVG(e2.salary)
    FROM employees e2
    WHERE e2.department = e1.department
);
 
  FROM子句中的派生表:
-- 计算产品月销售额排名
SELECT month, product_id, monthly_sales,
       RANK() OVER (PARTITION BY month ORDER BY monthly_sales DESC) AS sales_rank
FROM (
    SELECT DATE_FORMAT(order_date, '%Y-%m') AS month,
           product_id,
           SUM(quantity * price) AS monthly_sales
    FROM orders
    GROUP BY month, product_id
) AS sales_data;
 
  性能优化关键策略
1. 索引优化实战
复合索引设计示例:
-- 为分组查询优化索引
ALTER TABLE sales_records
ADD INDEX idx_region_category (sales_region, product_category);
-- 排序优化索引
CREATE INDEX idx_order_date_amount ON orders(order_date DESC, total_amount);
 
  索引失效的典型场景:
-- 函数处理导致索引失效
SELECT * FROM users WHERE YEAR(created_at) = 2023;  -- 坏查询
-- 优化后版本
SELECT * FROM users 
WHERE created_at BETWEEN '2023-01-01' AND '2023-12-31';
 
  2. 执行计划分析方法
EXPLAIN输出解读要点:
+----+-------------+-------+------------+------+---------------+-----+---------+------+------+----------+-----------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref  | rows | filtered | Extra           |
+----+-------------+-------+------------+------+---------------+-----+---------+------+------+----------+-----------------+
| 1  | SIMPLE      | orders| NULL       | ref  | idx_customer  | idx_customer | 5    | const| 100  | 100.00   | Using index     |
+----+-------------+-------+------------+------+---------------+-----+---------+------+------+----------+-----------------+
 
  关键指标解析:
- type列:system > const > eq_ref > ref > range > index > ALL
 - Extra列:Using filesort(需要优化), Using temporary(可能创建临时表)
 - rows列:预估扫描行数
 
3. 查询重写技巧
优化分组查询示例:
-- 原始低效查询
SELECT department, COUNT(*) 
FROM employees
GROUP BY department
HAVING COUNT(*) > 100;
-- 优化版本(提前过滤)
SELECT department, emp_count
FROM (
    SELECT department, COUNT(*) AS emp_count
    FROM employees
    GROUP BY department
) AS dept_stats
WHERE emp_count > 100;
 
  连接顺序优化原则:
-- 小表驱动大表原则
SELECT /*+ STRAIGHT_JOIN */ s.*, p.product_name
FROM (SELECT * FROM sales WHERE date > '2023-01-01') s  -- 小结果集
JOIN products p ON s.product_id = p.id;  -- 大表
 
  典型业务场景解决方案
1. 分层统计报表
多级汇总实现:
SELECT 
    IF(GROUPING(region), '所有大区', region) AS region,
    IF(GROUPING(city), '全区汇总', city) AS city,
    SUM(sales) AS total_sales
FROM sales_data
GROUP BY region, city WITH ROLLUP;
 
  2. 用户行为分析
留存率计算示例:
SELECT 
    a.register_date,
    COUNT(DISTINCT a.user_id) AS new_users,
    COUNT(DISTINCT b.user_id) / COUNT(DISTINCT a.user_id) AS day1_retention
FROM registrations a
LEFT JOIN login_events b 
    ON a.user_id = b.user_id
    AND b.login_date = a.register_date + INTERVAL 1 DAY
GROUP BY a.register_date;
 
  3. 库存管理系统
实时库存查询:
SELECT 
    p.product_id,
    p.product_name,
    p.initial_stock + IFNULL(SUM(s.stock_change), 0) AS current_stock
FROM products p
LEFT JOIN stock_logs s ON p.product_id = s.product_id
GROUP BY p.product_id
HAVING current_stock < p.minimum_stock;
 
  常见问题排查指南
1. 分组字段选择错误
典型错误示例:
-- 错误:选择了非聚合字段
SELECT department, employee_name, AVG(salary)
FROM employees
GROUP BY department;
-- 修正方案
SELECT 
    department, 
    GROUP_CONCAT(employee_name) AS members,
    AVG(salary) AS avg_salary
FROM employees
GROUP BY department;
 
  2. NULL值处理
聚合函数中的NULL行为:
-- COUNT的不同表现
SELECT 
    COUNT(*) AS total_rows,  -- 包含NULL
    COUNT(email) AS valid_emails,  -- 忽略NULL
    COUNT(DISTINCT phone) AS unique_phones
FROM contacts;
 
  3. 性能断崖式下降
大数据量处理方案:
-- 分页优化技巧
SELECT product_id, SUM(sales)
FROM big_sales_table
WHERE sale_date BETWEEN '2023-01-01' AND '2023-12-31'
GROUP BY product_id
ORDER BY SUM(sales) DESC
LIMIT 10000, 20;
-- 优化为基于游标的分页
SELECT product_id, SUM(sales) AS total
FROM big_sales_table
WHERE sale_date BETWEEN '2023-01-01' AND '2023-12-31'
  AND product_id > ?last_product_id
GROUP BY product_id
ORDER BY product_id
LIMIT 20;
 
 
                        正文到此结束
                        
                        
                    相关文章
热门推荐
评论插件初始化中...