今天实践一下 MySQL 在百万级数据量下,分区对查询性能的影响。首先需要生成百万级别的测试数据。

//创建带分区的数据表
-- Partitioned test table: rows are distributed across RANGE partitions
-- according to the calendar year of `born`.
CREATE TABLE `part_person`
(
    `id`       bigint(20) unsigned NOT NULL,
    `username` varchar(100)        NOT NULL,
    `born`     date                NOT NULL DEFAULT '1970-01-01',
    `sex`      tinyint(1) unsigned NOT NULL,
    -- `born` is included in the key because MySQL requires every unique key of a
    -- partitioned table to contain all columns used by the partitioning expression.
    PRIMARY KEY (`id`, `born`)
)
    ENGINE = MyISAM
    DEFAULT CHARSET = utf8
    PARTITION BY RANGE (YEAR(`born`))
    (
        PARTITION p0 VALUES LESS THAN (1980) ENGINE = MyISAM,    -- years before 1980
        PARTITION p1 VALUES LESS THAN (1990) ENGINE = MyISAM,    -- 1980 .. 1989
        PARTITION p2 VALUES LESS THAN (2000) ENGINE = MyISAM,    -- 1990 .. 1999
        PARTITION p3 VALUES LESS THAN (2010) ENGINE = MyISAM,    -- 2000 .. 2009
        PARTITION p4 VALUES LESS THAN (2020) ENGINE = MyISAM,    -- 2010 .. 2019
        PARTITION p5 VALUES LESS THAN MAXVALUE ENGINE = MyISAM   -- 2020 and later
    );

//创建不带分区的数据表
-- Unpartitioned control table: same columns, key, engine and charset as
-- `part_person`, but without a PARTITION BY clause, so the two tables can be
-- compared query-for-query.
CREATE TABLE `no_part_person`
(
    `id`       bigint(20) unsigned NOT NULL,
    `username` varchar(100)        NOT NULL,
    `born`     date                NOT NULL DEFAULT '1970-01-01',
    `sex`      tinyint(1) unsigned NOT NULL,
    PRIMARY KEY (`id`, `born`)
)
    ENGINE = MyISAM
    DEFAULT CHARSET = utf8;

//填充数据,创建procedure向数据表插入数据
-- part_generate(num): empties `part_person`, then inserts `num` rows of random
-- test data with ids 1..num.
--
--   num  number of rows to generate; a value <= 0 (or NULL) only empties the table.
--
-- Each row gets:
--   username  1..25 random characters drawn from [a-zA-Z0-9]
--   born      '1970-01-01' plus a random offset of up to 365*60 days
--   sex       0 or 1
--
-- NOTE: the body contains ';' terminators. When running this through the mysql
-- command-line client, switch the statement delimiter first (DELIMITER ...),
-- otherwise the client splits the definition at the first ';'.
CREATE PROCEDURE `part_generate`(IN num INT)
BEGIN
    -- Alphabet the random usernames are drawn from (62 characters).
    DECLARE char_str   VARCHAR(100) DEFAULT 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789';
    -- Locals carry a v_ prefix so they cannot be confused with the table's
    -- `id` / `username` columns inside SQL statements.
    DECLARE v_username VARCHAR(25)  DEFAULT '';
    DECLARE v_id       INT UNSIGNED DEFAULT 1;
    DECLARE v_len      INT;

    -- Intentionally unfiltered: start from an empty target table so ids 1..num
    -- never collide with rows from a previous run. This must clear the table
    -- being filled (`part_person`), not some other table.
    DELETE FROM part_person;

    WHILE v_id <= num DO
        -- Pick a username length in 1..25, then append that many random characters.
        SET v_len = FLOOR(1 + RAND() * 25);
        SET v_username = '';
        WHILE v_len > 0 DO
            -- FLOOR(1 + RAND()*62) yields a 1-based position in 1..62 within char_str.
            SET v_username = CONCAT(v_username, SUBSTRING(char_str, FLOOR(1 + RAND() * 62), 1));
            SET v_len = v_len - 1;
        END WHILE;

        INSERT INTO part_person (id, username, born, sex)
        VALUES (
            v_id,
            v_username,
            ADDDATE('1970-01-01', INTERVAL RAND()*365*60 DAY),
            FLOOR(RAND() * 2)
        );

        SET v_id = v_id + 1;
    END WHILE;
END

//执行procedure插入600万数据
-- Generate 6,000,000 rows in part_person. The trailing ';' keeps this statement
-- from running into the text that follows when the file is executed as a script.
CALL part_generate(6000000);

//向未分区表插入数据 
-- Copy every generated row into the unpartitioned table so both tables hold
-- identical data. Columns are listed explicitly so the copy does not depend on
-- the two tables keeping the same column order.
INSERT INTO no_part_person (id, username, born, sex)
SELECT id, username, born, sex
FROM part_person;

现在有了数据,对比一下有没有分区对查询的影响
图片描述图片描述
结论:当查询条件不在分区列(born)上时,分区表反而会更慢一些;按 born 查询且查询范围落在单个分区内时,分区带来的性能提升非常显著;当查询范围跨越多个分区时,提升没有那么显著,但仍然有提升。


爱种地的码农
28 声望8 粉丝