本文介绍Clickhouse的MergeTree家族的SummingMergeTree使用方法,并给出一个操作的实际样例。
注意:SummingMergeTree 不再保留原始数据,而是将相同键值的数据值做sum合并。
目录导航
语法
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
...
) ENGINE = SummingMergeTree([columns])
[PARTITION BY expr]
[ORDER BY expr]
[SAMPLE BY expr]
[SETTINGS name=value, ...]
columns
参与SUM的列的名字,不能包含主键列。
样例
建表
我们采用了id,id2,id3 三个列做维度, v1和v2两个列做数据列做sum。在SummingMergeTree中,如果不指定聚合列,则默认所有不在排序列的的(文档上说主键列,但前面也写了,更准确的讲是排序列)。
localhost :) create table tt5(id int, id2 int, id3 int, v1 decimal(16,3),v2 decimal(16,3))engine=SummingMergeTree() order by (id,id2,id3) primary key id;
CREATE TABLE tt5
(
`id` int,
`id2` int,
`id3` int,
`v1` decimal(16, 3),
`v2` decimal(16, 3)
)
ENGINE = SummingMergeTree
PRIMARY KEY id
ORDER BY (id, id2, id3)
Query id: f9e3490a-a992-4882-b6af-654cd375a39f
Ok.
0 rows in set. Elapsed: 0.027 sec.
localhost :) show create table tt5;
SHOW CREATE TABLE tt5
Query id: 0726fa7c-a23f-4526-9c20-91d1d5b67a97
┌─statement──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ CREATE TABLE testdb.tt5
(
`id` Int32,
`id2` Int32,
`id3` Int32,
`v1` Decimal(16, 3),
`v2` Decimal(16, 3)
)
ENGINE = SummingMergeTree
PRIMARY KEY id
ORDER BY (id, id2, id3)
SETTINGS index_granularity = 8192 │
└────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
1 rows in set. Elapsed: 0.070 sec.
插入数据
插入2行数据,并计算下sum值的SQL。 select id,sum(v1),sum(v2) from tt5 group by id;
localhost :) insert into tt5 values(1,2,3,2.222,3.333);
INSERT INTO tt5 VALUES
Query id: 2b3d3776-620b-4016-8bc7-3e33a2081f99
Ok.
1 rows in set. Elapsed: 0.003 sec.
localhost :) select * from tt5;
SELECT *
FROM tt5
Query id: a4884598-a58b-415c-ab8d-6d162449c548
┌─id─┬─id2─┬─id3─┬────v1─┬────v2─┐
│ 1 │ 2 │ 3 │ 2.222 │ 3.333 │
└────┴─────┴─────┴───────┴───────┘
1 rows in set. Elapsed: 0.028 sec.
localhost :) insert into tt5 values(1,2,3,1.234,2.345);
INSERT INTO tt5 VALUES
Query id: 79b07f94-4c4a-4a3d-aae1-55aa8a3370e1
Ok.
1 rows in set. Elapsed: 0.005 sec.
localhost :) select * from tt5;
SELECT *
FROM tt5
Query id: e1f65519-b2d4-45eb-99ed-aa62fa3be1fe
┌─id─┬─id2─┬─id3─┬────v1─┬────v2─┐
│ 1 │ 2 │ 3 │ 2.222 │ 3.333 │
└────┴─────┴─────┴───────┴───────┘
┌─id─┬─id2─┬─id3─┬────v1─┬────v2─┐
│ 1 │ 2 │ 3 │ 1.234 │ 2.345 │
└────┴─────┴─────┴───────┴───────┘
2 rows in set. Elapsed: 0.147 sec.
localhost :) select id,sum(v1),sum(v2) from tt5 group by id;
SELECT
id,
sum(v1),
sum(v2)
FROM tt5
GROUP BY id
Query id: b1bb39e0-3ccf-4c30-9f70-1b573da63309
┌─id─┬─sum(v1)─┬─sum(v2)─┐
│ 1 │ 3.456 │ 5.678 │
└────┴─────────┴─────────┘
1 rows in set. Elapsed: 0.005 sec.
做一次合并 optimize
可以看到v1,v2的数据被合并成了sum后的值,原始数据值已经不存在了。
localhost :) optimize table tt5;
OPTIMIZE TABLE tt5
Query id: e1833b2e-33f4-4730-b91e-a6a7e64553a6
Ok.
0 rows in set. Elapsed: 0.003 sec.
localhost :) select * from tt5;
SELECT *
FROM tt5
Query id: 8f4fc4fd-7434-485a-be2e-ba65f9d0162f
┌─id─┬─id2─┬─id3─┬────v1─┬────v2─┐
│ 1 │ 2 │ 3 │ 3.456 │ 5.678 │
└────┴─────┴─────┴───────┴───────┘
1 rows in set. Elapsed: 0.005 sec.
再次插入些不同的id的数据
localhost :) insert into tt5 values(1,2,4,3.333,4.444);
INSERT INTO tt5 VALUES
Query id: 1591174e-34a8-4c34-ab2e-060cf4d97923
Ok.
1 rows in set. Elapsed: 0.018 sec.
localhost :) select * from tt5;
SELECT *
FROM tt5
Query id: e0f64cfa-eab7-4eab-93e8-95214ec61fef
┌─id─┬─id2─┬─id3─┬────v1─┬────v2─┐
│ 1 │ 2 │ 3 │ 3.456 │ 5.678 │
└────┴─────┴─────┴───────┴───────┘
┌─id─┬─id2─┬─id3─┬────v1─┬────v2─┐
│ 1 │ 2 │ 4 │ 3.333 │ 4.444 │
└────┴─────┴─────┴───────┴───────┘
2 rows in set. Elapsed: 0.024 sec.
localhost :) optimize table tt5;
OPTIMIZE TABLE tt5
Query id: 13a92e03-9b6c-49d5-9a99-e871eb57186c
Ok.
0 rows in set. Elapsed: 0.005 sec.
localhost :) select * from tt5;
SELECT *
FROM tt5
Query id: 519763fb-1afd-45b2-9341-772b78f418c6
┌─id─┬─id2─┬─id3─┬────v1─┬────v2─┐
│ 1 │ 2 │ 3 │ 3.456 │ 5.678 │
│ 1 │ 2 │ 4 │ 3.333 │ 4.444 │
└────┴─────┴─────┴───────┴───────┘
2 rows in set. Elapsed: 0.005 sec.
localhost :) insert into tt5 values(2,2,4,3.333,4.444);
INSERT INTO tt5 VALUES
Query id: b36c19b4-6705-4628-a9ce-28764c87f4cd
Ok.
1 rows in set. Elapsed: 0.006 sec.
localhost :) insert into tt5 values(3,2,4,3.333,4.444);
INSERT INTO tt5 VALUES
Query id: 8ba89964-b261-45f2-8c9b-f36d7bfca3a9
Ok.
1 rows in set. Elapsed: 0.051 sec.
localhost :) select * from tt5;
SELECT *
FROM tt5
Query id: 7c7856d0-92cb-4008-93e1-683be7538d0c
┌─id─┬─id2─┬─id3─┬────v1─┬────v2─┐
│ 1 │ 2 │ 3 │ 3.456 │ 5.678 │
│ 1 │ 2 │ 4 │ 3.333 │ 4.444 │
└────┴─────┴─────┴───────┴───────┘
┌─id─┬─id2─┬─id3─┬────v1─┬────v2─┐
│ 2 │ 2 │ 4 │ 3.333 │ 4.444 │
└────┴─────┴─────┴───────┴───────┘
┌─id─┬─id2─┬─id3─┬────v1─┬────v2─┐
│ 3 │ 2 │ 4 │ 3.333 │ 4.444 │
└────┴─────┴─────┴───────┴───────┘
4 rows in set. Elapsed: 0.005 sec.
再次合并
localhost :) optimize table tt5;
OPTIMIZE TABLE tt5
Query id: 59c755af-be53-4e8a-88d3-006ad2d29295
Ok.
0 rows in set. Elapsed: 0.004 sec.
localhost :) select * from tt5;
SELECT *
FROM tt5
Query id: 3ee1b7e7-4f6c-4b69-a07e-67d8c8a2b99b
┌─id─┬─id2─┬─id3─┬────v1─┬────v2─┐
│ 1 │ 2 │ 3 │ 3.456 │ 5.678 │
│ 1 │ 2 │ 4 │ 3.333 │ 4.444 │
│ 2 │ 2 │ 4 │ 3.333 │ 4.444 │
│ 3 │ 2 │ 4 │ 3.333 │ 4.444 │
└────┴─────┴─────┴───────┴───────┘
4 rows in set. Elapsed: 0.005 sec.
在做sum查询
等于直接从预处理结果拿数据了,原始数据再多也不影响,都已经sum聚合好了。
localhost :) select id,sum(v1),sum(v2) from tt5 group by id;
SELECT
id,
sum(v1),
sum(v2)
FROM tt5
GROUP BY id
Query id: 4e231f56-1f42-45b6-9f01-78194d1b160a
┌─id─┬─sum(v1)─┬─sum(v2)─┐
│ 3 │ 3.333 │ 4.444 │
│ 2 │ 3.333 │ 4.444 │
│ 1 │ 6.789 │ 10.122 │
└────┴─────────┴─────────┘
3 rows in set. Elapsed: 0.015 sec.
其它样例
指定聚合列columns
localhost :) create table tt6(id int, id2 int, id3 int, v1 decimal(16,3),v2 decimal(16,3))engine=SummingMergeTree((v1,v2)) order by (id,id2,id3) primary key id;
CREATE TABLE tt6
(
`id` int,
`id2` int,
`id3` int,
`v1` decimal(16, 3),
`v2` decimal(16, 3)
)
ENGINE = SummingMergeTree((v1, v2))
PRIMARY KEY id
ORDER BY (id, id2, id3)
Query id: 2a3811ba-32c3-48b8-87f4-c77399ca71f0
Ok.
0 rows in set. Elapsed: 0.021 sec.
localhost :)
指定包含了order 列的情况
从实际效果看,并不影响,后面数据的聚合,依然是以order 的key是维度,其它的是数值为准。
localhost :) create table tt6(id int, id2 int, id3 int, v1 decimal(16,3),v2 decimal(16,3))engine=SummingMergeTree((id2,id3,v1,v2)) order by (id,id2,id3) primary key id;
CREATE TABLE tt6
(
`id` int,
`id2` int,
`id3` int,
`v1` decimal(16, 3),
`v2` decimal(16, 3)
)
ENGINE = SummingMergeTree((id2, id3, v1, v2))
PRIMARY KEY id
ORDER BY (id, id2, id3)
Query id: 2a3811ba-32c3-48b8-87f4-c77399ca71f0
Ok.
0 rows in set. Elapsed: 0.021 sec.
localhost :)
指定了包含主键列的情况
从结果看,同样无影响,并没有报错。数据依然是order 的键没有sum合并。
localhost :) create table tt7(id int, id2 int, id3 int, v1 decimal(16,3),v2 decimal(16,3))engine=SummingMergeTree((id,id2,id3,v1,v2)) order by (id,id2,id3) primary key id;
CREATE TABLE tt7
(
`id` int,
`id2` int,
`id3` int,
`v1` decimal(16, 3),
`v2` decimal(16, 3)
)
ENGINE = SummingMergeTree((id, id2, id3, v1, v2))
PRIMARY KEY id
ORDER BY (id, id2, id3)
Query id: 8e9ea6dc-2c06-4ee7-b9aa-72fbefee102a
Ok.
0 rows in set. Elapsed: 0.041 sec.
localhost :) insert into tt7 values(1,2,4,33,44);
INSERT INTO tt7 VALUES
Query id: c4c9166d-1596-40fc-ae15-18799bfd6594
Ok.
1 rows in set. Elapsed: 0.004 sec.
localhost :) insert into tt7 values(1,2,3,11,22);
INSERT INTO tt7 VALUES
Query id: ee2d5f5b-6176-44ad-914a-80683949579d
Ok.
1 rows in set. Elapsed: 0.006 sec.
localhost :) optimize table tt7;
OPTIMIZE TABLE tt7
Query id: 60d6a958-53c5-469d-ab0a-ceb08c5509f7
Ok.
0 rows in set. Elapsed: 0.004 sec.
localhost :) select * from tt7;
SELECT *
FROM tt7
Query id: 7284e429-7f19-4d86-b0e6-7a46080cce80
┌─id─┬─id2─┬─id3─┬─────v1─┬─────v2─┐
│ 1 │ 2 │ 3 │ 11.000 │ 22.000 │
│ 1 │ 2 │ 4 │ 33.000 │ 44.000 │
└────┴─────┴─────┴────────┴────────┘
2 rows in set. Elapsed: 0.005 sec.
localhost :) insert into tt7 values(1,2,4,55,66);
INSERT INTO tt7 VALUES
Query id: e34029b1-d933-419c-abc9-4cee5daa7737
Ok.
1 rows in set. Elapsed: 0.004 sec.
localhost :) optimize table tt7;
OPTIMIZE TABLE tt7
Query id: b8be951b-7748-4c92-9039-aba1eec45637
Ok.
0 rows in set. Elapsed: 0.003 sec.
localhost :) select * from tt7;
SELECT *
FROM tt7
Query id: 95c4c386-e3c6-49c6-97dc-0d25f7d1805c
┌─id─┬─id2─┬─id3─┬─────v1─┬──────v2─┐
│ 1 │ 2 │ 3 │ 11.000 │ 22.000 │
│ 1 │ 2 │ 4 │ 88.000 │ 110.000 │
└────┴─────┴─────┴────────┴─────────┘
2 rows in set. Elapsed: 0.006 sec.
指定部分数值列的情况
从结果看,确实只有指定的列参与了sum, 其它的保持了某个原始值。如下例子中,v2列参与了合并,而 v1列没有。
localhost :) create table tt8(id int, id2 int, id3 int, v1 decimal(16,3),v2 decimal(16,3))engine=SummingMergeTree((v2)) order by (id,id2,id3) primary key id;
CREATE TABLE tt8
(
`id` int,
`id2` int,
`id3` int,
`v1` decimal(16, 3),
`v2` decimal(16, 3)
)
ENGINE = SummingMergeTree(v2)
PRIMARY KEY id
ORDER BY (id, id2, id3)
Query id: f4cd6e32-c4ad-4ca0-bcbb-b190bb11f051
Ok.
0 rows in set. Elapsed: 0.040 sec.
localhost :) show create table tt8;
SHOW CREATE TABLE tt8
Query id: 4217919a-6675-490b-a5a5-091eb6b4b8af
┌─statement──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ CREATE TABLE testdb.tt8
(
`id` Int32,
`id2` Int32,
`id3` Int32,
`v1` Decimal(16, 3),
`v2` Decimal(16, 3)
)
ENGINE = SummingMergeTree(v2)
PRIMARY KEY id
ORDER BY (id, id2, id3)
SETTINGS index_granularity = 8192 │
└────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
1 rows in set. Elapsed: 0.004 sec.
localhost :) insert into tt8 values(1,2,3,11,22);
INSERT INTO tt8 VALUES
Query id: 8b2a0673-a29a-470e-8a34-5e86faebde3f
Ok.
1 rows in set. Elapsed: 0.003 sec.
localhost :) insert into tt8 values(1,2,4,33,44);
INSERT INTO tt8 VALUES
Query id: 9df38744-8e59-4ac2-93dc-6d00c6ac54fa
Ok.
1 rows in set. Elapsed: 0.003 sec.
localhost :) select * from tt8;
SELECT *
FROM tt8
Query id: 9d94a70b-99c9-4c7a-ac8a-3643f6c1ae8d
┌─id─┬─id2─┬─id3─┬─────v1─┬─────v2─┐
│ 1 │ 2 │ 3 │ 11.000 │ 22.000 │
└────┴─────┴─────┴────────┴────────┘
┌─id─┬─id2─┬─id3─┬─────v1─┬─────v2─┐
│ 1 │ 2 │ 4 │ 33.000 │ 44.000 │
└────┴─────┴─────┴────────┴────────┘
2 rows in set. Elapsed: 0.003 sec.
localhost :) optimize table tt8;
OPTIMIZE TABLE tt8
Query id: 471eba62-97f8-408f-993c-d28cd3f1cc5c
Ok.
0 rows in set. Elapsed: 0.003 sec.
localhost :) select * from tt8;
SELECT *
FROM tt8
Query id: c6143c05-0d28-4d48-a458-866cb1f314dd
┌─id─┬─id2─┬─id3─┬─────v1─┬─────v2─┐
│ 1 │ 2 │ 3 │ 11.000 │ 22.000 │
│ 1 │ 2 │ 4 │ 33.000 │ 44.000 │
└────┴─────┴─────┴────────┴────────┘
2 rows in set. Elapsed: 0.003 sec.
localhost :) insert into tt8 values(1,2,4,55,66);
INSERT INTO tt8 VALUES
Query id: 4097047f-c6b1-4782-993a-208b71097647
Ok.
1 rows in set. Elapsed: 0.003 sec.
localhost :) select * from tt8;
SELECT *
FROM tt8
Query id: 8eec66a5-b032-48c0-ac84-9df5452cbfa4
┌─id─┬─id2─┬─id3─┬─────v1─┬─────v2─┐
│ 1 │ 2 │ 3 │ 11.000 │ 22.000 │
│ 1 │ 2 │ 4 │ 33.000 │ 44.000 │
└────┴─────┴─────┴────────┴────────┘
┌─id─┬─id2─┬─id3─┬─────v1─┬─────v2─┐
│ 1 │ 2 │ 4 │ 55.000 │ 66.000 │
└────┴─────┴─────┴────────┴────────┘
3 rows in set. Elapsed: 0.003 sec.
localhost :) optimize table tt8;
OPTIMIZE TABLE tt8
Query id: 741b9e91-010f-4f6d-a758-eda6731ca37e
Ok.
0 rows in set. Elapsed: 0.003 sec.
localhost :) select * from tt8;
SELECT *
FROM tt8
Query id: 317c904d-a67b-49e6-857b-4703bbd22696
┌─id─┬─id2─┬─id3─┬─────v1─┬──────v2─┐
│ 1 │ 2 │ 3 │ 11.000 │ 22.000 │
│ 1 │ 2 │ 4 │ 33.000 │ 110.000 │
└────┴─────┴─────┴────────┴─────────┘
2 rows in set. Elapsed: 0.011 sec.