Clickhouse 的SummingMergeTree使用样例

本文介绍Clickhouse的MergeTree家族的SummingMergeTree使用方法,并给出一个操作的实际样例。

注意:SummingMergeTree 不再保留原始数据,而是将相同键值的数据值做sum合并。

语法

CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
    name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
    name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
    ...
) ENGINE = SummingMergeTree([columns])
[PARTITION BY expr]
[ORDER BY expr]
[SAMPLE BY expr]
[SETTINGS name=value, ...]

columns

参与SUM的列的名字,不能包含主键列。

样例

建表

我们采用了id,id2,id3 三个列做维度, v1和v2两个列做数据列做sum。在SummingMergeTree中,如果不指定聚合列,则默认所有不在排序列的的(文档上说主键列,但前面也写了,更准确的讲是排序列)。

localhost :) create table tt5(id int, id2 int, id3 int, v1 decimal(16,3),v2 decimal(16,3))engine=SummingMergeTree() order by (id,id2,id3) primary key id;

CREATE TABLE tt5
(
    `id` int,
    `id2` int,
    `id3` int,
    `v1` decimal(16, 3),
    `v2` decimal(16, 3)
)
ENGINE = SummingMergeTree
PRIMARY KEY id
ORDER BY (id, id2, id3)

Query id: f9e3490a-a992-4882-b6af-654cd375a39f

Ok.

0 rows in set. Elapsed: 0.027 sec.

localhost :) show create table tt5;

SHOW CREATE TABLE tt5

Query id: 0726fa7c-a23f-4526-9c20-91d1d5b67a97

┌─statement──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ CREATE TABLE testdb.tt5
(
    `id` Int32,
    `id2` Int32,
    `id3` Int32,
    `v1` Decimal(16, 3),
    `v2` Decimal(16, 3)
)
ENGINE = SummingMergeTree
PRIMARY KEY id
ORDER BY (id, id2, id3)
SETTINGS index_granularity = 8192 │
└────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘

1 rows in set. Elapsed: 0.070 sec.

插入数据

插入2行数据,并计算下sum值的SQL。 select id,sum(v1),sum(v2) from tt5 group by id;

localhost :) insert into tt5 values(1,2,3,2.222,3.333);

INSERT INTO tt5 VALUES

Query id: 2b3d3776-620b-4016-8bc7-3e33a2081f99

Ok.

1 rows in set. Elapsed: 0.003 sec.

localhost :) select * from tt5;

SELECT *
FROM tt5

Query id: a4884598-a58b-415c-ab8d-6d162449c548

┌─id─┬─id2─┬─id3─┬────v1─┬────v2─┐
│  1 │   2 │   3 │ 2.222 │ 3.333 │
└────┴─────┴─────┴───────┴───────┘

1 rows in set. Elapsed: 0.028 sec.

localhost :) insert into tt5 values(1,2,3,1.234,2.345);

INSERT INTO tt5 VALUES

Query id: 79b07f94-4c4a-4a3d-aae1-55aa8a3370e1

Ok.

1 rows in set. Elapsed: 0.005 sec.

localhost :) select * from tt5;

SELECT *
FROM tt5

Query id: e1f65519-b2d4-45eb-99ed-aa62fa3be1fe

┌─id─┬─id2─┬─id3─┬────v1─┬────v2─┐
│  1 │   2 │   3 │ 2.222 │ 3.333 │
└────┴─────┴─────┴───────┴───────┘
┌─id─┬─id2─┬─id3─┬────v1─┬────v2─┐
│  1 │   2 │   3 │ 1.234 │ 2.345 │
└────┴─────┴─────┴───────┴───────┘

2 rows in set. Elapsed: 0.147 sec.

localhost :) select id,sum(v1),sum(v2) from tt5 group by id;

SELECT
    id,
    sum(v1),
    sum(v2)
FROM tt5
GROUP BY id

Query id: b1bb39e0-3ccf-4c30-9f70-1b573da63309

┌─id─┬─sum(v1)─┬─sum(v2)─┐
│  1 │   3.456 │   5.678 │
└────┴─────────┴─────────┘

1 rows in set. Elapsed: 0.005 sec.

做一次合并 optimize

可以看到v1,v2的数据被合并成了sum后的值,原始数据值已经不存在了。

localhost :) optimize table tt5;

OPTIMIZE TABLE tt5

Query id: e1833b2e-33f4-4730-b91e-a6a7e64553a6

Ok.

0 rows in set. Elapsed: 0.003 sec.

localhost :) select * from tt5;

SELECT *
FROM tt5

Query id: 8f4fc4fd-7434-485a-be2e-ba65f9d0162f

┌─id─┬─id2─┬─id3─┬────v1─┬────v2─┐
│  1 │   2 │   3 │ 3.456 │ 5.678 │
└────┴─────┴─────┴───────┴───────┘

1 rows in set. Elapsed: 0.005 sec.

再次插入些不同的id的数据

localhost :) insert into tt5 values(1,2,4,3.333,4.444);

INSERT INTO tt5 VALUES

Query id: 1591174e-34a8-4c34-ab2e-060cf4d97923

Ok.

1 rows in set. Elapsed: 0.018 sec.

localhost :) select * from tt5;

SELECT *
FROM tt5

Query id: e0f64cfa-eab7-4eab-93e8-95214ec61fef

┌─id─┬─id2─┬─id3─┬────v1─┬────v2─┐
│  1 │   2 │   3 │ 3.456 │ 5.678 │
└────┴─────┴─────┴───────┴───────┘
┌─id─┬─id2─┬─id3─┬────v1─┬────v2─┐
│  1 │   2 │   4 │ 3.333 │ 4.444 │
└────┴─────┴─────┴───────┴───────┘

2 rows in set. Elapsed: 0.024 sec.

localhost :) optimize table tt5;

OPTIMIZE TABLE tt5

Query id: 13a92e03-9b6c-49d5-9a99-e871eb57186c

Ok.

0 rows in set. Elapsed: 0.005 sec.

localhost :) select * from tt5;

SELECT *
FROM tt5

Query id: 519763fb-1afd-45b2-9341-772b78f418c6

┌─id─┬─id2─┬─id3─┬────v1─┬────v2─┐
│  1 │   2 │   3 │ 3.456 │ 5.678 │
│  1 │   2 │   4 │ 3.333 │ 4.444 │
└────┴─────┴─────┴───────┴───────┘

2 rows in set. Elapsed: 0.005 sec.

localhost :) insert into tt5 values(2,2,4,3.333,4.444);

INSERT INTO tt5 VALUES

Query id: b36c19b4-6705-4628-a9ce-28764c87f4cd

Ok.

1 rows in set. Elapsed: 0.006 sec.

localhost :) insert into tt5 values(3,2,4,3.333,4.444);

INSERT INTO tt5 VALUES

Query id: 8ba89964-b261-45f2-8c9b-f36d7bfca3a9

Ok.

1 rows in set. Elapsed: 0.051 sec.

localhost :) select * from tt5;

SELECT *
FROM tt5

Query id: 7c7856d0-92cb-4008-93e1-683be7538d0c

┌─id─┬─id2─┬─id3─┬────v1─┬────v2─┐
│  1 │   2 │   3 │ 3.456 │ 5.678 │
│  1 │   2 │   4 │ 3.333 │ 4.444 │
└────┴─────┴─────┴───────┴───────┘
┌─id─┬─id2─┬─id3─┬────v1─┬────v2─┐
│  2 │   2 │   4 │ 3.333 │ 4.444 │
└────┴─────┴─────┴───────┴───────┘
┌─id─┬─id2─┬─id3─┬────v1─┬────v2─┐
│  3 │   2 │   4 │ 3.333 │ 4.444 │
└────┴─────┴─────┴───────┴───────┘

4 rows in set. Elapsed: 0.005 sec.

再次合并

localhost :) optimize table tt5;

OPTIMIZE TABLE tt5

Query id: 59c755af-be53-4e8a-88d3-006ad2d29295

Ok.

0 rows in set. Elapsed: 0.004 sec.

localhost :) select * from tt5;

SELECT *
FROM tt5

Query id: 3ee1b7e7-4f6c-4b69-a07e-67d8c8a2b99b

┌─id─┬─id2─┬─id3─┬────v1─┬────v2─┐
│  1 │   2 │   3 │ 3.456 │ 5.678 │
│  1 │   2 │   4 │ 3.333 │ 4.444 │
│  2 │   2 │   4 │ 3.333 │ 4.444 │
│  3 │   2 │   4 │ 3.333 │ 4.444 │
└────┴─────┴─────┴───────┴───────┘

4 rows in set. Elapsed: 0.005 sec.

在做sum查询

等于直接从预处理结果拿数据了,原始数据再多也不影响,都已经sum聚合好了。

localhost :) select id,sum(v1),sum(v2) from tt5 group by id;

SELECT
    id,
    sum(v1),
    sum(v2)
FROM tt5
GROUP BY id

Query id: 4e231f56-1f42-45b6-9f01-78194d1b160a

┌─id─┬─sum(v1)─┬─sum(v2)─┐
│  3 │   3.333 │   4.444 │
│  2 │   3.333 │   4.444 │
│  1 │   6.789 │  10.122 │
└────┴─────────┴─────────┘

3 rows in set. Elapsed: 0.015 sec.

其它样例

指定聚合列columns

localhost :) create table tt6(id int, id2 int, id3 int, v1 decimal(16,3),v2 decimal(16,3))engine=SummingMergeTree((v1,v2)) order by (id,id2,id3) primary key id;

CREATE TABLE tt6
(
    `id` int,
    `id2` int,
    `id3` int,
    `v1` decimal(16, 3),
    `v2` decimal(16, 3)
)
ENGINE = SummingMergeTree((v1, v2))
PRIMARY KEY id
ORDER BY (id, id2, id3)

Query id: 2a3811ba-32c3-48b8-87f4-c77399ca71f0

Ok.

0 rows in set. Elapsed: 0.021 sec.

localhost :)

指定包含了order 列的情况

从实际效果看,并不影响,后面数据的聚合,依然是以order 的key是维度,其它的是数值为准。

localhost :) create table tt6(id int, id2 int, id3 int, v1 decimal(16,3),v2 decimal(16,3))engine=SummingMergeTree((id2,id3,v1,v2)) order by (id,id2,id3) primary key id;

CREATE TABLE tt6
(
    `id` int,
    `id2` int,
    `id3` int,
    `v1` decimal(16, 3),
    `v2` decimal(16, 3)
)
ENGINE = SummingMergeTree((id2, id3, v1, v2))
PRIMARY KEY id
ORDER BY (id, id2, id3)

Query id: 2a3811ba-32c3-48b8-87f4-c77399ca71f0

Ok.

0 rows in set. Elapsed: 0.021 sec.

localhost :)

指定了包含主键列的情况

从结果看,同样无影响,并没有报错。数据依然是order 的键没有sum合并。

localhost :) create table tt7(id int, id2 int, id3 int, v1 decimal(16,3),v2 decimal(16,3))engine=SummingMergeTree((id,id2,id3,v1,v2)) order by (id,id2,id3) primary key id;

CREATE TABLE tt7
(
    `id` int,
    `id2` int,
    `id3` int,
    `v1` decimal(16, 3),
    `v2` decimal(16, 3)
)
ENGINE = SummingMergeTree((id, id2, id3, v1, v2))
PRIMARY KEY id
ORDER BY (id, id2, id3)

Query id: 8e9ea6dc-2c06-4ee7-b9aa-72fbefee102a

Ok.

0 rows in set. Elapsed: 0.041 sec.

localhost :) insert into tt7 values(1,2,4,33,44);

INSERT INTO tt7 VALUES

Query id: c4c9166d-1596-40fc-ae15-18799bfd6594

Ok.

1 rows in set. Elapsed: 0.004 sec.

localhost :) insert into tt7 values(1,2,3,11,22);

INSERT INTO tt7 VALUES

Query id: ee2d5f5b-6176-44ad-914a-80683949579d

Ok.

1 rows in set. Elapsed: 0.006 sec.

localhost :) optimize table tt7;

OPTIMIZE TABLE tt7

Query id: 60d6a958-53c5-469d-ab0a-ceb08c5509f7

Ok.

0 rows in set. Elapsed: 0.004 sec.

localhost :) select * from tt7;

SELECT *
FROM tt7

Query id: 7284e429-7f19-4d86-b0e6-7a46080cce80

┌─id─┬─id2─┬─id3─┬─────v1─┬─────v2─┐
│  1 │   2 │   3 │ 11.000 │ 22.000 │
│  1 │   2 │   4 │ 33.000 │ 44.000 │
└────┴─────┴─────┴────────┴────────┘

2 rows in set. Elapsed: 0.005 sec.

localhost :) insert into tt7 values(1,2,4,55,66);

INSERT INTO tt7 VALUES

Query id: e34029b1-d933-419c-abc9-4cee5daa7737

Ok.

1 rows in set. Elapsed: 0.004 sec.

localhost :) optimize table tt7;

OPTIMIZE TABLE tt7

Query id: b8be951b-7748-4c92-9039-aba1eec45637

Ok.

0 rows in set. Elapsed: 0.003 sec.

localhost :) select * from tt7;

SELECT *
FROM tt7

Query id: 95c4c386-e3c6-49c6-97dc-0d25f7d1805c

┌─id─┬─id2─┬─id3─┬─────v1─┬──────v2─┐
│  1 │   2 │   3 │ 11.000 │  22.000 │
│  1 │   2 │   4 │ 88.000 │ 110.000 │
└────┴─────┴─────┴────────┴─────────┘

2 rows in set. Elapsed: 0.006 sec.

指定部分数值列的情况

从结果看,确实只有指定的列参与了sum, 其它的保持了某个原始值。如下例子中,v2列参与了合并,而 v1列没有。

localhost :) create table tt8(id int, id2 int, id3 int, v1 decimal(16,3),v2 decimal(16,3))engine=SummingMergeTree((v2)) order by (id,id2,id3) primary key id;

CREATE TABLE tt8
(
    `id` int,
    `id2` int,
    `id3` int,
    `v1` decimal(16, 3),
    `v2` decimal(16, 3)
)
ENGINE = SummingMergeTree(v2)
PRIMARY KEY id
ORDER BY (id, id2, id3)

Query id: f4cd6e32-c4ad-4ca0-bcbb-b190bb11f051

Ok.

0 rows in set. Elapsed: 0.040 sec.

localhost :) show create table tt8;

SHOW CREATE TABLE tt8

Query id: 4217919a-6675-490b-a5a5-091eb6b4b8af

┌─statement──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ CREATE TABLE testdb.tt8
(
    `id` Int32,
    `id2` Int32,
    `id3` Int32,
    `v1` Decimal(16, 3),
    `v2` Decimal(16, 3)
)
ENGINE = SummingMergeTree(v2)
PRIMARY KEY id
ORDER BY (id, id2, id3)
SETTINGS index_granularity = 8192 │
└────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘

1 rows in set. Elapsed: 0.004 sec.

localhost :) insert into tt8 values(1,2,3,11,22);

INSERT INTO tt8 VALUES

Query id: 8b2a0673-a29a-470e-8a34-5e86faebde3f

Ok.

1 rows in set. Elapsed: 0.003 sec.

localhost :) insert into tt8 values(1,2,4,33,44);

INSERT INTO tt8 VALUES

Query id: 9df38744-8e59-4ac2-93dc-6d00c6ac54fa

Ok.

1 rows in set. Elapsed: 0.003 sec.

localhost :) select * from tt8;

SELECT *
FROM tt8

Query id: 9d94a70b-99c9-4c7a-ac8a-3643f6c1ae8d

┌─id─┬─id2─┬─id3─┬─────v1─┬─────v2─┐
│  1 │   2 │   3 │ 11.000 │ 22.000 │
└────┴─────┴─────┴────────┴────────┘
┌─id─┬─id2─┬─id3─┬─────v1─┬─────v2─┐
│  1 │   2 │   4 │ 33.000 │ 44.000 │
└────┴─────┴─────┴────────┴────────┘

2 rows in set. Elapsed: 0.003 sec.

localhost :) optimize table tt8;

OPTIMIZE TABLE tt8

Query id: 471eba62-97f8-408f-993c-d28cd3f1cc5c

Ok.

0 rows in set. Elapsed: 0.003 sec.

localhost :) select * from tt8;

SELECT *
FROM tt8

Query id: c6143c05-0d28-4d48-a458-866cb1f314dd

┌─id─┬─id2─┬─id3─┬─────v1─┬─────v2─┐
│  1 │   2 │   3 │ 11.000 │ 22.000 │
│  1 │   2 │   4 │ 33.000 │ 44.000 │
└────┴─────┴─────┴────────┴────────┘

2 rows in set. Elapsed: 0.003 sec.

localhost :) insert into tt8 values(1,2,4,55,66);

INSERT INTO tt8 VALUES

Query id: 4097047f-c6b1-4782-993a-208b71097647

Ok.

1 rows in set. Elapsed: 0.003 sec.

localhost :) select * from tt8;

SELECT *
FROM tt8

Query id: 8eec66a5-b032-48c0-ac84-9df5452cbfa4

┌─id─┬─id2─┬─id3─┬─────v1─┬─────v2─┐
│  1 │   2 │   3 │ 11.000 │ 22.000 │
│  1 │   2 │   4 │ 33.000 │ 44.000 │
└────┴─────┴─────┴────────┴────────┘
┌─id─┬─id2─┬─id3─┬─────v1─┬─────v2─┐
│  1 │   2 │   4 │ 55.000 │ 66.000 │
└────┴─────┴─────┴────────┴────────┘

3 rows in set. Elapsed: 0.003 sec.

localhost :) optimize table tt8;

OPTIMIZE TABLE tt8

Query id: 741b9e91-010f-4f6d-a758-eda6731ca37e

Ok.

0 rows in set. Elapsed: 0.003 sec.

localhost :) select * from tt8;

SELECT *
FROM tt8

Query id: 317c904d-a67b-49e6-857b-4703bbd22696

┌─id─┬─id2─┬─id3─┬─────v1─┬──────v2─┐
│  1 │   2 │   3 │ 11.000 │  22.000 │
│  1 │   2 │   4 │ 33.000 │ 110.000 │
└────┴─────┴─────┴────────┴─────────┘

2 rows in set. Elapsed: 0.011 sec.