Clickhouse 测试tpc-h的SQL1语句,物化视图优化提升百倍

本文针对TPC-h的SQL1 ,在Clickhouse数据库上做性能测试和优化,特别是物化视图预聚合部分。其中原始表59986052行,查询耗时6.047秒,物化视图3817行,查询耗时0.025;数据量差距15715倍,性能差距241.88倍。

SQL1

tpc-h的SQL是典型的单表聚合预算,有多个字段的聚合或者计算表达式聚合,2个列的group。

select
	l_returnflag,
	l_linestatus,
	sum(l_quantity) as sum_qty,
	sum(l_extendedprice) as sum_base_price,
	sum(l_extendedprice * (1 - l_discount)) as sum_disc_price,
	sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge,
	avg(l_quantity) as avg_qty,
	avg(l_extendedprice) as avg_price,
	avg(l_discount) as avg_disc,
	count(*) as count_order
from
	lineitem
where
	l_shipdate <= date '1998-12-01' - interval '90' day 
group by
	l_returnflag,
	l_linestatus
order by
	l_returnflag,
	l_linestatus;

表结构

原始表结构

create table lineitem (
        l_orderkey bigint,
        l_partkey bigint,
        l_suppkey bigint,
        l_linenumber bigint,
        l_quantity decimal(15,2),
        l_extendedprice decimal(15,2),
        l_discount decimal(15,2),
        l_tax decimal(15,2),
        l_returnflag char(1),
        l_linestatus char(1),
        l_shipdate date,
        l_commitdate date,
        l_receiptdate date,
        l_shipinstruct char(25),
        l_shipmode char(10),
        l_comment varchar(50)
)

clickhouse的表结构

根据l_shipdate,l_returnflag,l_linestatus进行排序分组。

create table lineitem (
        l_orderkey bigint,
        l_partkey bigint,
        l_suppkey bigint,
        l_linenumber bigint,
        l_quantity decimal(15,2),
        l_extendedprice decimal(15,2),
        l_discount decimal(15,2),
        l_tax decimal(15,2),
        l_returnflag char(1),
        l_linestatus char(1),
        l_shipdate date,
        l_commitdate date,
        l_receiptdate date,
        l_shipinstruct char(25),
        l_shipmode char(10),
        l_comment varchar(50)
)ENGINE=MergeTree
ORDER BY (l_shipdate,l_returnflag,l_linestatus);

clichouse的物化视图

CREATE MATERIALIZED VIEW IF NOT EXISTS tpch.mv_lineitem
ENGINE = SummingMergeTree
ORDER BY (l_shipdate,l_returnflag,l_linestatus) AS
select
    l_shipdate,
	l_returnflag,
	l_linestatus,
	sum(l_quantity) as sum_qty,
	sum(l_extendedprice) as sum_base_price,
	sum(l_extendedprice * (1 - l_discount)) as sum_disc_price,
	sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge,
    sum(l_quantity) l_quantity_sum,
	count(l_quantity) as l_quantity_cound,
    sum(l_extendedprice) as l_extendedprice_sum,
	count(l_extendedprice) as l_extendedprice_cound,
    sum(l_discount) as l_discount_sum,
	count(l_discount) as l_discount_cound,
	count(*) as count_order
from
	lineitem
group by
    l_shipdate,
	l_returnflag,
	l_linestatus

批量入库

[root@localhost data]# time clickhouse-client -m --format_csv_delimiter="|" --query="insert into tpch.lineitem format CSV" < /data/tpch10/lineitem.tbl

real    4m21.729s
user    3m24.727s
sys     0m9.835s

数据量

采用10s的tpc-h程序生成,文件大小为

[root@localhost data]# ll /data/tpch10/ -h
total 11G
-rw-r--r--. 1 root root 234M May  7 09:05 customer.tbl
-rw-r--r--. 1 root root 7.3G May  7 09:05 lineitem.tbl
-rw-r--r--. 1 root root 2.1K May  7 09:05 nation.tbl
-rw-r--r--. 1 root root 1.7G May  7 09:05 orders.tbl
-rw-r--r--. 1 root root 1.2G May  7 09:05 partsupp.tbl
-rw-r--r--. 1 root root 233M May  7 09:05 part.tbl
-rw-r--r--. 1 root root  396 May  7 09:05 region.tbl
-rw-r--r--. 1 root root  14M May  7 09:05 supplier.tbl


localhost :) select count(*) from lineitem;

SELECT count(*)
FROM lineitem

Query id: 095fc4de-edbc-4091-9ca2-1891dff75123

┌──count()─┐
│ 59986052 │
└──────────┘

1 rows in set. Elapsed: 0.124 sec.


localhost :) select count(*) from mv_lineitem;

SELECT count(*)
FROM mv_lineitem

Query id: 524e2909-c573-4ef7-9cc2-bac4dc298daa

┌─count()─┐
│    3817 │
└─────────┘

1 rows in set. Elapsed: 0.004 sec. Processed 3.82 thousand rows, 7.63 KB (941.37 thousand rows/s., 1.88 MB/s.)

localhost :)

查询性能

查询耗时6.047秒,物化视图3817行,查询耗时0.025;数据量差距15715倍,性能差距241.88倍。

原始表

耗时6.047 sec

localhost :) select
:-] l_returnflag,
:-] l_linestatus,
:-] sum(l_quantity) as sum_qty,
:-] sum(l_extendedprice) as sum_base_price,
:-] sum(l_extendedprice * (1 - l_discount)) as sum_disc_price,
:-] sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge,
:-] avg(l_quantity) as avg_qty,
:-] avg(l_extendedprice) as avg_price,
:-] avg(l_discount) as avg_disc,
:-] count(*) as count_order
:-] from
:-] lineitem
:-] where
:-] l_shipdate <= date '1998-12-01' - interval '90' day
:-] group by
:-] l_returnflag,
:-] l_linestatus
:-] order by
:-] l_returnflag,
:-] l_linestatus;

SELECT
    l_returnflag,
    l_linestatus,
    sum(l_quantity) AS sum_qty,
    sum(l_extendedprice) AS sum_base_price,
    sum(l_extendedprice * (1 - l_discount)) AS sum_disc_price,
    sum((l_extendedprice * (1 - l_discount)) * (1 + l_tax)) AS sum_charge,
    avg(l_quantity) AS avg_qty,
    avg(l_extendedprice) AS avg_price,
    avg(l_discount) AS avg_disc,
    count(*) AS count_order
FROM lineitem
WHERE l_shipdate <= (toDate('1998-12-01') - toIntervalDay('90'))
GROUP BY
    l_returnflag,
    l_linestatus
ORDER BY
    l_returnflag ASC,
    l_linestatus ASC

Query id: 0e1334eb-e9cc-425b-adf7-a4f29451bd29

┌─l_returnflag─┬─l_linestatus─┬─────────sum_qty─┬──────sum_base_price─┬───────────sum_disc_price─┬────────────────────sum_charge─┬────────────avg_qty─┬──────────avg_price─┬────────────avg_disc─┬─count_order─┐
│ A            │ F            │ 377518399.00000 │  566065727797.25000 │  537759104278.0656000000 │  559276670892.116819000000000 │ 25.500975103007097 │  38237.15100895854 │ 0.05000657454024321 │    14804077 │
│ N            │ F            │   9851614.00000 │   14767438399.17000 │   14028805792.2114000000 │   14590490998.366737000000000 │ 25.522448302840946 │  38257.81066008114 │ 0.04997336773765667 │      385998 │
│ N            │ O            │ 743124873.00000 │ 1114302286901.88000 │ 1058580922144.9638000000 │ 1100937000170.591854000000000 │ 25.498075870689316 │  38233.90292348181 │ 0.05000081182113131 │    29144351 │
│ R            │ F            │ 377732830.00000 │  566431054976.00000 │  538110922664.7677000000 │  559634780885.086257000000000 │  25.50838478968014 │ 38251.219273559764 │ 0.04999679231408742 │    14808183 │
└──────────────┴──────────────┴─────────────────┴─────────────────────┴──────────────────────────┴───────────────────────────────┴────────────────────┴────────────────────┴─────────────────────┴─────────────┘

4 rows in set. Elapsed: 6.047 sec. Processed 59.17 million rows, 5.09 GB (9.79 million rows/s., 841.62 MB/s.)

localhost :)

物化视图

耗时0.025 sec

localhost :) select
:-] l_returnflag,
:-] l_linestatus,
:-] sum(sum_qty) as sum_qty,
:-] sum(sum_base_price) as sum_base_price,
:-] sum(sum_disc_price) as sum_disc_price,
:-] sum(sum_charge) as sum_charge,
:-]     sum(l_quantity_sum)/ sum(l_quantity_cound) as avg_qty,
:-]     sum(l_extendedprice_sum) / sum(l_extendedprice_cound) as avg_price,
:-]     sum(l_discount_sum)/sum(l_discount_cound) as avg_disc,
:-] sum(count_order) as count_order
:-] from
:-] mv_lineitem
:-] where
:-] l_shipdate <= date '1998-12-01' - interval '90' day
:-] group by
:-] l_returnflag,
:-] l_linestatus;

SELECT
    l_returnflag,
    l_linestatus,
    sum(sum_qty) AS sum_qty,
    sum(sum_base_price) AS sum_base_price,
    sum(sum_disc_price) AS sum_disc_price,
    sum(sum_charge) AS sum_charge,
    sum(l_quantity_sum) / sum(l_quantity_cound) AS avg_qty,
    sum(l_extendedprice_sum) / sum(l_extendedprice_cound) AS avg_price,
    sum(l_discount_sum) / sum(l_discount_cound) AS avg_disc,
    sum(count_order) AS count_order
FROM mv_lineitem
WHERE l_shipdate <= (toDate('1998-12-01') - toIntervalDay('90'))
GROUP BY
    l_returnflag,
    l_linestatus

Query id: 2616d8b2-567a-4165-b24b-ca4df8f72eff

┌─l_returnflag─┬─l_linestatus─┬─────────sum_qty─┬──────sum_base_price─┬───────────sum_disc_price─┬────────────────────sum_charge─┬──avg_qty─┬───avg_price─┬─avg_disc─┬─count_order─┐
│ R            │ F            │ 377732830.00000 │  566431054976.00000 │  538110922664.7677000000 │  559634780885.086257000000000 │ 25.50838 │ 38251.21927 │  0.04999 │    14808183 │
│ A            │ F            │ 377518399.00000 │  566065727797.25000 │  537759104278.0656000000 │  559276670892.116819000000000 │ 25.50097 │ 38237.15100 │  0.05000 │    14804077 │
│ N            │ F            │   9851614.00000 │   14767438399.17000 │   14028805792.2114000000 │   14590490998.366737000000000 │ 25.52244 │ 38257.81066 │  0.04997 │      385998 │
│ N            │ O            │ 743124873.00000 │ 1114302286901.88000 │ 1058580922144.9638000000 │ 1100937000170.591854000000000 │ 25.49807 │ 38233.90292 │  0.05000 │    29144351 │
└──────────────┴──────────────┴─────────────────┴─────────────────────┴──────────────────────────┴───────────────────────────────┴──────────┴─────────────┴──────────┴─────────────┘

4 rows in set. Elapsed: 0.025 sec. Processed 3.82 thousand rows, 633.62 KB (152.30 thousand rows/s., 25.28 MB/s.)