连接 Clickhouse
1./clickhouse-client -h 9.218.16.160 --port 9000 -u 'yyy' --password 'zzzzz'
2
3# 查看 clickhouse 进程状态
4ps -ef|grep clickhouse |grep -v grep
查询库表信息
1use database_name;
2select * from table;
3show create table table_name;
查询数据库数据量大小
1##按照数据量倒序
2SELECT database, formatReadableSize(on_disk) FROM (SELECT database, sum(bytes_on_disk) AS on_disk FROM system.parts GROUP BY database) ORDER BY on_disk DESC;
3
4SELECT database, formatReadableSize(sum(bytes_on_disk)) AS on_disk FROM system.parts GROUP BY database ORDER BY on_disk DESC;
查询表数据量大小
1select database, table, sum(rows) as "总行数", formatReadableSize(sum(data_uncompressed_bytes)) as "原始大小", formatReadableSize(sum(data_compressed_bytes)) as "压缩大小", 2*CEILING(sum(data_compressed_bytes)/(10*1024*1024*1024*1024)/0.7)*3+3 as "计算单元数", 2*CEILING(sum(data_compressed_bytes)/(10*1024*1024*1024*1024)/0.7)*2+3 as "存储单元数", round(sum(data_compressed_bytes) / sum(data_uncompressed_bytes) * 100, 0) "压缩率" from system.parts group by database,table order by sum(data_compressed_bytes) desc limit 100;
2
3
4select database, table, sum(rows) as "总行数", formatReadableSize(sum(data_uncompressed_bytes)) as "原始大小", formatReadableSize(sum(data_compressed_bytes)) as "压缩大小", round(sum(data_compressed_bytes) / sum(data_uncompressed_bytes) * 100, 0) "压缩率" from system.parts group by database,table order by sum(data_compressed_bytes) desc limit 100;
5
6select database, table, formatReadableSize(sum(data_uncompressed_bytes)) as "原始大小", formatReadableSize(sum(data_compressed_bytes)) as "压缩大小" from system.parts group by database,table order by sum(data_compressed_bytes) desc limit 100;
设置数据显示格式
1SELECT * FROM TABLE FORMAT Vertical
查询某范围时间的数据
1## 查询近几小时
2SELECT stactics_date FROM table where stactics_date > subtractHours(now(), 7);
3
4## 查询今天
5SELECT stactics_date FROM table where toDate(stactics_date) = today();
6
7## 查询昨天
8SELECT stactics_date FROM table where toDate(stactics_date) = yesterday();
9
10##查询近几天
11SELECT stactics_date FROM table where stactics_date > subtractDays(now(), 7);
12
13## 查询本周
14SELECT stactics_date FROM table where toStartOfWeek(stactics_date) = toStartOfWeek(now());
15
16##查询本月
17SELECT stactics_date FROM table where toYYYYMM(stactics_date) = toYYYYMM(now());
18
19##按照自定义时间段(小时维度)
20SELECT stactics_date FROM table where stactics_date BETWEEN yesterday() and subtractHours(now(), 1);
21
22##按照自定义时间段(字符时间格式加减天数)
23SELECT stactics_date FROM table where stactics_date BETWEEN
24DATE_SUB( parseDateTimeBestEffort({startTime}), INTERVAL 1 DAY )
25AND DATE_SUB( parseDateTimeBestEffort({endTime}), INTERVAL 1 DAY );
26
27## 字符串日期自定义时间
28SELECT COUNT(*) from table where parseDateTimeBestEffort(import_date) BETWEEN parseDateTimeBestEffort('20220831') AND parseDateTimeBestEffort('20230217');
29
30## 每天时间段内
31SELECT stactics_date FROM table where toYYYYMMDDhhmmss(stactics_date)%1000000 BETWEEN 123000 and 160000;
32
33## 按小时分组
34SELECT
35 stactics_name,
36 stactics_id,
37 concat(toString(toHour(stactics_date)), '-', toString(toHour(stactics_date) + 1), '点') stacticsDate,count(*) stacticsCount
38FROM table GROUP BY stactics_name, stactics_id,toHour(stactics_date);
39
40## 按天分组
41SELECT stactics_name,stactics_id,toDate(stactics_date) stacticsDate,count(*) stacticsCount
42FROM table GROUP BY stactics_name,stactics_id,toDate(stactics_date);
添加表字段
1ALTER TABLE t_all_xxx_test ON CLUSTER 'default_cluster' ADD COLUMN is_valid_click Int8 AFTER act_type;
2
3ALTER TABLE t_xx_test ON CLUSTER 'default_cluster' MODIFY COLUMN is_valid_click Int8 COMMENT '是否有效点击';
导出数据
1clickhouse-client -h 1.138.211.21 --port 9000 -u 'xxxx' --password 'xxxx' --database='xxx_ad' --query="select imp_date,user_id,action_time,action_type,context_id,action_item_object_id,action_item_object_type,title,url from data_cube_ad.t_xxx_view where imp_date=20241119 and context_id_cn='xxx' and action_type_cn='阅读' and action_item_object_type_cn='文章' and title!='' and url='' limit 5 FORMAT CSV" > article.csv
导入数据
1clickhouse-client -h 127.0.0.1 --database='db' --query='insert into db.test FORMAT CSV' < ./test.csv
删除数据
1# 1、首先查询出分区列表
2SELECT partition AS partition FROM system.parts WHERE table = 're_xxxx_aid_action_analy_sku_about_order_action_90d' AND partition >= '20190101' AND partition < '20250317' order by partition desc;
3
4# 2、使用文本编辑器处理,最终格式如下:
5DROP PARTITION '20240113',DROP PARTITION '20240112',DROP PARTITION '20240111',DROP PARTITION '20240110',DROP PARTITION '20240109',DROP PARTITION '20240108';
6
7# 3、执行删除语句
8ALTER TABLE middle_ad_expo_xx_user_daily_local on cluster default_cluster DROP PARTITION '20240113',DROP PARTITION '20240112',DROP PARTITION '20240111',DROP PARTITION '20240110',DROP PARTITION '20240109',DROP PARTITION '20240108';
查看集群节点信息
1select cluster,replica_num, count(*) from system.clusters group by cluster,replica_num order by cluster limit 40;
近7天查询热点
1select user,databases,tables,count(*) as sum from system.query_log where user<>'clickhouse_admin' and event_date >=subtractDays(toDate(now()),7) and databases not in (['system'],[]) group by user,databases,tables order by sum desc,databases,tables limit 20;
查询连接状态
1# 当前连接数
2SELECT * FROM system.metrics WHERE metric LIKE '%Connection';
3
4# 当前正在执行的查询
5SELECT query_id, user, address, query FROM system.processes ORDER BY query_id;
6SELECT * from system.processes where query_id='xxxxxxxxxxxxxxxx'
7
8# 终止查询
9KILL QUERY WHERE query_id='fb6a7792-b733-4a19-b46f-f6943abe53e4';
10KILL QUERY WHERE lower(query) like '%xxxxxxxxxx%';
11
12# DDL操作查询
13SELECT database, table, mutation_id, command, create_time, is_done FROM system.mutations where is_done=0;
14
15# 终止DDL操作
16KILL MUTATION where mutation_id = 'xxxxxxxxxxxxxxxx';
17
18# 分析最近查询量
19select initial_user,query_kind,count(1) from clusterAllReplicas('default_cluster','system','query_log') where event_time>='2023-11-30 00:00:00' and initial_user<>'clickhouse_admin' group by initial_user,query_kind;
20
21# 近七天SQL耗时报表
22SELECT event_date AS date,toDecimal32(quantile(0.90)(query_duration_ms) , 2) AS "P90",
23toDecimal32(quantile(0.95)(query_duration_ms) , 2) AS "P95",toDecimal32(quantile(0.99)(query_duration_ms) , 2) AS "P99",toDecimal32(avg(query_duration_ms) , 2) AS "AVG",COUNT(*) AS count FROM system.query_log WHERE event_time>=toDate(toUnixTimestamp(now())-24*3600*7) AND (query not ilike '%SELECT 1%') AND (query not ilike 'INSERT%') AND written_rows=0 AND type=2 GROUP BY date ORDER BY date DESC LIMIT 1000;
查询 JSON 字符串
1select live_products from t_xxx_view where imp_date=20250603 and action_item_object_type_cn = 'xxx商品' and JSONExtract(arrayJoin(JSONExtractArrayRaw(live_products)), 'product_id', 'String') = '10000081318088' limit 5 Format Vertical;
2
3select JSONExtract(row, 'product_id', 'String') AS id from
4(
5select arrayJoin(JSONExtractArrayRaw(live_products)) as row from t_xxx_view where imp_date=20250603 and action_item_object_type_cn = '直播间商品' limit 5
6)
7
8select JSONExtract(row, 'id', 'Int32') AS id
9 ,JSONExtract(row, 'name', 'String') AS name
10 ,JSONExtract(arrayJoin(JSONExtractArrayRaw(row,'priceTypeList')),'priceTypeId','String') AS priceTypeId
11 ,JSONExtract(arrayJoin(JSONExtractArrayRaw(row,'priceTypeList')),'priceTypeName','String') AS priceTypeName
12 ,JSONExtract(arrayJoin(JSONExtractArrayRaw(row,'priceTypeList')),'unitName','String') AS unitName
13 ,JSONExtract(arrayJoin(JSONExtractArrayRaw(row,'priceTypeList')),'unitId','String') AS unitId
14 ,JSONExtract(arrayJoin(JSONExtractArrayRaw(row,'priceTypeList')),'unitPrice','String') AS unitPrice
15from
16(
17select arrayJoin(JSONExtractArrayRaw('[{"id":1,"name":"月测","priceTypeList":[{"priceTypeId":1,"priceTypeName":"手绘","unitId":1,"unitName":"张","unitPrice":2.1}]},{"id":2,"name":"美术","priceTypeList":[{"priceTypeId":2,"priceTypeName":"矢量插画","unitId":1,"unitName":"张","unitPrice":10},{"priceTypeId":1,"priceTypeName":"手绘","unitId":1,"unitName":"张","unitPrice":5.23}]}]')) as row
18)
重启 Clickhouse
1systemctl restart clickhouse-server
2systemctl start clickhouse-server
3systemctl stop clickhouse-server
批量删除 ZK 任务
1./delete_zk_node.sh 1.1.7.2:2181 /clickhouse/task_queue/ddl