本文主要介绍 clickhouse 的基础用法
安装
下面以 Debain/Ubuntu系统为例,安装方式
1 | sudo apt-get install -y apt-transport-https ca-certificates dirmngr |
创建表
- MergeTree类型
以下是一个记录用户访问日志的表
1
2
3
4
5
6
7
8
9
10CREATE TABLE IF NOT EXISTS visit_log (
user_id UInt32,
url String,
count UInt8,
created_at DateTime
) Engine = MergeTree
PARTITION BY toYYYYMMDD(created_at) -- 按照日期分区
ORDER BY created_at -- 按照时间排序
TTL created_at + toIntervalDay(180) -- 指定180天自动删除过期数据
SETTINGS index_granularity = 8192 -- 设置索引颗粒度 - 创建视图
以下视图是为了按小时统计url的访问次数
1
2
3
4
5
6
7
8
9
10
11
12
13CREATE MATERIALIZED VIEW url_stat
ENGINE = AggregatingMergeTree()
ORDER BY (hour_time, url)
TTL hour_time + toIntervalDay(1)
AS SELECT
url,
toStartOfHour(created_at) AS hour_time, --格式化为小时
sumState(count) AS visit_count_state, -- 统计总和
minState(created_at) AS first_time_state, -- 记录最早访问时间
maxState(created_at) AS last_time_state -- 记录最晚访问时间
FROM visit_log
WHERE count>0
GROUP BY hour_time,url
操作
这里使用go sdk实现相关demo: https://github.com/ClickHouse/clickhouse-go
- 写入数据
因为 clickhouse 一般都是数据量较大的应用场景,所以一般都是用batch执行插入,也可以用Exec执行单条插入,但不常用
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48package main
import (
"context"
"github.com/ClickHouse/clickhouse-go/v2"
"github.com/ClickHouse/clickhouse-go/v2/lib/driver"
"log"
"math/rand"
"os"
"time"
)
var (
conn driver.Conn
)
func init() {
opt, err := clickhouse.ParseDSN(os.Getenv("CLICKHOUSE_DSN"))
if err != nil {
log.Fatalf("parse dsn failed:%v", err)
}
opt.ConnMaxLifetime = time.Minute * 1
conn, err = clickhouse.Open(opt)
if err != nil {
log.Fatalf("open dsn failed:%v", err)
}
log.Println("connect clickhouse success")
}
func main() {
err := insert(context.Background())
log.Println("insert return: %v", err)
}
func insert(ctx context.Context) error {
batch, err := conn.PrepareBatch(ctx, "INSERT INTO visit_log(user_id,url,count,created_at) VALUES ")
if err != nil {
log.Fatalf("run PrepareBatch failed:%v", err)
}
for i := 0; i < 1000; i++ {
batch.Append(rand.Int(), "http://www.baidu.com", 1, time.Now())
}
return batch.Send()
} - 查询数据
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47// query 从表里根据 userId查询数据
func query(ctx context.Context, userId int) error {
sql := fmt.Sprintf("select url from visit_log where user_id = %d", userId)
rows, err := conn.Query(ctx, sql)
if err != nil {
return err
}
for rows.Next() {
var url string
err = rows.Scan(&url)
if err != nil {
return err
}
log.Printf("url: %s", url)
}
rows.Close()
return rows.Err()
}
// queryFromView 根据时间从视图查询统计数据
func queryFromView(ctx context.Context, hourTime string) error {
sql := fmt.Sprintf("select url,sumMerge(visit_count_state) as visit_count,"+
"minMerge(first_time_state) as first_time, "+
"maxMerge(last_time_state) as last_time from url_stat where hour_time='%s' "+
"GROUP BY hour_time,url ", hourTime)
rows, err := conn.Query(ctx, sql)
if err != nil {
return err
}
for rows.Next() {
var url string
var count int64
var firstTime, lastTime time.Time
var err = rows.Scan(&url, &count, &firstTime, &lastTime)
if err != nil {
return err
}
log.Printf("url: %s, visit_count: %d, fisrt_time: %v, last_time: %v", url, count, firstTime, lastTime)
}
rows.Close()
return rows.Err()
}
- 本文作者: Hongker
- 本文链接: https://hongker.github.io/2024/06/25/clickhouse-usage/
- 版权声明: 本博客所有文章除特别声明外,均采用 MIT 许可协议。转载请注明出处!