引入lumberjack和fluentbit自动分发
This commit is contained in:
123
deploy/clickhouse/optimize_schema.sql
Normal file
123
deploy/clickhouse/optimize_schema.sql
Normal file
@@ -0,0 +1,123 @@
|
||||
-- =============================================================================
|
||||
-- ClickHouse logs_ingest 表优化脚本
|
||||
--
|
||||
-- 说明:
|
||||
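--   - All ALTER operations run online with no downtime; note that column type changes (LowCardinality) and MATERIALIZE INDEX rewrite existing parts through mutations and add I/O load while they run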
|
||||
-- - 建议按阶段顺序执行,每阶段执行后观察 system.parts 确认生效
|
||||
-- - 压缩编解码器变更仅影响新写入的 part,存量数据需等 merge 或手动 OPTIMIZE
|
||||
--
|
||||
-- 执行方式:
|
||||
--   clickhouse-client --host 127.0.0.1 --port 9000 --user default --password 'xxx' --multiquery < optimize_schema.sql
|
||||
-- =============================================================================
|
||||
|
||||
-- =============================================
|
||||
-- 阶段 1:大字段压缩优化(效果最显著)
|
||||
-- =============================================
|
||||
|
||||
-- Large free-text columns: switch to ZSTD(3), which compresses JSON / HTTP
-- text far better than the default LZ4.
-- Expected effect: 40%-60% less disk usage.
ALTER TABLE logs_ingest
    MODIFY COLUMN request_headers String
    CODEC(ZSTD(3));

ALTER TABLE logs_ingest
    MODIFY COLUMN request_body String
    CODEC(ZSTD(3));

ALTER TABLE logs_ingest
    MODIFY COLUMN response_headers String
    CODEC(ZSTD(3));

ALTER TABLE logs_ingest
    MODIFY COLUMN response_body String
    CODEC(ZSTD(3));
|
||||
|
||||
-- Medium-length text columns: ZSTD(1) balances compression ratio against
-- CPU cost.
ALTER TABLE logs_ingest
    MODIFY COLUMN ua String
    CODEC(ZSTD(1));

ALTER TABLE logs_ingest
    MODIFY COLUMN path String
    CODEC(ZSTD(1));

ALTER TABLE logs_ingest
    MODIFY COLUMN referer String
    CODEC(ZSTD(1));
|
||||
|
||||
-- Low-cardinality columns: convert to LowCardinality to reduce both memory
-- and disk usage.
-- method has very few distinct values (GET/POST/PUT/DELETE, ...); the
-- cardinality of host depends on the number of sites.
-- NOTE(review): unlike the codec-only statements, these change the column
-- type and presumably rewrite existing column data -- confirm the cost (and
-- that none of these columns is part of the sorting key) before running.
ALTER TABLE logs_ingest
    MODIFY COLUMN method LowCardinality(String);

ALTER TABLE logs_ingest
    MODIFY COLUMN log_type LowCardinality(String);

ALTER TABLE logs_ingest
    MODIFY COLUMN host LowCardinality(String);
|
||||
|
||||
-- Numeric columns: Delta + ZSTD encoding (relies on time/size correlation
-- between adjacent rows).
-- NOTE(review): Delta pays off when neighbouring values are close to each
-- other; verify on real data that it beats plain ZSTD for these columns.
ALTER TABLE logs_ingest
    MODIFY COLUMN bytes_in UInt64
    CODEC(Delta, ZSTD(1));

ALTER TABLE logs_ingest
    MODIFY COLUMN bytes_out UInt64
    CODEC(Delta, ZSTD(1));

ALTER TABLE logs_ingest
    MODIFY COLUMN cost_ms UInt32
    CODEC(Delta, ZSTD(1));
|
||||
|
||||
-- =============================================
|
||||
-- 阶段 2:添加 Skipping Index(加速高频过滤查询)
|
||||
-- =============================================
|
||||
|
||||
-- Exact-match lookup by trace_id (log detail view, FindByTraceId).
-- bloom_filter(0.01) = 1% false-positive rate; GRANULARITY 4 = one bloom
-- block per 4 granules.
ALTER TABLE logs_ingest
    ADD INDEX IF NOT EXISTS idx_trace_id trace_id
    TYPE bloom_filter(0.01)
    GRANULARITY 4;

-- Exact-match lookup by ip.
ALTER TABLE logs_ingest
    ADD INDEX IF NOT EXISTS idx_ip ip
    TYPE bloom_filter(0.01)
    GRANULARITY 4;
|
||||
|
||||
-- Substring search on host (LIKE '%xxx%').
-- ngrambf_v1 is used instead of tokenbf_v1: tokenbf_v1 only indexes whole
-- tokens delimited by non-alphanumeric characters, and a pattern fragment that
-- touches a % wildcard is not a whole token, so LIKE '%xxx%' could not use it.
-- Arguments: n-gram length 3, 10240-byte filter, 3 hash functions, seed 0.
-- Search fragments shorter than 3 characters cannot use the index.
-- NOTE(review): IF NOT EXISTS keeps an idx_host that was already created with
-- another type; drop it first if this script was applied before.
ALTER TABLE logs_ingest
    ADD INDEX IF NOT EXISTS idx_host host
    TYPE ngrambf_v1(3, 10240, 3, 0)
    GRANULARITY 4;
|
||||
|
||||
-- Filter on firewall_policy_id (HasFirewallPolicy: > 0).
ALTER TABLE logs_ingest
    ADD INDEX IF NOT EXISTS idx_fw_policy firewall_policy_id
    TYPE minmax
    GRANULARITY 4;

-- Range filter on status (HasError: status >= 400).
ALTER TABLE logs_ingest
    ADD INDEX IF NOT EXISTS idx_status status
    TYPE minmax
    GRANULARITY 4;
|
||||
|
||||
-- =============================================
|
||||
-- 阶段 3:物化索引到现有数据(对存量数据生效)
|
||||
-- =============================================
|
||||
-- Note: MATERIALIZE INDEX starts a background mutation; on large tables this
-- can take some time. Progress can be monitored with:
--   SELECT * FROM system.mutations WHERE is_done = 0
ALTER TABLE logs_ingest
    MATERIALIZE INDEX idx_trace_id;

ALTER TABLE logs_ingest
    MATERIALIZE INDEX idx_ip;

ALTER TABLE logs_ingest
    MATERIALIZE INDEX idx_host;

ALTER TABLE logs_ingest
    MATERIALIZE INDEX idx_fw_policy;

ALTER TABLE logs_ingest
    MATERIALIZE INDEX idx_status;
|
||||
|
||||
|
||||
-- =============================================================================
|
||||
-- dns_logs_ingest 表优化(DNS 日志表)
|
||||
-- =============================================================================
|
||||
|
||||
-- Text column compression for the DNS log table: ZSTD(3) for content_json,
-- ZSTD(1) for error.
ALTER TABLE dns_logs_ingest
    MODIFY COLUMN content_json String
    CODEC(ZSTD(3));

ALTER TABLE dns_logs_ingest
    MODIFY COLUMN error String
    CODEC(ZSTD(1));
|
||||
|
||||
-- Low-cardinality columns: convert to LowCardinality(String).
-- NOTE(review): these are type changes and presumably rewrite existing
-- column data -- confirm the cost before running on a large table.
ALTER TABLE dns_logs_ingest
    MODIFY COLUMN question_type LowCardinality(String);

ALTER TABLE dns_logs_ingest
    MODIFY COLUMN record_type LowCardinality(String);

ALTER TABLE dns_logs_ingest
    MODIFY COLUMN networking LowCardinality(String);
|
||||
|
||||
-- Exact-match lookup by request_id.
ALTER TABLE dns_logs_ingest
    ADD INDEX IF NOT EXISTS idx_request_id request_id
    TYPE bloom_filter(0.01)
    GRANULARITY 4;

-- Exact-match lookup by remote_addr.
ALTER TABLE dns_logs_ingest
    ADD INDEX IF NOT EXISTS idx_remote_addr remote_addr
    TYPE bloom_filter(0.01)
    GRANULARITY 4;
|
||||
|
||||
-- Fuzzy (substring) search on question_name.
-- ngrambf_v1 is used instead of tokenbf_v1: tokenbf_v1 only matches whole
-- tokens delimited by non-alphanumeric characters, so a wildcard-bounded
-- fragment such as LIKE '%xxx%' could not use it.
-- Arguments: n-gram length 3, 10240-byte filter, 3 hash functions, seed 0.
-- Search fragments shorter than 3 characters cannot use the index.
-- NOTE(review): the filter size is carried over unchanged; n-grams are more
-- numerous than tokens, so check the false-positive rate on real data and
-- enlarge the filter if needed. If queries only ever match whole labels
-- (e.g. hasToken), tokenbf_v1 remains the cheaper choice -- confirm usage.
ALTER TABLE dns_logs_ingest
    ADD INDEX IF NOT EXISTS idx_question_name question_name
    TYPE ngrambf_v1(3, 10240, 3, 0)
    GRANULARITY 4;
|
||||
|
||||
-- Filter on domain_id.
ALTER TABLE dns_logs_ingest
    ADD INDEX IF NOT EXISTS idx_domain_id domain_id
    TYPE minmax
    GRANULARITY 4;
|
||||
|
||||
-- Materialize the new indexes for data that already exists in the table.
ALTER TABLE dns_logs_ingest
    MATERIALIZE INDEX idx_request_id;

ALTER TABLE dns_logs_ingest
    MATERIALIZE INDEX idx_remote_addr;

ALTER TABLE dns_logs_ingest
    MATERIALIZE INDEX idx_question_name;

ALTER TABLE dns_logs_ingest
    MATERIALIZE INDEX idx_domain_id;
|
||||
|
||||
|
||||
-- =============================================================================
|
||||
-- 验证命令(执行完上述 ALTER 后运行)
|
||||
-- =============================================================================
|
||||
|
||||
-- 查看列的压缩编解码器
|
||||
-- SELECT name, type, compression_codec FROM system.columns WHERE table = 'logs_ingest' AND database = currentDatabase();
|
||||
|
||||
-- 查看表的压缩率
|
||||
-- SELECT table, formatReadableSize(sum(data_compressed_bytes)) AS compressed, formatReadableSize(sum(data_uncompressed_bytes)) AS uncompressed, round(sum(data_uncompressed_bytes) / sum(data_compressed_bytes), 2) AS ratio FROM system.columns WHERE database = currentDatabase() AND table IN ('logs_ingest', 'dns_logs_ingest') GROUP BY table;
|
||||
|
||||
-- 查看各列占用的磁盘空间(找出最大的列)
|
||||
-- SELECT name, formatReadableSize(sum(data_compressed_bytes)) AS compressed, formatReadableSize(sum(data_uncompressed_bytes)) AS uncompressed FROM system.columns WHERE database = currentDatabase() AND table = 'logs_ingest' GROUP BY name ORDER BY sum(data_compressed_bytes) DESC;
|
||||
|
||||
-- 查看 mutation 进度
|
||||
-- SELECT database, table, mutation_id, command, is_done, parts_to_do FROM system.mutations WHERE is_done = 0;
|
||||
|
||||
-- 强制触发 merge(可选,让压缩编解码器变更对存量数据生效)
|
||||
-- OPTIMIZE TABLE logs_ingest FINAL;
|
||||
-- OPTIMIZE TABLE dns_logs_ingest FINAL;
|
||||
Reference in New Issue
Block a user