引入lumberjack和fluentbit自动分发

This commit is contained in:
robin
2026-02-13 22:36:17 +08:00
parent c6da67db79
commit e9093baffb
47 changed files with 4589 additions and 317 deletions

111
deploy/clickhouse/README.md Normal file
View File

@@ -0,0 +1,111 @@
# ClickHouse + Fluent Bit 使用手册(Ubuntu 22.04 / Amazon Linux 2023)
## 1. 支持范围
- Ubuntu 22.04
- Amazon Linux 2023(AWS)
安装脚本:`install_clickhouse_linux.sh`(自动识别上述系统)。
## 2. 安装 ClickHouse
```bash
cd /path/to/waf-platform/deploy/clickhouse
chmod +x install_clickhouse_linux.sh
sudo ./install_clickhouse_linux.sh
```
可选:安装时初始化 `default` 用户密码:
```bash
sudo CLICKHOUSE_DEFAULT_PASSWORD='YourStrongPassword' ./install_clickhouse_linux.sh
```
## 3. 开启 HTTPS(默认仅 crt+key)
脚本默认生成 `server.crt + server.key`(带 SAN),并启用 8443 端口:
```bash
cd /path/to/waf-platform/deploy/clickhouse
chmod +x configure_clickhouse_https.sh
sudo CH_HTTPS_PORT=8443 \
CH_CERT_CN=clickhouse.example.com \
CH_CERT_DNS=clickhouse.example.com \
CH_CERT_IP=<CLICKHOUSE_IP> \
./configure_clickhouse_https.sh
```
使用已有证书:
```bash
sudo SRC_CERT=/path/to/server.crt \
SRC_KEY=/path/to/server.key \
CH_HTTPS_PORT=8443 \
./configure_clickhouse_https.sh
```
## 4. 初始化日志表(含优化)
```bash
cd /path/to/waf-platform/deploy/clickhouse
chmod +x init_waf_logs_tables.sh
sudo CH_HOST=127.0.0.1 \
CH_PORT=9000 \
CH_USER=default \
CH_PASSWORD='YourStrongPassword' \
CH_DATABASE=default \
./init_waf_logs_tables.sh
```
说明:
- `init_waf_logs_tables.sql` 已内置主要优化(`CODEC`、`LowCardinality`、跳数索引)。
- `optimize_schema.sql` 主要用于历史表补齐优化,不是首次建表必需步骤。
## 5. 平台侧配置(EdgeAdmin)
在 ClickHouse 设置页配置:
- Host:ClickHouse 地址
- Port:`8443`
- Database:`default`
- Scheme:`https`
当前实现说明:
- 前端不再提供 `TLS跳过校验`、`TLS Server Name` 配置项。
- 后端固定 `TLSSkipVerify=true`(默认不校验证书)。
保存后点击“测试连接”。
## 6. Fluent Bit 配置方式
推荐平台托管模式(在线安装/升级 Node、DNS 时自动下发):
- `/etc/fluent-bit/fluent-bit.conf`
- `/etc/fluent-bit/.edge-managed.env`
- `/etc/fluent-bit/.edge-managed.json`
检查状态:
```bash
sudo systemctl status fluent-bit --no-pager
sudo cat /etc/fluent-bit/.edge-managed.json
```
## 7. 验证与排障
查看 Fluent Bit 日志:
```bash
sudo journalctl -u fluent-bit -f
```
查看写入:
```sql
SELECT count() FROM default.logs_ingest;
SELECT count() FROM default.dns_logs_ingest;
```
常见错误:
- `connection refused`:8443 未监听或网络未放行。
- `legacy Common Name`:证书缺 SAN,需重签。

View File

@@ -0,0 +1,227 @@
#!/usr/bin/env bash
#
# Enable HTTPS on a local clickhouse-server (Ubuntu 22.04 / Amazon Linux 2023).
#
# Optional environment variables:
#   CH_HTTPS_PORT   HTTPS port written to the override config (default 8443)
#   CH_LISTEN_HOST  listen_host written to the override config (default ::)
#   CH_CERT_CN      certificate CN (default: `hostname -f`, else `hostname`)
#   CH_CERT_DNS     comma-separated DNS SAN entries
#   CH_CERT_IP      comma-separated IP SAN entries
#   CH_CERT_DAYS    validity in days of the generated server cert (default 825)
#   CH_GENERATE_CA  1/true/yes/on to also generate a local CA (default false)
#   SRC_CERT / SRC_KEY / SRC_CA  existing files to install instead of generating
set -euo pipefail

# The script writes below /etc/clickhouse-server and may install packages.
if (( EUID != 0 )); then
  echo "[ERROR] please run as root"
  exit 1
fi

[[ -f /etc/os-release ]] || {
  echo "[ERROR] /etc/os-release not found"
  exit 1
}

# shellcheck disable=SC1091
source /etc/os-release
os_id="$(echo "${ID:-}" | tr '[:upper:]' '[:lower:]')"
os_ver="${VERSION_ID:-}"

# Exactly two platforms are recognised; each gets its own boolean flag.
is_ubuntu22=false
is_amzn2023=false
case "${os_id}" in
  ubuntu)
    if [[ "${os_ver}" == 22.04* ]]; then
      is_ubuntu22=true
    fi
    ;;
  amzn)
    if [[ "${os_ver}" == 2023* ]]; then
      is_amzn2023=true
    fi
    ;;
esac

if ! [[ "${is_ubuntu22}" == "true" || "${is_amzn2023}" == "true" ]]; then
  echo "[ERROR] only Ubuntu 22.04 or Amazon Linux 2023 is supported. current: ID=${ID:-unknown}, VERSION_ID=${VERSION_ID:-unknown}"
  exit 1
fi

# openssl and curl are both needed later; install them only when one is missing.
if command -v openssl >/dev/null 2>&1 && command -v curl >/dev/null 2>&1; then
  : # both tools already available
elif [[ "${is_ubuntu22}" == "true" ]]; then
  apt-get update -y
  DEBIAN_FRONTEND=noninteractive apt-get install -y openssl curl ca-certificates
else
  dnf makecache -y
  dnf install -y openssl curl ca-certificates
fi

# Tunables (environment overrides win, otherwise the defaults below apply).
: "${CH_HTTPS_PORT:=8443}"
: "${CH_LISTEN_HOST:=::}"
# Kept as a plain assignment so a failing `hostname` still aborts under set -e.
CH_CERT_CN="${CH_CERT_CN:-$(hostname -f 2>/dev/null || hostname)}"
: "${CH_CERT_DNS:=}"
: "${CH_CERT_IP:=}"
: "${CH_CERT_DAYS:=825}"
: "${CH_GENERATE_CA:=false}"
: "${SRC_CERT:=}"
: "${SRC_KEY:=}"
: "${SRC_CA:=}"

# Fixed locations inside the ClickHouse configuration tree.
CH_DIR="/etc/clickhouse-server"
CH_CONFIG_D_DIR="${CH_DIR}/config.d"
PKI_DIR="${CH_DIR}/pki"
SERVER_CERT="${CH_DIR}/server.crt"
SERVER_KEY="${CH_DIR}/server.key"
CA_CERT="${CH_DIR}/ca.crt"
OVERRIDE_FILE="${CH_CONFIG_D_DIR}/waf-https.xml"

mkdir -p "${CH_CONFIG_D_DIR}" "${PKI_DIR}"
#######################################
# Split a comma-separated string into trimmed, non-empty items.
# Arguments: $1 - raw comma-separated list (may be empty)
# Outputs:   one item per line on stdout; nothing when no item remains
# Returns:   0
#######################################
split_csv() {
  local raw="$1"
  if [[ -z "${raw}" ]]; then
    return 0
  fi

  # Keep the scratch variables local so callers' globals are never clobbered.
  local -a parts=()
  local item
  IFS=',' read -r -a parts <<<"${raw}"

  for item in "${parts[@]}"; do
    # Trim surrounding whitespace with parameter expansion instead of forking
    # xargs, which also chokes on items containing quote characters.
    item="${item#"${item%%[![:space:]]*}"}"
    item="${item%"${item##*[![:space:]]}"}"
    if [[ -n "${item}" ]]; then
      printf '%s\n' "${item}"
    fi
  done
}
#######################################
# Build the value of the subjectAltName extension.
# Globals:   CH_CERT_DNS, CH_CERT_IP, CH_CERT_CN (read)
# Outputs:   comma-joined SAN list on stdout, e.g. "DNS:a.example,IP:10.0.0.1"
# Returns:   0
# Notes:     relies on split_csv to split and trim the comma-separated lists.
#######################################
build_san_line() {
  local -a san_entries=()
  local dns_item ip_item

  while IFS= read -r dns_item; do
    san_entries+=("DNS:${dns_item}")
  done < <(split_csv "${CH_CERT_DNS}")

  while IFS= read -r ip_item; do
    san_entries+=("IP:${ip_item}")
  done < <(split_csv "${CH_CERT_IP}")

  # Without any explicit SAN, fall back to the CN. An address-shaped CN has to
  # become an IP entry: clients match IP endpoints against IP SANs, not DNS.
  if (( ${#san_entries[@]} == 0 )); then
    local ipv4_re='^[0-9]{1,3}(\.[0-9]{1,3}){3}$'
    if [[ "${CH_CERT_CN}" =~ ${ipv4_re} || "${CH_CERT_CN}" == *:* ]]; then
      san_entries+=("IP:${CH_CERT_CN}")
    else
      san_entries+=("DNS:${CH_CERT_CN}")
    fi
  fi

  # "${array[*]}" joins on the first character of IFS; scope it to the function.
  local IFS=,
  printf '%s\n' "${san_entries[*]}"
}
#######################################
# Create a self-signed server certificate and install it for ClickHouse.
# Globals:   PKI_DIR, CH_CERT_CN, CH_CERT_DAYS, SERVER_CERT, SERVER_KEY,
#            CA_CERT (all read)
# Outputs:   progress message on stdout; key/CSR/cert/ext files in PKI_DIR,
#            copies at SERVER_CERT and SERVER_KEY
# Notes:     removes any file at CA_CERT, since no CA is involved here.
#######################################
generate_self_signed_cert() {
  echo "[INFO] generating self-signed server certificate (crt+key only) ..."

  local key_path="${PKI_DIR}/server.key"
  local csr_path="${PKI_DIR}/server.csr"
  local crt_path="${PKI_DIR}/server.crt"
  local ext_path="${PKI_DIR}/server.ext"
  local san_value
  san_value="$(build_san_line)"

  # Private key and signing request.
  openssl genrsa -out "${key_path}" 2048
  openssl req -new \
    -key "${key_path}" \
    -out "${csr_path}" \
    -subj "/CN=${CH_CERT_CN}"

  # X.509 v3 extensions applied when the certificate is issued.
  printf '%s\n' \
    "subjectAltName=${san_value}" \
    'keyUsage=digitalSignature,keyEncipherment' \
    'extendedKeyUsage=serverAuth' >"${ext_path}"

  # Sign the request with its own key (self-signed).
  openssl x509 -req \
    -in "${csr_path}" \
    -signkey "${key_path}" \
    -out "${crt_path}" \
    -days "${CH_CERT_DAYS}" \
    -sha256 \
    -extfile "${ext_path}"

  # Publish to the paths referenced by the ClickHouse config.
  cp -f "${crt_path}" "${SERVER_CERT}"
  cp -f "${key_path}" "${SERVER_KEY}"
  rm -f "${CA_CERT}"
}
#######################################
# Create a local CA, issue a server certificate from it, and install both.
# Globals:   PKI_DIR, CH_CERT_CN, CH_CERT_DAYS, SERVER_CERT, SERVER_KEY,
#            CA_CERT (all read)
# Outputs:   progress message on stdout; CA and server material in PKI_DIR,
#            copies at SERVER_CERT, SERVER_KEY and CA_CERT
#######################################
generate_cert_with_ca() {
  echo "[INFO] generating local CA and server certificate ..."

  local ca_key_path="${PKI_DIR}/ca.key"
  local ca_crt_path="${PKI_DIR}/ca.crt"
  local key_path="${PKI_DIR}/server.key"
  local csr_path="${PKI_DIR}/server.csr"
  local crt_path="${PKI_DIR}/server.crt"
  local ext_path="${PKI_DIR}/server.ext"
  local san_value
  san_value="$(build_san_line)"

  # Local certificate authority: 4096-bit key, certificate valid for 3650 days.
  openssl genrsa -out "${ca_key_path}" 4096
  openssl req -x509 -new -nodes \
    -key "${ca_key_path}" \
    -sha256 \
    -days 3650 \
    -out "${ca_crt_path}" \
    -subj "/CN=ClickHouse Local CA"

  # Server key and signing request.
  openssl genrsa -out "${key_path}" 2048
  openssl req -new \
    -key "${key_path}" \
    -out "${csr_path}" \
    -subj "/CN=${CH_CERT_CN}"

  # X.509 v3 extensions applied when the certificate is issued.
  printf '%s\n' \
    "subjectAltName=${san_value}" \
    'keyUsage=digitalSignature,keyEncipherment' \
    'extendedKeyUsage=serverAuth' >"${ext_path}"

  # Issue the server certificate from the local CA.
  openssl x509 -req \
    -in "${csr_path}" \
    -CA "${ca_crt_path}" \
    -CAkey "${ca_key_path}" \
    -CAcreateserial \
    -out "${crt_path}" \
    -days "${CH_CERT_DAYS}" \
    -sha256 \
    -extfile "${ext_path}"

  # Publish to the paths referenced by the ClickHouse config.
  cp -f "${crt_path}" "${SERVER_CERT}"
  cp -f "${key_path}" "${SERVER_KEY}"
  cp -f "${ca_crt_path}" "${CA_CERT}"
}
# Certificate source: operator-supplied files win; otherwise generate new ones.
if [[ -z "${SRC_CERT}" && -z "${SRC_KEY}" ]]; then
  # Nothing supplied: CH_GENERATE_CA chooses between CA-signed and self-signed.
  case "$(echo "${CH_GENERATE_CA}" | tr '[:upper:]' '[:lower:]')" in
    1 | true | yes | on)
      generate_cert_with_ca
      ;;
    *)
      generate_self_signed_cert
      ;;
  esac
else
  # A certificate without its key (or the reverse) cannot be installed.
  if [[ -z "${SRC_CERT}" || -z "${SRC_KEY}" ]]; then
    echo "[ERROR] SRC_CERT and SRC_KEY must be provided together"
    exit 1
  fi
  echo "[INFO] using provided certificate files ..."
  cp -f "${SRC_CERT}" "${SERVER_CERT}"
  cp -f "${SRC_KEY}" "${SERVER_KEY}"
  # The CA file is optional; drop a stale one when none is supplied this time.
  if [[ -z "${SRC_CA}" ]]; then
    rm -f "${CA_CERT}"
  else
    cp -f "${SRC_CA}" "${CA_CERT}"
  fi
fi

# Ownership changes are best-effort (failures are ignored); modes are enforced.
chown clickhouse:clickhouse "${SERVER_CERT}" "${SERVER_KEY}" || true
chmod 0644 "${SERVER_CERT}"
chmod 0640 "${SERVER_KEY}"

if [[ -f "${CA_CERT}" ]]; then
  chown clickhouse:clickhouse "${CA_CERT}" || true
  chmod 0644 "${CA_CERT}"
fi
# Write the config.d override that turns on the HTTPS listener.
# The heredoc delimiter is unquoted, so CH_HTTPS_PORT, CH_LISTEN_HOST,
# SERVER_CERT and SERVER_KEY are expanded by the shell before the file is
# written; the XML is stored exactly as produced here.
# verificationMode=none: no certificate verification is requested by the server.
# NOTE(review): listen_host looks like a server-wide setting rather than an
# HTTPS-only one - confirm that the default "::" is meant to apply to the
# other ClickHouse ports as well.
echo "[INFO] writing ClickHouse HTTPS override config ..."
cat >"${OVERRIDE_FILE}" <<EOF
<clickhouse>
<https_port>${CH_HTTPS_PORT}</https_port>
<listen_host>${CH_LISTEN_HOST}</listen_host>
<openSSL>
<server>
<certificateFile>${SERVER_CERT}</certificateFile>
<privateKeyFile>${SERVER_KEY}</privateKeyFile>
<verificationMode>none</verificationMode>
<loadDefaultCAFile>true</loadDefaultCAFile>
<cacheSessions>true</cacheSessions>
<disableProtocols>sslv2,sslv3</disableProtocols>
<preferServerCiphers>true</preferServerCiphers>
<invalidCertificateHandler>
<name>RejectCertificateHandler</name>
</invalidCertificateHandler>
</server>
</openSSL>
</clickhouse>
EOF
# Restart ClickHouse so the override config is loaded, then report the result.
echo "[INFO] restarting clickhouse-server ..."
systemctl restart clickhouse-server
sleep 2

echo "[INFO] service status ..."
systemctl --no-pager -l status clickhouse-server | sed -n '1,15p'

# Probe the HTTPS endpoint with retries instead of a single attempt: the
# listener may need longer than the fixed sleep above to come up. A failed
# probe stays non-fatal (best-effort, as before) but is now reported instead
# of being hidden behind an unconditional "[OK]".
# curl runs without -f on purpose: any HTTP response, including an
# authentication error, proves that the TLS listener is answering.
echo "[INFO] verifying HTTPS endpoint ..."
https_ok=false
for _ in 1 2 3 4 5 6 7 8 9 10; do
  if curl -sk --max-time 5 "https://127.0.0.1:${CH_HTTPS_PORT}/?query=SELECT%201"; then
    https_ok=true
    break
  fi
  sleep 1
done
echo

if [[ "${https_ok}" == "true" ]]; then
  echo "[OK] ClickHouse HTTPS setup finished"
else
  echo "[WARN] ClickHouse HTTPS setup finished, but https://127.0.0.1:${CH_HTTPS_PORT} did not answer; check 'journalctl -u clickhouse-server' and the CH_LISTEN_HOST value" >&2
fi
echo " HTTPS port : ${CH_HTTPS_PORT}"
echo " cert file : ${SERVER_CERT}"
echo " key file : ${SERVER_KEY}"
if [[ -f "${CA_CERT}" ]]; then
  echo " CA file : ${CA_CERT}"
  echo " import this CA file into API/Fluent Bit hosts if tls.verify=On"
fi

View File

@@ -0,0 +1,38 @@
#!/usr/bin/env bash
#
# Create the target database (if missing) and load init_waf_logs_tables.sql,
# which must sit next to this script, through clickhouse-client.
#
# Optional environment variables:
#   CH_HOST      server host            (default 127.0.0.1)
#   CH_PORT      native protocol port   (default 9000)
#   CH_USER      user name              (default default)
#   CH_PASSWORD  password               (default empty: none is sent)
#   CH_DATABASE  target database        (default default)
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
SQL_FILE="${SCRIPT_DIR}/init_waf_logs_tables.sql"

if [[ ! -f "${SQL_FILE}" ]]; then
  echo "[ERROR] SQL file not found: ${SQL_FILE}" >&2
  exit 1
fi

if ! command -v clickhouse-client >/dev/null 2>&1; then
  echo "[ERROR] clickhouse-client not found. Please install ClickHouse client first." >&2
  exit 1
fi

CH_HOST="${CH_HOST:-127.0.0.1}"
CH_PORT="${CH_PORT:-9000}"
CH_USER="${CH_USER:-default}"
CH_PASSWORD="${CH_PASSWORD:-}"
CH_DATABASE="${CH_DATABASE:-default}"

# CH_DATABASE is spliced into SQL text below, both as a bare identifier and
# inside a string literal. Accept only plain identifiers so that a typo or a
# crafted value cannot change the statements that are executed.
if [[ ! "${CH_DATABASE}" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]]; then
  echo "[ERROR] CH_DATABASE must match [A-Za-z_][A-Za-z0-9_]*, got: ${CH_DATABASE}" >&2
  exit 1
fi

# Connection arguments as an array so values with spaces stay single words.
# NOTE(review): the password is passed on the command line and is therefore
# visible in the process list while clickhouse-client runs.
args=(--host "${CH_HOST}" --port "${CH_PORT}" --user "${CH_USER}")
if [[ -n "${CH_PASSWORD}" ]]; then
  args+=(--password "${CH_PASSWORD}")
fi

echo "[INFO] creating database if not exists: ${CH_DATABASE}"
clickhouse-client "${args[@]}" --query "CREATE DATABASE IF NOT EXISTS ${CH_DATABASE}"

echo "[INFO] initializing tables in database: ${CH_DATABASE}"
clickhouse-client "${args[@]}" --database "${CH_DATABASE}" <"${SQL_FILE}"

echo "[INFO] checking table status ..."
clickhouse-client "${args[@]}" --database "${CH_DATABASE}" --query \
  "SELECT name, engine FROM system.tables WHERE database='${CH_DATABASE}' AND name IN ('logs_ingest','dns_logs_ingest') ORDER BY name"

echo "[OK] ClickHouse ingest tables are ready in database '${CH_DATABASE}'"

View File

@@ -0,0 +1,69 @@
-- Initialize HTTP and DNS ingest tables for GoEdge access logs.
-- Run with:
--   clickhouse-client --database <db_name> < init_waf_logs_tables.sql

-- HTTP access-log ingest table.
-- Column clauses follow the documented ClickHouse order
--   name type [DEFAULT expr] [CODEC(...)]
-- i.e. DEFAULT is written before CODEC.
CREATE TABLE IF NOT EXISTS logs_ingest
(
    timestamp DateTime CODEC(DoubleDelta, ZSTD(1)),
    node_id UInt64,
    cluster_id UInt64,
    server_id UInt64,
    host LowCardinality(String),
    ip String,
    method LowCardinality(String),
    path String CODEC(ZSTD(1)),
    status UInt16,
    bytes_in UInt64 CODEC(Delta, ZSTD(1)),
    bytes_out UInt64 CODEC(Delta, ZSTD(1)),
    cost_ms UInt32 CODEC(Delta, ZSTD(1)),
    ua String CODEC(ZSTD(1)),
    referer String CODEC(ZSTD(1)),
    log_type LowCardinality(String),
    trace_id String,
    firewall_policy_id UInt64 DEFAULT 0,
    firewall_rule_group_id UInt64 DEFAULT 0,
    firewall_rule_set_id UInt64 DEFAULT 0,
    firewall_rule_id UInt64 DEFAULT 0,
    -- Large text payloads: empty by default, stored with a stronger ZSTD level.
    request_headers String DEFAULT '' CODEC(ZSTD(3)),
    request_body String DEFAULT '' CODEC(ZSTD(3)),
    response_headers String DEFAULT '' CODEC(ZSTD(3)),
    response_body String DEFAULT '' CODEC(ZSTD(3)),
    -- Data-skipping indexes for the common lookup and filter columns.
    INDEX idx_trace_id trace_id TYPE bloom_filter(0.01) GRANULARITY 4,
    INDEX idx_ip ip TYPE bloom_filter(0.01) GRANULARITY 4,
    INDEX idx_host host TYPE tokenbf_v1(10240, 3, 0) GRANULARITY 4,
    INDEX idx_fw_policy firewall_policy_id TYPE minmax GRANULARITY 4,
    INDEX idx_status status TYPE minmax GRANULARITY 4
)
ENGINE = MergeTree
PARTITION BY toYYYYMMDD(timestamp)  -- one partition per day
ORDER BY (timestamp, node_id, server_id, trace_id)
SETTINGS index_granularity = 8192;
-- DNS query-log ingest table.
-- Column clauses follow the documented ClickHouse order
--   name type [DEFAULT expr] [CODEC(...)]
-- i.e. DEFAULT is written before CODEC.
CREATE TABLE IF NOT EXISTS dns_logs_ingest
(
    timestamp DateTime CODEC(DoubleDelta, ZSTD(1)),
    request_id String,
    node_id UInt64,
    cluster_id UInt64,
    domain_id UInt64,
    record_id UInt64,
    remote_addr String,
    question_name String,
    question_type LowCardinality(String),
    record_name String,
    record_type LowCardinality(String),
    record_value String,
    networking LowCardinality(String),
    is_recursive UInt8,
    error String CODEC(ZSTD(1)),
    ns_route_codes Array(String),
    -- Large text payload: empty by default, stored with a stronger ZSTD level.
    content_json String DEFAULT '' CODEC(ZSTD(3)),
    -- Data-skipping indexes for the common lookup and filter columns.
    INDEX idx_request_id request_id TYPE bloom_filter(0.01) GRANULARITY 4,
    INDEX idx_remote_addr remote_addr TYPE bloom_filter(0.01) GRANULARITY 4,
    INDEX idx_question_name question_name TYPE tokenbf_v1(10240, 3, 0) GRANULARITY 4,
    INDEX idx_domain_id domain_id TYPE minmax GRANULARITY 4
)
ENGINE = MergeTree
PARTITION BY toYYYYMMDD(timestamp)  -- one partition per day
ORDER BY (timestamp, request_id, node_id)
SETTINGS index_granularity = 8192;

View File

@@ -0,0 +1,95 @@
#!/usr/bin/env bash
#
# Install clickhouse-server and clickhouse-client on Ubuntu 22.04 or
# Amazon Linux 2023.
#
# Optional environment variables:
#   CLICKHOUSE_DEFAULT_PASSWORD  password to set for the `default` user
set -euo pipefail

# Package installation and service management need root.
if (( EUID != 0 )); then
  echo "[ERROR] please run as root"
  exit 1
fi

[[ -f /etc/os-release ]] || {
  echo "[ERROR] /etc/os-release not found"
  exit 1
}

# shellcheck disable=SC1091
source /etc/os-release
os_id="$(echo "${ID:-}" | tr '[:upper:]' '[:lower:]')"
os_ver="${VERSION_ID:-}"

# Exactly two platforms are recognised; each gets its own boolean flag.
is_ubuntu22=false
is_amzn2023=false
case "${os_id}" in
  ubuntu)
    if [[ "${os_ver}" == 22.04* ]]; then
      is_ubuntu22=true
    fi
    ;;
  amzn)
    if [[ "${os_ver}" == 2023* ]]; then
      is_amzn2023=true
    fi
    ;;
esac

if ! [[ "${is_ubuntu22}" == "true" || "${is_amzn2023}" == "true" ]]; then
  echo "[ERROR] only Ubuntu 22.04 or Amazon Linux 2023 is supported. current: ID=${ID:-unknown}, VERSION_ID=${VERSION_ID:-unknown}"
  exit 1
fi
# Ubuntu 22.04: add the ClickHouse apt repository and install the packages.
if [[ "${is_ubuntu22}" == "true" ]]; then
  echo "[INFO] detected Ubuntu 22.04"
  echo "[INFO] installing prerequisites ..."
  apt-get update -y
  DEBIAN_FRONTEND=noninteractive apt-get install -y curl ca-certificates gnupg apt-transport-https lsb-release

  echo "[INFO] configuring ClickHouse apt repository ..."
  install -d -m 0755 /etc/apt/keyrings
  keyring_file="/etc/apt/keyrings/clickhouse.gpg"
  # -s rather than -f: an empty keyring left behind by an interrupted earlier
  # run counts as missing and is fetched again.
  if [[ ! -s "${keyring_file}" ]]; then
    # Build the keyring in a temporary file and move it into place only when
    # the download and the conversion both succeeded, so a failed run can
    # never leave a broken keyring that later runs would skip over.
    keyring_tmp="$(mktemp "${keyring_file}.XXXXXX")"
    if ! curl -fsSL https://packages.clickhouse.com/CLICKHOUSE-KEY.GPG \
      | gpg --dearmor --yes -o "${keyring_tmp}" \
      || [[ ! -s "${keyring_tmp}" ]]; then
      rm -f -- "${keyring_tmp}"
      echo "[ERROR] failed to fetch the ClickHouse apt signing key" >&2
      exit 1
    fi
    # mktemp creates the file owner-only; apt must be able to read the keyring.
    chmod 0644 "${keyring_tmp}"
    mv -f -- "${keyring_tmp}" "${keyring_file}"
  fi

  cat >/etc/apt/sources.list.d/clickhouse.list <<'EOF'
deb [signed-by=/etc/apt/keyrings/clickhouse.gpg arch=amd64,arm64] https://packages.clickhouse.com/deb stable main
EOF

  echo "[INFO] installing clickhouse-server and clickhouse-client ..."
  apt-get update -y
  DEBIAN_FRONTEND=noninteractive apt-get install -y clickhouse-server clickhouse-client clickhouse-common-static
fi
# Amazon Linux 2023: add the ClickHouse rpm repository and install the packages.
if [[ "${is_amzn2023}" == "true" ]]; then
  echo "[INFO] detected Amazon Linux 2023"
  echo "[INFO] installing prerequisites ..."
  dnf makecache -y
  # Request curl / gnupg2 only when the commands are really missing.
  # NOTE(review): AL2023 images are expected to provide curl and gpg through
  # curl-minimal / gnupg2-minimal, which conflict with the full packages, so an
  # unconditional "dnf install curl gnupg2" aborts the script - confirm on the
  # target AMI.
  prereq_pkgs=(ca-certificates dnf-plugins-core)
  if ! command -v curl >/dev/null 2>&1; then
    prereq_pkgs+=(curl)
  fi
  if ! command -v gpg >/dev/null 2>&1; then
    prereq_pkgs+=(gnupg2)
  fi
  dnf install -y "${prereq_pkgs[@]}"

  echo "[INFO] configuring ClickHouse yum repository ..."
  # Quoted delimiter: $basearch must reach the repo file literally (dnf
  # expands it). The second gpgkey URL is a continuation line and therefore
  # has to start with whitespace.
  cat >/etc/yum.repos.d/clickhouse.repo <<'EOF'
[clickhouse-stable]
name=ClickHouse Stable Repository
baseurl=https://packages.clickhouse.com/rpm/stable/$basearch
enabled=1
gpgcheck=1
gpgkey=https://packages.clickhouse.com/rpm/stable/repodata/repomd.xml.key
       https://packages.clickhouse.com/rpm/clickhouse-static.key
EOF

  echo "[INFO] installing clickhouse-server and clickhouse-client ..."
  dnf clean all
  dnf makecache -y
  # Retry without clickhouse-common-static if the three-package install fails.
  if ! dnf install -y clickhouse-server clickhouse-client clickhouse-common-static; then
    dnf install -y clickhouse-server clickhouse-client
  fi
fi
# Optional: set the password of the built-in `default` user.
# The password is stored as a SHA-256 hash in a users.d override that the
# server reads at start-up, instead of running ALTER USER.
# NOTE(review): on a fresh install `default` is defined in users.xml, which
# ClickHouse is expected to treat as read-only for SQL user management -
# confirm against the server version in use.
# Because nothing is spliced into SQL any more, passwords containing a single
# quote are accepted as well.
if [[ -n "${CLICKHOUSE_DEFAULT_PASSWORD:-}" ]]; then
  echo "[INFO] setting default user password ..."
  users_d_dir="/etc/clickhouse-server/users.d"
  password_file="${users_d_dir}/default-password.xml"
  password_hash="$(printf '%s' "${CLICKHOUSE_DEFAULT_PASSWORD}" | sha256sum)"
  password_hash="${password_hash%% *}" # keep the hex digest, drop the " -" suffix
  mkdir -p "${users_d_dir}"
  (
    # Create the file owner-only from the start; it holds a password hash.
    umask 077
    cat >"${password_file}" <<EOF
<clickhouse>
    <users>
        <default>
            <password remove='1' />
            <password_sha256_hex>${password_hash}</password_sha256_hex>
        </default>
    </users>
</clickhouse>
EOF
  )
  chown clickhouse:clickhouse "${password_file}"
  chmod 0640 "${password_file}"
fi

echo "[INFO] enabling clickhouse-server ..."
systemctl enable clickhouse-server >/dev/null 2>&1 || true
systemctl restart clickhouse-server

# The health check must authenticate with the password configured above;
# connecting without it would fail as soon as a password is set.
# NOTE(review): the password is visible in the process list while
# clickhouse-client runs.
client_args=()
if [[ -n "${CLICKHOUSE_DEFAULT_PASSWORD:-}" ]]; then
  client_args+=(--password "${CLICKHOUSE_DEFAULT_PASSWORD}")
fi

echo "[INFO] health check ..."
# Poll quietly while the server starts up, then run one visible attempt whose
# failure aborts the script (set -e), as the single check did before.
for _ in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15; do
  if clickhouse-client ${client_args[@]+"${client_args[@]}"} --query "SELECT 1" >/dev/null 2>&1; then
    break
  fi
  sleep 2
done
clickhouse-client ${client_args[@]+"${client_args[@]}"} --query "SELECT version()"

echo "[OK] ClickHouse install completed: ID=${ID:-unknown}, VERSION_ID=${VERSION_ID:-unknown}"

View File

@@ -0,0 +1,123 @@
-- =============================================================================
-- ClickHouse logs_ingest table optimization script
--
-- Notes:
-- - Every ALTER below is an online operation; no service downtime is needed.
-- - Run the stages in order and check system.parts after each stage to
--   confirm that it took effect.
-- - Codec changes only affect newly written parts; existing data is
--   re-encoded when parts are merged or after a manual OPTIMIZE.
--
-- How to run:
-- clickhouse-client --host 127.0.0.1 --port 9000 --user default --password 'xxx' < optimize_schema.sql
-- =============================================================================
-- =============================================
-- Stage 1: compression of large columns (the most significant effect)
-- =============================================
-- Large text columns switch to ZSTD(3); per the original author this compresses
-- JSON / HTTP text far better than the default LZ4.
-- Expected effect (author's estimate): 40%-60% less disk usage.
ALTER TABLE logs_ingest MODIFY COLUMN request_headers String CODEC(ZSTD(3));
ALTER TABLE logs_ingest MODIFY COLUMN request_body String CODEC(ZSTD(3));
ALTER TABLE logs_ingest MODIFY COLUMN response_headers String CODEC(ZSTD(3));
ALTER TABLE logs_ingest MODIFY COLUMN response_body String CODEC(ZSTD(3));
-- Medium-length text columns use ZSTD(1) to balance compression ratio and CPU cost.
ALTER TABLE logs_ingest MODIFY COLUMN ua String CODEC(ZSTD(1));
ALTER TABLE logs_ingest MODIFY COLUMN path String CODEC(ZSTD(1));
ALTER TABLE logs_ingest MODIFY COLUMN referer String CODEC(ZSTD(1));
-- Low-cardinality columns switch to LowCardinality (less memory and less disk).
-- method has very few distinct values (GET/POST/PUT/DELETE ...); the
-- cardinality of host depends on the number of sites.
ALTER TABLE logs_ingest MODIFY COLUMN method LowCardinality(String);
ALTER TABLE logs_ingest MODIFY COLUMN log_type LowCardinality(String);
ALTER TABLE logs_ingest MODIFY COLUMN host LowCardinality(String);
-- Numeric columns use Delta + ZSTD (exploits the time/size correlation of
-- neighbouring rows).
ALTER TABLE logs_ingest MODIFY COLUMN bytes_in UInt64 CODEC(Delta, ZSTD(1));
ALTER TABLE logs_ingest MODIFY COLUMN bytes_out UInt64 CODEC(Delta, ZSTD(1));
ALTER TABLE logs_ingest MODIFY COLUMN cost_ms UInt32 CODEC(Delta, ZSTD(1));
-- =============================================
-- Stage 2: add skipping indexes (speed up frequent filter queries)
-- =============================================
-- Exact trace_id lookup (log detail view, FindByTraceId).
-- bloom_filter(0.01) = 1% false-positive rate; GRANULARITY 4 = one bloom block
-- per 4 granules.
ALTER TABLE logs_ingest ADD INDEX IF NOT EXISTS idx_trace_id trace_id TYPE bloom_filter(0.01) GRANULARITY 4;
-- Exact IP lookup.
ALTER TABLE logs_ingest ADD INDEX IF NOT EXISTS idx_ip ip TYPE bloom_filter(0.01) GRANULARITY 4;
-- Fuzzy host queries (the original note says tokenbf_v1 helps LIKE '%xxx%').
-- NOTE(review): tokenbf_v1 indexes whole tokens; confirm that it really serves
-- substring LIKE patterns - ngrambf_v1 is the substring-oriented index type.
ALTER TABLE logs_ingest ADD INDEX IF NOT EXISTS idx_host host TYPE tokenbf_v1(10240, 3, 0) GRANULARITY 4;
-- firewall_policy_id filter (HasFirewallPolicy: > 0).
ALTER TABLE logs_ingest ADD INDEX IF NOT EXISTS idx_fw_policy firewall_policy_id TYPE minmax GRANULARITY 4;
-- status range filter (HasError: status >= 400).
ALTER TABLE logs_ingest ADD INDEX IF NOT EXISTS idx_status status TYPE minmax GRANULARITY 4;
-- =============================================
-- Stage 3: materialize the indexes for existing data
-- =============================================
-- Note: MATERIALIZE INDEX starts a background mutation; large tables may take
-- a while. Progress can be watched with
-- SELECT * FROM system.mutations WHERE is_done = 0
ALTER TABLE logs_ingest MATERIALIZE INDEX idx_trace_id;
ALTER TABLE logs_ingest MATERIALIZE INDEX idx_ip;
ALTER TABLE logs_ingest MATERIALIZE INDEX idx_host;
ALTER TABLE logs_ingest MATERIALIZE INDEX idx_fw_policy;
ALTER TABLE logs_ingest MATERIALIZE INDEX idx_status;
-- =============================================================================
-- dns_logs_ingest table optimization (DNS log table)
-- =============================================================================
-- Compression of large text columns.
ALTER TABLE dns_logs_ingest MODIFY COLUMN content_json String CODEC(ZSTD(3));
ALTER TABLE dns_logs_ingest MODIFY COLUMN error String CODEC(ZSTD(1));
-- Low-cardinality columns.
ALTER TABLE dns_logs_ingest MODIFY COLUMN question_type LowCardinality(String);
ALTER TABLE dns_logs_ingest MODIFY COLUMN record_type LowCardinality(String);
ALTER TABLE dns_logs_ingest MODIFY COLUMN networking LowCardinality(String);
-- Exact request_id lookup.
ALTER TABLE dns_logs_ingest ADD INDEX IF NOT EXISTS idx_request_id request_id TYPE bloom_filter(0.01) GRANULARITY 4;
-- Exact remote_addr lookup.
ALTER TABLE dns_logs_ingest ADD INDEX IF NOT EXISTS idx_remote_addr remote_addr TYPE bloom_filter(0.01) GRANULARITY 4;
-- Fuzzy question_name queries.
-- NOTE(review): tokenbf_v1 indexes whole tokens; confirm it fits the query
-- patterns actually used against question_name.
ALTER TABLE dns_logs_ingest ADD INDEX IF NOT EXISTS idx_question_name question_name TYPE tokenbf_v1(10240, 3, 0) GRANULARITY 4;
-- domain_id filter.
ALTER TABLE dns_logs_ingest ADD INDEX IF NOT EXISTS idx_domain_id domain_id TYPE minmax GRANULARITY 4;
-- Materialize the indexes for existing data.
ALTER TABLE dns_logs_ingest MATERIALIZE INDEX idx_request_id;
ALTER TABLE dns_logs_ingest MATERIALIZE INDEX idx_remote_addr;
ALTER TABLE dns_logs_ingest MATERIALIZE INDEX idx_question_name;
ALTER TABLE dns_logs_ingest MATERIALIZE INDEX idx_domain_id;
-- =============================================================================
-- Verification queries (run after the ALTER statements above)
-- =============================================================================
-- Show the compression codec of each column:
-- SELECT name, type, compression_codec FROM system.columns WHERE table = 'logs_ingest' AND database = currentDatabase();
-- Show the compression ratio per table:
-- SELECT table, formatReadableSize(sum(data_compressed_bytes)) AS compressed, formatReadableSize(sum(data_uncompressed_bytes)) AS uncompressed, round(sum(data_uncompressed_bytes) / sum(data_compressed_bytes), 2) AS ratio FROM system.columns WHERE table IN ('logs_ingest', 'dns_logs_ingest') GROUP BY table;
-- Show disk usage per column (find the largest columns):
-- SELECT name, formatReadableSize(sum(data_compressed_bytes)) AS compressed, formatReadableSize(sum(data_uncompressed_bytes)) AS uncompressed FROM system.columns WHERE table = 'logs_ingest' GROUP BY name ORDER BY sum(data_compressed_bytes) DESC;
-- Show mutation progress:
-- SELECT database, table, mutation_id, command, is_done, parts_to_do FROM system.mutations WHERE is_done = 0;
-- Force a merge (optional; applies the codec changes to existing data):
-- OPTIMIZE TABLE logs_ingest FINAL;
-- OPTIMIZE TABLE dns_logs_ingest FINAL;