引入lumberjack和fluentbit自动分发

This commit is contained in:
robin
2026-02-13 22:36:17 +08:00
parent c6da67db79
commit e9093baffb
47 changed files with 4589 additions and 317 deletions

111
deploy/clickhouse/README.md Normal file
View File

@@ -0,0 +1,111 @@
# ClickHouse + Fluent Bit 使用手册(Ubuntu 22.04 / Amazon Linux 2023)
## 1. 支持范围
- Ubuntu 22.04
- Amazon Linux 2023(AWS)
安装脚本:`install_clickhouse_linux.sh`(自动识别上述系统)。
## 2. 安装 ClickHouse
```bash
cd /path/to/waf-platform/deploy/clickhouse
chmod +x install_clickhouse_linux.sh
sudo ./install_clickhouse_linux.sh
```
可选:安装时初始化 `default` 用户密码:
```bash
sudo CLICKHOUSE_DEFAULT_PASSWORD='YourStrongPassword' ./install_clickhouse_linux.sh
```
## 3. 开启 HTTPS(默认仅 crt+key)
脚本默认生成 `server.crt + server.key`(带 SAN)并启用 8443:
```bash
cd /path/to/waf-platform/deploy/clickhouse
chmod +x configure_clickhouse_https.sh
sudo CH_HTTPS_PORT=8443 \
CH_CERT_CN=clickhouse.example.com \
CH_CERT_DNS=clickhouse.example.com \
CH_CERT_IP=<CLICKHOUSE_IP> \
./configure_clickhouse_https.sh
```
使用已有证书:
```bash
sudo SRC_CERT=/path/to/server.crt \
SRC_KEY=/path/to/server.key \
CH_HTTPS_PORT=8443 \
./configure_clickhouse_https.sh
```
## 4. 初始化日志表(含优化)
```bash
cd /path/to/waf-platform/deploy/clickhouse
chmod +x init_waf_logs_tables.sh
sudo CH_HOST=127.0.0.1 \
CH_PORT=9000 \
CH_USER=default \
CH_PASSWORD='YourStrongPassword' \
CH_DATABASE=default \
./init_waf_logs_tables.sh
```
说明:
- `init_waf_logs_tables.sql` 已内置主要优化(`CODEC``LowCardinality`、跳数索引)。
- `optimize_schema.sql` 主要用于历史表补齐优化,不是首次建表必需步骤。
## 5. 平台侧配置(EdgeAdmin)
在 ClickHouse 设置页配置:
- Host:ClickHouse 地址
- Port:`8443`
- Database:`default`
- Scheme:`https`
当前实现说明:
- 前端不再提供 `TLS跳过校验``TLS Server Name` 配置项。
- 后端固定 `TLSSkipVerify=true`(默认不校验证书)。
保存后点击“测试连接”。
## 6. Fluent Bit 配置方式
推荐平台托管模式(在线安装/升级 Node、DNS 时自动下发):
- `/etc/fluent-bit/fluent-bit.conf`
- `/etc/fluent-bit/.edge-managed.env`
- `/etc/fluent-bit/.edge-managed.json`
检查状态:
```bash
sudo systemctl status fluent-bit --no-pager
sudo cat /etc/fluent-bit/.edge-managed.json
```
## 7. 验证与排障
查看 Fluent Bit 日志:
```bash
sudo journalctl -u fluent-bit -f
```
查看写入:
```sql
SELECT count() FROM default.logs_ingest;
SELECT count() FROM default.dns_logs_ingest;
```
常见错误:
- `connection refused`:8443 未监听或网络未放行。
- `legacy Common Name`:证书缺 SAN,需重签。

View File

@@ -0,0 +1,227 @@
#!/usr/bin/env bash
# configure_clickhouse_https.sh — enable HTTPS on an existing ClickHouse
# server, generating or installing server certificates as needed.
# Supported: Ubuntu 22.04 and Amazon Linux 2023 only.
set -euo pipefail

# Root is required: we write under /etc/clickhouse-server and restart services.
if [[ "${EUID}" -ne 0 ]]; then
  echo "[ERROR] please run as root"
  exit 1
fi

if [[ ! -f /etc/os-release ]]; then
  echo "[ERROR] /etc/os-release not found"
  exit 1
fi
# shellcheck disable=SC1091
source /etc/os-release

os_id="$(echo "${ID:-}" | tr '[:upper:]' '[:lower:]')"
os_ver="${VERSION_ID:-}"
is_ubuntu22=false
is_amzn2023=false
if [[ "${os_id}" == "ubuntu" && "${os_ver}" == 22.04* ]]; then
  is_ubuntu22=true
fi
if [[ "${os_id}" == "amzn" && "${os_ver}" == 2023* ]]; then
  is_amzn2023=true
fi
if [[ "${is_ubuntu22}" != "true" && "${is_amzn2023}" != "true" ]]; then
  echo "[ERROR] only Ubuntu 22.04 or Amazon Linux 2023 is supported. current: ID=${ID:-unknown}, VERSION_ID=${VERSION_ID:-unknown}"
  exit 1
fi

# Install prerequisites only when one of them is missing.
if ! command -v openssl >/dev/null 2>&1 || ! command -v curl >/dev/null 2>&1; then
  if [[ "${is_ubuntu22}" == "true" ]]; then
    apt-get update -y
    DEBIAN_FRONTEND=noninteractive apt-get install -y openssl curl ca-certificates
  else
    dnf makecache -y
    # --allowerasing: Amazon Linux 2023 preinstalls curl-minimal, which
    # conflicts with the full curl package; without this flag the install fails.
    dnf install -y --allowerasing openssl curl ca-certificates
  fi
fi
# ---- tunables, all overridable via environment ----
CH_HTTPS_PORT="${CH_HTTPS_PORT:-8443}"        # HTTPS port to expose
CH_LISTEN_HOST="${CH_LISTEN_HOST:-::}"        # '::' = all interfaces (v6+v4)
CH_CERT_CN="${CH_CERT_CN:-$(hostname -f 2>/dev/null || hostname)}"  # cert CN; FQDN if resolvable
CH_CERT_DNS="${CH_CERT_DNS:-}"                # comma-separated SAN DNS names
CH_CERT_IP="${CH_CERT_IP:-}"                  # comma-separated SAN IPs
CH_CERT_DAYS="${CH_CERT_DAYS:-825}"           # server cert validity in days
CH_GENERATE_CA="${CH_GENERATE_CA:-false}"     # truthy -> issue via a local CA
SRC_CERT="${SRC_CERT:-}"                      # pre-existing cert to install instead
SRC_KEY="${SRC_KEY:-}"                        # pre-existing key (paired with SRC_CERT)
SRC_CA="${SRC_CA:-}"                          # optional CA bundle for SRC_CERT

# ---- fixed install locations under the ClickHouse config directory ----
CH_DIR="/etc/clickhouse-server"
CH_CONFIG_D_DIR="${CH_DIR}/config.d"
PKI_DIR="${CH_DIR}/pki"                       # scratch dir for generated material
SERVER_CERT="${CH_DIR}/server.crt"
SERVER_KEY="${CH_DIR}/server.key"
CA_CERT="${CH_DIR}/ca.crt"
OVERRIDE_FILE="${CH_CONFIG_D_DIR}/waf-https.xml"
mkdir -p "${CH_CONFIG_D_DIR}" "${PKI_DIR}"
# split_csv: print each non-empty, whitespace-trimmed element of a
# comma-separated string on its own line.
# Arguments: $1 - raw CSV string (may be empty)
# Outputs:   one trimmed item per line on stdout
# Returns:   0 always
split_csv() {
  local raw="$1"
  local -a parts=()
  local item
  if [[ -z "${raw}" ]]; then
    return 0
  fi
  IFS=',' read -r -a parts <<<"${raw}"
  for item in "${parts[@]}"; do
    # Trim with parameter expansion instead of xargs: xargs also interprets
    # quotes/backslashes and can mangle or reject otherwise valid values.
    item="${item#"${item%%[![:space:]]*}"}"
    item="${item%"${item##*[![:space:]]}"}"
    if [[ -n "${item}" ]]; then
      printf '%s\n' "${item}"
    fi
  done
}
# build_san_line: assemble the subjectAltName value for the server cert.
# Reads globals: CH_CERT_DNS, CH_CERT_IP, CH_CERT_CN.
# Outputs: comma-joined SAN entries (e.g. "DNS:a.example,IP:1.2.3.4") on stdout.
build_san_line() {
  local -a entries=()
  local name addr
  while IFS= read -r name; do
    entries+=("DNS:${name}")
  done < <(split_csv "${CH_CERT_DNS}")
  while IFS= read -r addr; do
    entries+=("IP:${addr}")
  done < <(split_csv "${CH_CERT_IP}")
  # Guarantee at least one SAN by falling back to the CN.
  if [[ ${#entries[@]} -eq 0 ]]; then
    entries=("DNS:${CH_CERT_CN}")
  fi
  # Subshell keeps the IFS override from leaking to the caller.
  (IFS=','; printf '%s\n' "${entries[*]}")
}
# generate_self_signed_cert: create a plain self-signed server certificate
# (no CA) in PKI_DIR and install the crt/key pair at SERVER_CERT/SERVER_KEY.
# Reads globals: PKI_DIR, CH_CERT_CN, CH_CERT_DAYS, SERVER_CERT, SERVER_KEY, CA_CERT.
generate_self_signed_cert() {
  echo "[INFO] generating self-signed server certificate (crt+key only) ..."
  local key_file="${PKI_DIR}/server.key"
  local csr_file="${PKI_DIR}/server.csr"
  local crt_file="${PKI_DIR}/server.crt"
  local ext_file="${PKI_DIR}/server.ext"
  local san_line
  san_line="$(build_san_line)"
  # Key + CSR, then self-sign the CSR with SAN/EKU extensions attached.
  openssl genrsa -out "${key_file}" 2048
  openssl req -new -key "${key_file}" -out "${csr_file}" -subj "/CN=${CH_CERT_CN}"
  printf '%s\n' \
    "subjectAltName=${san_line}" \
    "keyUsage=digitalSignature,keyEncipherment" \
    "extendedKeyUsage=serverAuth" >"${ext_file}"
  openssl x509 -req -in "${csr_file}" -signkey "${key_file}" \
    -out "${crt_file}" -days "${CH_CERT_DAYS}" -sha256 -extfile "${ext_file}"
  cp -f "${crt_file}" "${SERVER_CERT}"
  cp -f "${key_file}" "${SERVER_KEY}"
  # No CA exists in this mode; drop any stale CA file from a previous run.
  rm -f "${CA_CERT}"
}
# generate_cert_with_ca: create a local throwaway CA plus a CA-signed server
# certificate, then install server.crt/server.key and ca.crt.
# Reads globals: PKI_DIR, CH_CERT_CN, CH_CERT_DAYS, SERVER_CERT, SERVER_KEY, CA_CERT.
generate_cert_with_ca() {
  echo "[INFO] generating local CA and server certificate ..."
  local ca_key="${PKI_DIR}/ca.key"
  local ca_crt="${PKI_DIR}/ca.crt"
  local server_key="${PKI_DIR}/server.key"
  local server_csr="${PKI_DIR}/server.csr"
  local server_crt="${PKI_DIR}/server.crt"
  local ext_file="${PKI_DIR}/server.ext"
  local san_line
  san_line="$(build_san_line)"
  # 4096-bit CA key, 10-year CA validity.
  openssl genrsa -out "${ca_key}" 4096
  openssl req -x509 -new -nodes -key "${ca_key}" -sha256 -days 3650 \
    -out "${ca_crt}" -subj "/CN=ClickHouse Local CA"
  # Server key + CSR.
  openssl genrsa -out "${server_key}" 2048
  openssl req -new -key "${server_key}" -out "${server_csr}" -subj "/CN=${CH_CERT_CN}"
  # SAN is mandatory for modern TLS clients (CN-only certs are rejected as legacy).
  cat >"${ext_file}" <<EOF
subjectAltName=${san_line}
keyUsage=digitalSignature,keyEncipherment
extendedKeyUsage=serverAuth
EOF
  # Sign the server CSR with the local CA.
  openssl x509 -req -in "${server_csr}" -CA "${ca_crt}" -CAkey "${ca_key}" -CAcreateserial \
    -out "${server_crt}" -days "${CH_CERT_DAYS}" -sha256 -extfile "${ext_file}"
  cp -f "${server_crt}" "${SERVER_CERT}"
  cp -f "${server_key}" "${SERVER_KEY}"
  cp -f "${ca_crt}" "${CA_CERT}"
}
if [[ -n "${SRC_CERT}" || -n "${SRC_KEY}" ]]; then
if [[ -z "${SRC_CERT}" || -z "${SRC_KEY}" ]]; then
echo "[ERROR] SRC_CERT and SRC_KEY must be provided together"
exit 1
fi
echo "[INFO] using provided certificate files ..."
cp -f "${SRC_CERT}" "${SERVER_CERT}"
cp -f "${SRC_KEY}" "${SERVER_KEY}"
if [[ -n "${SRC_CA}" ]]; then
cp -f "${SRC_CA}" "${CA_CERT}"
else
rm -f "${CA_CERT}"
fi
else
case "$(echo "${CH_GENERATE_CA}" | tr '[:upper:]' '[:lower:]')" in
1|true|yes|on)
generate_cert_with_ca
;;
*)
generate_self_signed_cert
;;
esac
fi
chown clickhouse:clickhouse "${SERVER_CERT}" "${SERVER_KEY}" || true
chmod 0644 "${SERVER_CERT}"
chmod 0640 "${SERVER_KEY}"
if [[ -f "${CA_CERT}" ]]; then
chown clickhouse:clickhouse "${CA_CERT}" || true
chmod 0644 "${CA_CERT}"
fi
echo "[INFO] writing ClickHouse HTTPS override config ..."
cat >"${OVERRIDE_FILE}" <<EOF
<clickhouse>
<https_port>${CH_HTTPS_PORT}</https_port>
<listen_host>${CH_LISTEN_HOST}</listen_host>
<openSSL>
<server>
<certificateFile>${SERVER_CERT}</certificateFile>
<privateKeyFile>${SERVER_KEY}</privateKeyFile>
<verificationMode>none</verificationMode>
<loadDefaultCAFile>true</loadDefaultCAFile>
<cacheSessions>true</cacheSessions>
<disableProtocols>sslv2,sslv3</disableProtocols>
<preferServerCiphers>true</preferServerCiphers>
<invalidCertificateHandler>
<name>RejectCertificateHandler</name>
</invalidCertificateHandler>
</server>
</openSSL>
</clickhouse>
EOF
echo "[INFO] restarting clickhouse-server ..."
systemctl restart clickhouse-server
sleep 2
echo "[INFO] service status ..."
systemctl --no-pager -l status clickhouse-server | sed -n '1,15p'
echo "[INFO] verifying HTTPS endpoint ..."
curl -sk "https://127.0.0.1:${CH_HTTPS_PORT}/?query=SELECT%201" || true
echo
echo "[OK] ClickHouse HTTPS setup finished"
echo " HTTPS port : ${CH_HTTPS_PORT}"
echo " cert file : ${SERVER_CERT}"
echo " key file : ${SERVER_KEY}"
if [[ -f "${CA_CERT}" ]]; then
echo " CA file : ${CA_CERT}"
echo " import this CA file into API/Fluent Bit hosts if tls.verify=On"
fi

View File

@@ -0,0 +1,38 @@
#!/usr/bin/env bash
# init_waf_logs_tables.sh — create the WAF log database and ingest tables
# from init_waf_logs_tables.sql (located next to this script).
# Env (all optional): CH_HOST, CH_PORT, CH_USER, CH_PASSWORD, CH_DATABASE.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
SQL_FILE="${SCRIPT_DIR}/init_waf_logs_tables.sql"

if [[ ! -f "${SQL_FILE}" ]]; then
  echo "[ERROR] SQL file not found: ${SQL_FILE}"
  exit 1
fi
if ! command -v clickhouse-client >/dev/null 2>&1; then
  echo "[ERROR] clickhouse-client not found. Please install ClickHouse client first."
  exit 1
fi

CH_HOST="${CH_HOST:-127.0.0.1}"
CH_PORT="${CH_PORT:-9000}"
CH_USER="${CH_USER:-default}"
CH_PASSWORD="${CH_PASSWORD:-}"
CH_DATABASE="${CH_DATABASE:-default}"

# CH_DATABASE is interpolated into SQL text below; restrict it to a plain
# identifier so a malformed env value cannot inject arbitrary statements.
if [[ ! "${CH_DATABASE}" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]]; then
  echo "[ERROR] CH_DATABASE must be a plain identifier ([A-Za-z_][A-Za-z0-9_]*), got: ${CH_DATABASE}"
  exit 1
fi

args=(--host "${CH_HOST}" --port "${CH_PORT}" --user "${CH_USER}")
if [[ -n "${CH_PASSWORD}" ]]; then
  # NOTE(review): --password is visible in `ps` output on multi-user hosts;
  # consider a client config file if that matters in your environment.
  args+=(--password "${CH_PASSWORD}")
fi

echo "[INFO] creating database if not exists: ${CH_DATABASE}"
clickhouse-client "${args[@]}" --query "CREATE DATABASE IF NOT EXISTS ${CH_DATABASE}"
echo "[INFO] initializing tables in database: ${CH_DATABASE}"
clickhouse-client "${args[@]}" --database "${CH_DATABASE}" < "${SQL_FILE}"
echo "[INFO] checking table status ..."
clickhouse-client "${args[@]}" --database "${CH_DATABASE}" --query \
  "SELECT name, engine FROM system.tables WHERE database='${CH_DATABASE}' AND name IN ('logs_ingest','dns_logs_ingest') ORDER BY name"
echo "[OK] ClickHouse ingest tables are ready in database '${CH_DATABASE}'"

View File

@@ -0,0 +1,69 @@
-- Initialize HTTP and DNS ingest tables for GoEdge access logs.
-- Run with:
--   clickhouse-client --database <db_name> < init_waf_logs_tables.sql

-- HTTP access / WAF log ingest table (written by Fluent Bit as JSONEachRow).
CREATE TABLE IF NOT EXISTS logs_ingest
(
    -- DoubleDelta suits near-monotonic ingest timestamps.
    timestamp DateTime CODEC(DoubleDelta, ZSTD(1)),
    node_id UInt64,
    cluster_id UInt64,
    server_id UInt64,
    -- LowCardinality: bounded distinct values (site hostnames, HTTP verbs, log kinds).
    host LowCardinality(String),
    ip String,
    method LowCardinality(String),
    path String CODEC(ZSTD(1)),
    status UInt16,
    -- Delta+ZSTD exploits similarity between adjacent rows' sizes/latency.
    bytes_in UInt64 CODEC(Delta, ZSTD(1)),
    bytes_out UInt64 CODEC(Delta, ZSTD(1)),
    cost_ms UInt32 CODEC(Delta, ZSTD(1)),
    ua String CODEC(ZSTD(1)),
    referer String CODEC(ZSTD(1)),
    log_type LowCardinality(String),
    trace_id String,
    firewall_policy_id UInt64 DEFAULT 0,
    firewall_rule_group_id UInt64 DEFAULT 0,
    firewall_rule_set_id UInt64 DEFAULT 0,
    firewall_rule_id UInt64 DEFAULT 0,
    -- Large HTTP payload snapshots: heavier ZSTD(3) for better ratio.
    request_headers String CODEC(ZSTD(3)) DEFAULT '',
    request_body String CODEC(ZSTD(3)) DEFAULT '',
    response_headers String CODEC(ZSTD(3)) DEFAULT '',
    response_body String CODEC(ZSTD(3)) DEFAULT '',
    -- Skipping indexes for the dominant filters: exact trace/ip lookup,
    -- host token search, firewall-policy and status range pruning.
    INDEX idx_trace_id trace_id TYPE bloom_filter(0.01) GRANULARITY 4,
    INDEX idx_ip ip TYPE bloom_filter(0.01) GRANULARITY 4,
    INDEX idx_host host TYPE tokenbf_v1(10240, 3, 0) GRANULARITY 4,
    INDEX idx_fw_policy firewall_policy_id TYPE minmax GRANULARITY 4,
    INDEX idx_status status TYPE minmax GRANULARITY 4
)
ENGINE = MergeTree
-- Daily partitions keep retention cheap (DROP PARTITION by day).
PARTITION BY toYYYYMMDD(timestamp)
ORDER BY (timestamp, node_id, server_id, trace_id)
SETTINGS index_granularity = 8192;
-- DNS query log ingest table (written by Fluent Bit as JSONEachRow).
CREATE TABLE IF NOT EXISTS dns_logs_ingest
(
    -- DoubleDelta suits near-monotonic ingest timestamps.
    timestamp DateTime CODEC(DoubleDelta, ZSTD(1)),
    request_id String,
    node_id UInt64,
    cluster_id UInt64,
    domain_id UInt64,
    record_id UInt64,
    remote_addr String,
    question_name String,
    -- LowCardinality: bounded value sets (record types, network protocol).
    question_type LowCardinality(String),
    record_name String,
    record_type LowCardinality(String),
    record_value String,
    networking LowCardinality(String),
    is_recursive UInt8,
    error String CODEC(ZSTD(1)),
    ns_route_codes Array(String),
    -- Raw log JSON blob: heavier ZSTD(3) since it is rarely read back.
    content_json String CODEC(ZSTD(3)) DEFAULT '',
    -- Skipping indexes: exact request/client lookup, question token search,
    -- domain-id range pruning.
    INDEX idx_request_id request_id TYPE bloom_filter(0.01) GRANULARITY 4,
    INDEX idx_remote_addr remote_addr TYPE bloom_filter(0.01) GRANULARITY 4,
    INDEX idx_question_name question_name TYPE tokenbf_v1(10240, 3, 0) GRANULARITY 4,
    INDEX idx_domain_id domain_id TYPE minmax GRANULARITY 4
)
ENGINE = MergeTree
PARTITION BY toYYYYMMDD(timestamp)
ORDER BY (timestamp, request_id, node_id)
SETTINGS index_granularity = 8192;

View File

@@ -0,0 +1,95 @@
#!/usr/bin/env bash
# install_clickhouse_linux.sh — install ClickHouse server/client.
# Supported: Ubuntu 22.04 and Amazon Linux 2023 only.
set -euo pipefail

# Root is required: we add package repositories and manage services.
if [[ "${EUID}" -ne 0 ]]; then
  echo "[ERROR] please run as root"
  exit 1
fi

if [[ ! -f /etc/os-release ]]; then
  echo "[ERROR] /etc/os-release not found"
  exit 1
fi
# shellcheck disable=SC1091
source /etc/os-release

# Normalize distro id and classify the host. is_ubuntu22 / is_amzn2023 are
# consumed by the install branches below.
distro="$(echo "${ID:-}" | tr '[:upper:]' '[:lower:]')"
release="${VERSION_ID:-}"
is_ubuntu22=false
is_amzn2023=false
if [[ "${distro}" == "ubuntu" && "${release}" == 22.04* ]]; then
  is_ubuntu22=true
elif [[ "${distro}" == "amzn" && "${release}" == 2023* ]]; then
  is_amzn2023=true
fi
if [[ "${is_ubuntu22}" != "true" && "${is_amzn2023}" != "true" ]]; then
  echo "[ERROR] only Ubuntu 22.04 or Amazon Linux 2023 is supported. current: ID=${ID:-unknown}, VERSION_ID=${VERSION_ID:-unknown}"
  exit 1
fi
if [[ "${is_ubuntu22}" == "true" ]]; then
echo "[INFO] detected Ubuntu 22.04"
echo "[INFO] installing prerequisites ..."
apt-get update -y
DEBIAN_FRONTEND=noninteractive apt-get install -y curl ca-certificates gnupg apt-transport-https lsb-release
echo "[INFO] configuring ClickHouse apt repository ..."
install -d -m 0755 /etc/apt/keyrings
if [[ ! -f /etc/apt/keyrings/clickhouse.gpg ]]; then
curl -fsSL https://packages.clickhouse.com/CLICKHOUSE-KEY.GPG | gpg --dearmor -o /etc/apt/keyrings/clickhouse.gpg
fi
cat >/etc/apt/sources.list.d/clickhouse.list <<'EOF'
deb [signed-by=/etc/apt/keyrings/clickhouse.gpg arch=amd64,arm64] https://packages.clickhouse.com/deb stable main
EOF
echo "[INFO] installing clickhouse-server and clickhouse-client ..."
apt-get update -y
DEBIAN_FRONTEND=noninteractive apt-get install -y clickhouse-server clickhouse-client clickhouse-common-static
fi
if [[ "${is_amzn2023}" == "true" ]]; then
echo "[INFO] detected Amazon Linux 2023"
echo "[INFO] installing prerequisites ..."
dnf makecache -y
dnf install -y curl ca-certificates gnupg2 dnf-plugins-core
echo "[INFO] configuring ClickHouse yum repository ..."
cat >/etc/yum.repos.d/clickhouse.repo <<'EOF'
[clickhouse-stable]
name=ClickHouse Stable Repository
baseurl=https://packages.clickhouse.com/rpm/stable/$basearch
enabled=1
gpgcheck=1
gpgkey=https://packages.clickhouse.com/rpm/stable/repodata/repomd.xml.key
https://packages.clickhouse.com/rpm/clickhouse-static.key
EOF
echo "[INFO] installing clickhouse-server and clickhouse-client ..."
dnf clean all
dnf makecache -y
if ! dnf install -y clickhouse-server clickhouse-client clickhouse-common-static; then
dnf install -y clickhouse-server clickhouse-client
fi
fi
echo "[INFO] enabling clickhouse-server ..."
systemctl enable clickhouse-server >/dev/null 2>&1 || true
systemctl restart clickhouse-server
sleep 2
if [[ -n "${CLICKHOUSE_DEFAULT_PASSWORD:-}" ]]; then
echo "[INFO] setting default user password ..."
if [[ "${CLICKHOUSE_DEFAULT_PASSWORD}" == *"'"* ]]; then
echo "[ERROR] CLICKHOUSE_DEFAULT_PASSWORD contains single quote, please set password manually with clickhouse-client"
exit 1
fi
clickhouse-client --query "ALTER USER default IDENTIFIED WITH plaintext_password BY '${CLICKHOUSE_DEFAULT_PASSWORD}'"
fi
echo "[INFO] health check ..."
clickhouse-client --query "SELECT version()"
echo "[OK] ClickHouse install completed: ID=${ID:-unknown}, VERSION_ID=${VERSION_ID:-unknown}"

View File

@@ -0,0 +1,123 @@
-- =============================================================================
-- ClickHouse logs_ingest table optimization script
--
-- Notes:
--   - Every ALTER below is an online operation; no downtime required.
--   - Run the phases in order; after each phase check system.parts to
--     confirm the change took effect.
--   - Codec changes only apply to newly written parts; existing data is
--     recompressed on merge or via a manual OPTIMIZE (see bottom).
--
-- Usage:
--   clickhouse-client --host 127.0.0.1 --port 9000 --user default --password 'xxx' < optimize_schema.sql
-- =============================================================================

-- =============================================
-- Phase 1: column compression (largest win)
-- =============================================

-- Large text columns move to ZSTD(3): for JSON / HTTP text this compresses far
-- better than the default LZ4. Expected disk savings: 40%-60%.
ALTER TABLE logs_ingest MODIFY COLUMN request_headers String CODEC(ZSTD(3));
ALTER TABLE logs_ingest MODIFY COLUMN request_body String CODEC(ZSTD(3));
ALTER TABLE logs_ingest MODIFY COLUMN response_headers String CODEC(ZSTD(3));
ALTER TABLE logs_ingest MODIFY COLUMN response_body String CODEC(ZSTD(3));

-- Medium-length text columns use ZSTD(1) to balance ratio against CPU cost.
ALTER TABLE logs_ingest MODIFY COLUMN ua String CODEC(ZSTD(1));
ALTER TABLE logs_ingest MODIFY COLUMN path String CODEC(ZSTD(1));
ALTER TABLE logs_ingest MODIFY COLUMN referer String CODEC(ZSTD(1));

-- Low-cardinality columns switch to LowCardinality (saves memory and disk).
-- method has very few values (GET/POST/PUT/DELETE, ...); host cardinality
-- tracks the number of sites.
ALTER TABLE logs_ingest MODIFY COLUMN method LowCardinality(String);
ALTER TABLE logs_ingest MODIFY COLUMN log_type LowCardinality(String);
ALTER TABLE logs_ingest MODIFY COLUMN host LowCardinality(String);

-- Numeric columns use Delta + ZSTD (exploits adjacent-row time/size locality).
ALTER TABLE logs_ingest MODIFY COLUMN bytes_in UInt64 CODEC(Delta, ZSTD(1));
ALTER TABLE logs_ingest MODIFY COLUMN bytes_out UInt64 CODEC(Delta, ZSTD(1));
ALTER TABLE logs_ingest MODIFY COLUMN cost_ms UInt32 CODEC(Delta, ZSTD(1));

-- =============================================
-- Phase 2: add skipping indexes for frequent filters
-- =============================================

-- trace_id exact lookup (log detail view / FindByTraceId).
-- bloom_filter(0.01) = 1% false-positive rate; GRANULARITY 4 = one bloom
-- block per 4 granules.
ALTER TABLE logs_ingest ADD INDEX IF NOT EXISTS idx_trace_id trace_id TYPE bloom_filter(0.01) GRANULARITY 4;
-- IP exact lookup.
ALTER TABLE logs_ingest ADD INDEX IF NOT EXISTS idx_ip ip TYPE bloom_filter(0.01) GRANULARITY 4;
-- host substring search support (tokenbf_v1 helps LIKE '%xxx%').
ALTER TABLE logs_ingest ADD INDEX IF NOT EXISTS idx_host host TYPE tokenbf_v1(10240, 3, 0) GRANULARITY 4;
-- firewall_policy_id filter (HasFirewallPolicy: > 0).
ALTER TABLE logs_ingest ADD INDEX IF NOT EXISTS idx_fw_policy firewall_policy_id TYPE minmax GRANULARITY 4;
-- status range filter (HasError: status >= 400).
ALTER TABLE logs_ingest ADD INDEX IF NOT EXISTS idx_status status TYPE minmax GRANULARITY 4;

-- =============================================
-- Phase 3: materialize indexes over existing data
-- =============================================

-- NOTE: MATERIALIZE INDEX triggers a background mutation; large tables may
-- take a while. Monitor with: SELECT * FROM system.mutations WHERE is_done = 0
ALTER TABLE logs_ingest MATERIALIZE INDEX idx_trace_id;
ALTER TABLE logs_ingest MATERIALIZE INDEX idx_ip;
ALTER TABLE logs_ingest MATERIALIZE INDEX idx_host;
ALTER TABLE logs_ingest MATERIALIZE INDEX idx_fw_policy;
ALTER TABLE logs_ingest MATERIALIZE INDEX idx_status;

-- =============================================================================
-- dns_logs_ingest optimization (DNS log table)
-- =============================================================================

-- Large text compression.
ALTER TABLE dns_logs_ingest MODIFY COLUMN content_json String CODEC(ZSTD(3));
ALTER TABLE dns_logs_ingest MODIFY COLUMN error String CODEC(ZSTD(1));

-- Low-cardinality columns.
ALTER TABLE dns_logs_ingest MODIFY COLUMN question_type LowCardinality(String);
ALTER TABLE dns_logs_ingest MODIFY COLUMN record_type LowCardinality(String);
ALTER TABLE dns_logs_ingest MODIFY COLUMN networking LowCardinality(String);

-- request_id exact lookup.
ALTER TABLE dns_logs_ingest ADD INDEX IF NOT EXISTS idx_request_id request_id TYPE bloom_filter(0.01) GRANULARITY 4;
-- remote_addr exact lookup.
ALTER TABLE dns_logs_ingest ADD INDEX IF NOT EXISTS idx_remote_addr remote_addr TYPE bloom_filter(0.01) GRANULARITY 4;
-- question_name substring search.
ALTER TABLE dns_logs_ingest ADD INDEX IF NOT EXISTS idx_question_name question_name TYPE tokenbf_v1(10240, 3, 0) GRANULARITY 4;
-- domain_id filter.
ALTER TABLE dns_logs_ingest ADD INDEX IF NOT EXISTS idx_domain_id domain_id TYPE minmax GRANULARITY 4;

-- Materialize the new indexes over existing data.
ALTER TABLE dns_logs_ingest MATERIALIZE INDEX idx_request_id;
ALTER TABLE dns_logs_ingest MATERIALIZE INDEX idx_remote_addr;
ALTER TABLE dns_logs_ingest MATERIALIZE INDEX idx_question_name;
ALTER TABLE dns_logs_ingest MATERIALIZE INDEX idx_domain_id;

-- =============================================================================
-- Verification queries (run after the ALTERs above)
-- =============================================================================

-- Per-column compression codecs:
-- SELECT name, type, compression_codec FROM system.columns WHERE table = 'logs_ingest' AND database = currentDatabase();

-- Table-level compression ratio:
-- SELECT table, formatReadableSize(sum(data_compressed_bytes)) AS compressed, formatReadableSize(sum(data_uncompressed_bytes)) AS uncompressed, round(sum(data_uncompressed_bytes) / sum(data_compressed_bytes), 2) AS ratio FROM system.columns WHERE table IN ('logs_ingest', 'dns_logs_ingest') GROUP BY table;

-- Per-column disk usage (find the largest columns):
-- SELECT name, formatReadableSize(sum(data_compressed_bytes)) AS compressed, formatReadableSize(sum(data_uncompressed_bytes)) AS uncompressed FROM system.columns WHERE table = 'logs_ingest' GROUP BY name ORDER BY sum(data_compressed_bytes) DESC;

-- Mutation progress:
-- SELECT database, table, mutation_id, command, is_done, parts_to_do FROM system.mutations WHERE is_done = 0;

-- Optional: force a merge so codec changes apply to existing data:
-- OPTIMIZE TABLE logs_ingest FINAL;
-- OPTIMIZE TABLE dns_logs_ingest FINAL;

View File

@@ -21,7 +21,14 @@
- **边缘节点EdgeNode** 已开启本地日志落盘,目录优先取“公用访问日志策略”的文件 `path`(取目录),为空时回退 `EDGE_LOG_DIR`,再回退默认 `/var/log/edge/edge-node`;生成 `access.log``waf.log``error.log`JSON Lines
- **DNS 节点EdgeDNS** 已开启本地日志落盘,目录优先取“公用访问日志策略”的文件 `path`(取目录),为空时回退 `EDGE_DNS_LOG_DIR`,再回退默认 `/var/log/edge/edge-dns`;生成 `access.log`JSON Lines
- **ClickHouse** 已安装并可访问(单机或集群),且已创建好 `logs_ingest`见下文「五、ClickHouse 建表」)。
- 若 Fluent Bit 与 ClickHouse 不在同一台机,需保证网络可达(默认 HTTP 端口 8123
- 若 Fluent Bit 与 ClickHouse 不在同一台机,需保证网络可达(默认 HTTPS 端口 8443
- 日志轮转默认由 Node/DNS 内建 `lumberjack` 执行:
- `maxSizeMB=256`
- `maxBackups=14`
- `maxAgeDays=7`
- `compress=false`
- `localTime=true`
可通过公用日志策略 `file.rotate` 调整。
---
@@ -71,7 +78,7 @@ sudo cp fluent-bit.conf clickhouse-upstream.conf /etc/fluent-bit/
编辑 `clickhouse-upstream.conf`,按实际环境填写 ClickHouse 的 Host/Port
- **单机**:保留一个 `[NODE]`,改 `Host``Port`(默认 8123
- **单机**:保留一个 `[NODE]`,改 `Host``Port`(默认 8443
- **集群**:复制多段 `[NODE]`,每段一个节点,例如:
```ini
@@ -81,12 +88,12 @@ sudo cp fluent-bit.conf clickhouse-upstream.conf /etc/fluent-bit/
[NODE]
Name node-01
Host 192.168.1.10
Port 8123
Port 8443
[NODE]
Name node-02
Host 192.168.1.11
Port 8123
Port 8443
```
### 3.3 ClickHouse 账号密码(有密码时必做)
@@ -319,9 +326,9 @@ Fluent Bit 写入时使用 `json_date_key timestamp` 和 `json_date_format epoch
| 组件 | 说明 |
|------|------|
| **EdgeNode** | 日志落盘路径优先复用公用访问日志策略文件 `path`(取目录);若为空回退 `EDGE_LOG_DIR`,再回退默认 `/var/log/edge/edge-node`;生成 `access.log`、`waf.log`、`error.log`支持 SIGHUP 重开句柄,可与 logrotate 的 `copytruncate` 配合。 |
| **EdgeNode** | 日志落盘路径优先复用公用访问日志策略文件 `path`(取目录);若为空回退 `EDGE_LOG_DIR`,再回退默认 `/var/log/edge/edge-node`;生成 `access.log`、`waf.log`、`error.log`内建 lumberjack 轮转(默认 256MB/14份/7天可按策略调整仍支持 SIGHUP 重建 writer。 |
| **EdgeDNS** | DNS 访问日志落盘路径优先复用公用访问日志策略文件 `path`(取目录);若为空回退 `EDGE_DNS_LOG_DIR`,再回退默认 `/var/log/edge/edge-dns`;生成 `access.log`JSON Lines由 Fluent Bit 采集写入 `dns_logs_ingest`。 |
| **logrotate** | 使用 `deploy/fluent-bit/logrotate.conf` 示例做轮转,避免磁盘占满。 |
| **logrotate** | 可选的历史兼容方案(已非必需);默认建议使用节点内建 lumberjack 轮转。 |
| **平台EdgeAPI** | 配置 ClickHouse 只读连接(`CLICKHOUSE_HOST`、`CLICKHOUSE_PORT`、`CLICKHOUSE_USER`、`CLICKHOUSE_PASSWORD`、`CLICKHOUSE_DATABASE`);当请求带 `Day` 且已配置 ClickHouse 时,访问日志列表查询走 ClickHouse。 |
---
@@ -411,3 +418,54 @@ sudo systemctl restart fluent-bit
```
回滚后恢复原 HTTP 模式,不影响平台 API/管理端配置。
---
## 十、平台托管模式(推荐)
从 `v1.4.7` 开始Node/DNS 在线安装流程会由平台托管 Fluent Bit默认不再要求逐台手改 `/etc/fluent-bit/fluent-bit.conf`。
### 10.1 托管行为
- 安装器优先使用发布包内置离线包(不走 `curl | sh`)。
- 首次安装后写入:
- `/etc/fluent-bit/fluent-bit.conf`
- `/etc/fluent-bit/parsers.conf`
- `/etc/fluent-bit/.edge-managed.env`
- `/etc/fluent-bit/.edge-managed.json`
- 配置发生变化时按 `hash` 幂等更新,仅在内容变化时重启服务。
- Node 与 DNS 同机安装时会自动合并角色,输出单份配置。
### 10.2 托管元数据
平台会维护 `/etc/fluent-bit/.edge-managed.json`,核心字段:
- `roles`: 当前机器启用角色(`node`/`dns`
- `hash`: 当前托管配置摘要
- `sourceVersion`: 平台版本号
- `updatedAt`: 最近更新时间戳
### 10.3 支持矩阵(离线包)
当前固定支持以下平台键:
- `ubuntu22.04-amd64`
- `ubuntu22.04-arm64`
- `amzn2023-amd64`
- `amzn2023-arm64`
构建阶段会校验矩阵包是否齐全,缺失会直接失败并打印期望文件路径。
### 10.4 手工配置兼容
- 若现有 `fluent-bit.conf` 不是平台托管文件(不含 `managed-by-edgeapi` 标记),安装器不会强制覆盖,会返回明确错误提示。
- 需要切到托管模式时,先备份旧配置,再由平台触发一次安装/更新任务。
### 10.5 Resource Profile Notes (New)
- Managed default is now tuned for `2C4G` nodes (conservative and stable).
- Additional sample profiles are provided for larger nodes:
- `deploy/fluent-bit/fluent-bit-sample-4c8g.conf`
- `deploy/fluent-bit/fluent-bit-sample-8c16g.conf`
- These sample files are for benchmark/reference only and are not auto-applied by installer.
- To use higher profiles in managed mode, sync those parameters into `EdgeAPI/internal/installers/fluent_bit.go` and then trigger node reinstall/upgrade.

View File

@@ -8,4 +8,4 @@
[NODE]
Name node-01
Host 127.0.0.1
Port 8123
Port 8443

View File

@@ -1,4 +1,4 @@
# DNS 节点专用:使用 HTTP 输出写入 ClickHouse无需 out_clickhouse 插件)
# DNS 节点专用:使用 HTTPS 输出写入 ClickHouse无需 out_clickhouse 插件)
# 启动前设置CH_USER、CH_PASSWORD若 ClickHouse 不在本机,请修改 Host、Port
# Read_from_Head=true首次启动会发送已有日志若只采新日志建议改为 false
@@ -26,11 +26,15 @@
Name http
Match app.dns.logs
Host 127.0.0.1
Port 8123
Port 8443
URI /?query=INSERT%20INTO%20default.dns_logs_ingest%20FORMAT%20JSONEachRow
Format json_lines
http_user ${CH_USER}
http_passwd ${CH_PASSWORD}
tls On
tls.verify On
# tls.ca_file /etc/ssl/certs/ca-certificates.crt
# tls.vhost clickhouse.example.com
json_date_key timestamp
json_date_format epoch
Retry_Limit 10

View File

@@ -0,0 +1,69 @@
# Sample profile for 4C8G nodes (Node + DNS on same host).
# Replace Host/Port/URI and credentials according to your ClickHouse deployment.

[SERVICE]
    Flush 1
    Log_Level info
    Parsers_File parsers.conf
    # Filesystem-backed buffering: queued chunks survive process restarts.
    storage.path /var/lib/fluent-bit/storage
    storage.sync normal
    storage.checksum off
    storage.backlog.mem_limit 512MB

# Tail EdgeNode HTTP/WAF JSON-lines logs.
[INPUT]
    Name tail
    Path /var/log/edge/edge-node/*.log
    Tag app.http.logs
    Parser json
    Refresh_Interval 2
    # false: only collect lines appended after startup.
    Read_from_Head false
    DB /var/lib/fluent-bit/http-logs.db
    storage.type filesystem
    Mem_Buf_Limit 256MB
    Skip_Long_Lines On

# Tail EdgeDNS JSON-lines logs.
[INPUT]
    Name tail
    Path /var/log/edge/edge-dns/*.log
    Tag app.dns.logs
    Parser json
    Refresh_Interval 2
    Read_from_Head false
    DB /var/lib/fluent-bit/dns-logs.db
    storage.type filesystem
    Mem_Buf_Limit 256MB
    Skip_Long_Lines On

# Push HTTP logs into ClickHouse over HTTPS (INSERT ... FORMAT JSONEachRow).
[OUTPUT]
    Name http
    Match app.http.logs
    Host 127.0.0.1
    Port 8443
    URI /?query=INSERT%20INTO%20default.logs_ingest%20FORMAT%20JSONEachRow
    Format json_lines
    http_user ${CH_USER}
    http_passwd ${CH_PASSWORD}
    json_date_key timestamp
    json_date_format epoch
    workers 2
    net.keepalive On
    # NOTE: "Retry_Limit False" removes the retry cap (retry indefinitely)
    # per Fluent Bit output docs.
    Retry_Limit False
    tls On
    tls.verify On

# Push DNS logs into ClickHouse over HTTPS.
[OUTPUT]
    Name http
    Match app.dns.logs
    Host 127.0.0.1
    Port 8443
    URI /?query=INSERT%20INTO%20default.dns_logs_ingest%20FORMAT%20JSONEachRow
    Format json_lines
    http_user ${CH_USER}
    http_passwd ${CH_PASSWORD}
    json_date_key timestamp
    json_date_format epoch
    workers 2
    net.keepalive On
    Retry_Limit False
    tls On
    tls.verify On

View File

@@ -0,0 +1,69 @@
# Sample profile for 8C16G nodes (Node + DNS on same host).
# Replace Host/Port/URI and credentials according to your ClickHouse deployment.
# Differences from the 4C8G profile: larger buffers (512MB/1024MB), faster
# tail refresh (1s) and more output workers (4).

[SERVICE]
    Flush 1
    Log_Level info
    Parsers_File parsers.conf
    # Filesystem-backed buffering: queued chunks survive process restarts.
    storage.path /var/lib/fluent-bit/storage
    storage.sync normal
    storage.checksum off
    storage.backlog.mem_limit 1024MB

# Tail EdgeNode HTTP/WAF JSON-lines logs.
[INPUT]
    Name tail
    Path /var/log/edge/edge-node/*.log
    Tag app.http.logs
    Parser json
    Refresh_Interval 1
    Read_from_Head false
    DB /var/lib/fluent-bit/http-logs.db
    storage.type filesystem
    Mem_Buf_Limit 512MB
    Skip_Long_Lines On

# Tail EdgeDNS JSON-lines logs.
[INPUT]
    Name tail
    Path /var/log/edge/edge-dns/*.log
    Tag app.dns.logs
    Parser json
    Refresh_Interval 1
    Read_from_Head false
    DB /var/lib/fluent-bit/dns-logs.db
    storage.type filesystem
    Mem_Buf_Limit 512MB
    Skip_Long_Lines On

# Push HTTP logs into ClickHouse over HTTPS (INSERT ... FORMAT JSONEachRow).
[OUTPUT]
    Name http
    Match app.http.logs
    Host 127.0.0.1
    Port 8443
    URI /?query=INSERT%20INTO%20default.logs_ingest%20FORMAT%20JSONEachRow
    Format json_lines
    http_user ${CH_USER}
    http_passwd ${CH_PASSWORD}
    json_date_key timestamp
    json_date_format epoch
    workers 4
    net.keepalive On
    Retry_Limit False
    tls On
    tls.verify On

# Push DNS logs into ClickHouse over HTTPS.
[OUTPUT]
    Name http
    Match app.dns.logs
    Host 127.0.0.1
    Port 8443
    URI /?query=INSERT%20INTO%20default.dns_logs_ingest%20FORMAT%20JSONEachRow
    Format json_lines
    http_user ${CH_USER}
    http_passwd ${CH_PASSWORD}
    json_date_key timestamp
    json_date_format epoch
    workers 4
    net.keepalive On
    Retry_Limit False
    tls On
    tls.verify On