chore: sync local changes
This commit is contained in:
7
.gitignore
vendored
Normal file
7
.gitignore
vendored
Normal file
@@ -0,0 +1,7 @@
|
||||
# Local runtime/state artifacts (do not commit)
|
||||
default.etcd/
|
||||
**/.DS_Store
|
||||
deploy/fluent-bit/logs.db
|
||||
deploy/fluent-bit/logs.db-shm
|
||||
deploy/fluent-bit/logs.db-wal
|
||||
deploy/fluent-bit/storage/
|
||||
@@ -28,7 +28,6 @@ const (
|
||||
fluentBitParsersFile = "/etc/fluent-bit/parsers.conf"
|
||||
fluentBitManagedMetaFile = "/etc/fluent-bit/.edge-managed.json"
|
||||
fluentBitManagedEnvFile = "/etc/fluent-bit/.edge-managed.env"
|
||||
fluentBitLogrotateFile = "/etc/logrotate.d/edge-goedge"
|
||||
fluentBitDropInDir = "/etc/systemd/system/fluent-bit.service.d"
|
||||
fluentBitDropInFile = "/etc/systemd/system/fluent-bit.service.d/edge-managed.conf"
|
||||
fluentBitServiceName = "fluent-bit"
|
||||
@@ -85,7 +84,7 @@ func (this *BaseInstaller) SetupFluentBit(role nodeconfigs.NodeRole) error {
|
||||
return err
|
||||
}
|
||||
|
||||
_, stderr, err := this.client.Exec("mkdir -p " + shQuote(fluentBitConfigDir) + " " + shQuote(fluentBitStorageDir) + " /etc/logrotate.d")
|
||||
_, stderr, err := this.client.Exec("mkdir -p " + shQuote(fluentBitConfigDir) + " " + shQuote(fluentBitStorageDir))
|
||||
if err != nil {
|
||||
return fmt.Errorf("prepare fluent-bit directories failed: %w, stderr: %s", err, stderr)
|
||||
}
|
||||
@@ -536,13 +535,6 @@ func (this *BaseInstaller) applyManagedConfig(tempDir string, desired *fluentBit
|
||||
return false, err
|
||||
}
|
||||
|
||||
localLogrotate := filepath.Join(Tea.Root, "deploy", "fluent-bit", "logrotate.conf")
|
||||
if _, err := os.Stat(localLogrotate); err == nil {
|
||||
if err := this.copyLocalFileToRemote(tempDir, localLogrotate, fluentBitLogrotateFile, 0644); err != nil {
|
||||
return false, err
|
||||
}
|
||||
}
|
||||
|
||||
return true, nil
|
||||
}
|
||||
|
||||
@@ -566,13 +558,13 @@ func renderManagedConfig(desired *fluentBitDesiredConfig) (string, error) {
|
||||
lines := []string{
|
||||
"# " + fluentBitManagedMarker,
|
||||
"[SERVICE]",
|
||||
" Flush 2",
|
||||
" Flush 1",
|
||||
" Log_Level info",
|
||||
" Parsers_File " + fluentBitParsersFile,
|
||||
" storage.path " + fluentBitStorageDir,
|
||||
" storage.sync normal",
|
||||
" storage.checksum off",
|
||||
" storage.backlog.mem_limit 256MB",
|
||||
" storage.backlog.mem_limit 512MB",
|
||||
"",
|
||||
}
|
||||
|
||||
@@ -587,7 +579,7 @@ func renderManagedConfig(desired *fluentBitDesiredConfig) (string, error) {
|
||||
" Read_from_Head false",
|
||||
" DB /var/lib/fluent-bit/http-logs.db",
|
||||
" storage.type filesystem",
|
||||
" Mem_Buf_Limit 128MB",
|
||||
" Mem_Buf_Limit 256MB",
|
||||
" Skip_Long_Lines On",
|
||||
"",
|
||||
)
|
||||
@@ -604,7 +596,7 @@ func renderManagedConfig(desired *fluentBitDesiredConfig) (string, error) {
|
||||
" Read_from_Head false",
|
||||
" DB /var/lib/fluent-bit/dns-logs.db",
|
||||
" storage.type filesystem",
|
||||
" Mem_Buf_Limit 128MB",
|
||||
" Mem_Buf_Limit 256MB",
|
||||
" Skip_Long_Lines On",
|
||||
"",
|
||||
)
|
||||
@@ -623,7 +615,7 @@ func renderManagedConfig(desired *fluentBitDesiredConfig) (string, error) {
|
||||
" http_passwd ${CH_PASSWORD}",
|
||||
" json_date_key timestamp",
|
||||
" json_date_format epoch",
|
||||
" workers 1",
|
||||
" workers 2",
|
||||
" net.keepalive On",
|
||||
" Retry_Limit False",
|
||||
)
|
||||
@@ -654,7 +646,7 @@ func renderManagedConfig(desired *fluentBitDesiredConfig) (string, error) {
|
||||
" http_passwd ${CH_PASSWORD}",
|
||||
" json_date_key timestamp",
|
||||
" json_date_format epoch",
|
||||
" workers 1",
|
||||
" workers 2",
|
||||
" net.keepalive On",
|
||||
" Retry_Limit False",
|
||||
)
|
||||
|
||||
BIN
EdgeAdmin/.DS_Store
vendored
BIN
EdgeAdmin/.DS_Store
vendored
Binary file not shown.
@@ -108,7 +108,7 @@ function build() {
|
||||
unzip -q "$(basename "$EDGE_API_ZIP_FILE")"
|
||||
rm -f "$(basename "$EDGE_API_ZIP_FILE")"
|
||||
|
||||
# ensure edge-api package always contains fluent-bit templates/packages
|
||||
# ensure edge-api package always contains fluent-bit runtime assets/packages
|
||||
FLUENT_ROOT="$ROOT/../../deploy/fluent-bit"
|
||||
FLUENT_DIST="$DIST/edge-api/deploy/fluent-bit"
|
||||
if [ -d "$FLUENT_ROOT" ]; then
|
||||
@@ -117,17 +117,7 @@ function build() {
|
||||
mkdir -p "$FLUENT_DIST"
|
||||
|
||||
FLUENT_FILES=(
|
||||
"fluent-bit.conf"
|
||||
"fluent-bit-dns.conf"
|
||||
"fluent-bit-https.conf"
|
||||
"fluent-bit-dns-https.conf"
|
||||
"fluent-bit-windows.conf"
|
||||
"fluent-bit-windows-https.conf"
|
||||
"parsers.conf"
|
||||
"clickhouse-upstream.conf"
|
||||
"clickhouse-upstream-windows.conf"
|
||||
"logrotate.conf"
|
||||
"README.md"
|
||||
)
|
||||
for file in "${FLUENT_FILES[@]}"; do
|
||||
if [ -f "$FLUENT_ROOT/$file" ]; then
|
||||
|
||||
BIN
EdgeCommon/build/.DS_Store
vendored
BIN
EdgeCommon/build/.DS_Store
vendored
Binary file not shown.
BIN
EdgeDNS/.DS_Store
vendored
BIN
EdgeDNS/.DS_Store
vendored
Binary file not shown.
@@ -112,7 +112,7 @@ function copy_fluent_bit_assets() {
|
||||
rm -rf "$FLUENT_DIST"
|
||||
mkdir -p "$FLUENT_DIST"
|
||||
|
||||
for file in fluent-bit.conf fluent-bit-dns.conf fluent-bit-https.conf fluent-bit-dns-https.conf fluent-bit-windows.conf fluent-bit-windows-https.conf parsers.conf clickhouse-upstream.conf clickhouse-upstream-windows.conf logrotate.conf README.md; do
|
||||
for file in fluent-bit.conf fluent-bit-dns.conf fluent-bit-https.conf fluent-bit-dns-https.conf fluent-bit-windows.conf fluent-bit-windows-https.conf parsers.conf clickhouse-upstream.conf clickhouse-upstream-windows.conf README.md; do
|
||||
if [ -f "$FLUENT_ROOT/$file" ]; then
|
||||
cp "$FLUENT_ROOT/$file" "$FLUENT_DIST/"
|
||||
fi
|
||||
|
||||
BIN
EdgeNode/.DS_Store
vendored
BIN
EdgeNode/.DS_Store
vendored
Binary file not shown.
@@ -186,7 +186,7 @@ function copy_fluent_bit_assets() {
|
||||
rm -rf "$FLUENT_DIST"
|
||||
mkdir -p "$FLUENT_DIST"
|
||||
|
||||
for file in fluent-bit.conf fluent-bit-dns.conf fluent-bit-https.conf fluent-bit-dns-https.conf fluent-bit-windows.conf fluent-bit-windows-https.conf parsers.conf clickhouse-upstream.conf clickhouse-upstream-windows.conf logrotate.conf README.md; do
|
||||
for file in fluent-bit.conf fluent-bit-dns.conf fluent-bit-https.conf fluent-bit-dns-https.conf fluent-bit-windows.conf fluent-bit-windows-https.conf parsers.conf clickhouse-upstream.conf clickhouse-upstream-windows.conf README.md; do
|
||||
if [ -f "$FLUENT_ROOT/$file" ]; then
|
||||
cp "$FLUENT_ROOT/$file" "$FLUENT_DIST/"
|
||||
fi
|
||||
|
||||
BIN
EdgeNode/dist/.DS_Store
vendored
BIN
EdgeNode/dist/.DS_Store
vendored
Binary file not shown.
BIN
EdgeUser/.DS_Store
vendored
BIN
EdgeUser/.DS_Store
vendored
Binary file not shown.
BIN
EdgeUser/dist/.DS_Store
vendored
BIN
EdgeUser/dist/.DS_Store
vendored
Binary file not shown.
Binary file not shown.
1290
HTTPDNS_技术实施方案.md
1290
HTTPDNS_技术实施方案.md
File diff suppressed because it is too large
Load Diff
Binary file not shown.
BIN
default.etcd/.DS_Store
vendored
BIN
default.etcd/.DS_Store
vendored
Binary file not shown.
BIN
default.etcd/member/.DS_Store
vendored
BIN
default.etcd/member/.DS_Store
vendored
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,111 +1,197 @@
|
||||
# ClickHouse + Fluent Bit 使用手册(Ubuntu 22.04 / Amazon Linux 2023)
|
||||
# ClickHouse + Fluent Bit 快速部署(Ubuntu 22.04 / Amazon Linux 2023)
|
||||
|
||||
## 1. 支持范围
|
||||
## 1. 脚本说明
|
||||
|
||||
- Ubuntu 22.04
|
||||
- Amazon Linux 2023(AWS)
|
||||
|
||||
安装脚本:`install_clickhouse_linux.sh`(自动识别上述系统)。
|
||||
|
||||
## 2. 安装 ClickHouse
|
||||
- `setup_clickhouse.sh`:一键入口(推荐),默认顺序执行 安装 ClickHouse -> 配置 HTTPS -> 应用运行参数 -> 初始化日志表。
|
||||
- `install_clickhouse_linux.sh`:安装 `clickhouse-server`、`clickhouse-client`,并启动服务。
|
||||
- `configure_clickhouse_https.sh`:生成自签名 `server.crt + server.key`,写入 HTTPS 配置并重启服务。
|
||||
- `configure_clickhouse_runtime.sh`:默认将日志级别设为 `warning`,并禁用高开销系统日志表(`text_log`、`part_log`、`metric_log`、`asynchronous_metric_log`、`trace_log`)。
|
||||
- `init_waf_logs_tables.sh`:执行建表脚本。
|
||||
- `init_waf_logs_tables.sql`:`logs_ingest`、`dns_logs_ingest` 表结构定义。
|
||||
|
||||
进入脚本所在目录
|
||||
```bash
|
||||
cd /path/to/waf-platform/deploy/clickhouse
|
||||
chmod +x install_clickhouse_linux.sh
|
||||
sudo ./install_clickhouse_linux.sh
|
||||
cd /opt/waf-platform/deploy/clickhouse
|
||||
chmod +x setup_clickhouse.sh
|
||||
```
|
||||
|
||||
可选:安装时初始化 `default` 用户密码:
|
||||
## 2. 一键部署
|
||||
|
||||
### 2.1 方式A:不设置 ClickHouse 密码(用户名固定 `default`)
|
||||
|
||||
```bash
|
||||
sudo CLICKHOUSE_DEFAULT_PASSWORD='YourStrongPassword' ./install_clickhouse_linux.sh
|
||||
```
|
||||
|
||||
## 3. 开启 HTTPS(默认仅 crt+key)
|
||||
|
||||
脚本默认生成 `server.crt + server.key`(带 SAN)并启用 8443:
|
||||
|
||||
```bash
|
||||
cd /path/to/waf-platform/deploy/clickhouse
|
||||
chmod +x configure_clickhouse_https.sh
|
||||
sudo CH_HTTPS_PORT=8443 \
|
||||
CH_CERT_CN=clickhouse.example.com \
|
||||
CH_CERT_DNS=clickhouse.example.com \
|
||||
CH_CERT_IP=<CLICKHOUSE_IP> \
|
||||
./configure_clickhouse_https.sh
|
||||
```
|
||||
|
||||
使用已有证书:
|
||||
|
||||
```bash
|
||||
sudo SRC_CERT=/path/to/server.crt \
|
||||
SRC_KEY=/path/to/server.key \
|
||||
CH_HTTPS_PORT=8443 \
|
||||
./configure_clickhouse_https.sh
|
||||
```
|
||||
|
||||
## 4. 初始化日志表(含优化)
|
||||
|
||||
```bash
|
||||
cd /path/to/waf-platform/deploy/clickhouse
|
||||
chmod +x init_waf_logs_tables.sh
|
||||
sudo CH_HOST=127.0.0.1 \
|
||||
CH_PORT=9000 \
|
||||
CH_USER=default \
|
||||
CH_PASSWORD='YourStrongPassword' \
|
||||
CH_DATABASE=default \
|
||||
./init_waf_logs_tables.sh
|
||||
sudo ./setup_clickhouse.sh
|
||||
```
|
||||
|
||||
说明:
|
||||
- `init_waf_logs_tables.sql` 已内置主要优化(`CODEC`、`LowCardinality`、跳数索引)。
|
||||
- `optimize_schema.sql` 主要用于历史表补齐优化,不是首次建表必需步骤。
|
||||
- ClickHouse 连接用户是 `default`
|
||||
- 未设置密码时,后续平台连接密码留空
|
||||
|
||||
## 5. 平台侧配置(EdgeAdmin)
|
||||
### 2.2 方式B:设置用户名/密码(示例使用 `default`)
|
||||
|
||||
在 ClickHouse 设置页配置:
|
||||
```bash
|
||||
sudo CH_USER='default' \
|
||||
CH_PASSWORD='YourStrongPassword' \
|
||||
CH_DATABASE='default' \
|
||||
./setup_clickhouse.sh
|
||||
```
|
||||
|
||||
- Host:ClickHouse 地址
|
||||
- Port:`8443`
|
||||
- Database:`default`
|
||||
- Scheme:`https`
|
||||
说明:
|
||||
- `CH_USER`/`CH_PASSWORD`:初始化日志表时用于连接 ClickHouse
|
||||
- 如果你使用自定义用户,把 `CH_USER` 改为你的用户名,并保证该用户已有对应数据库权限
|
||||
|
||||
当前实现说明:
|
||||
- 前端不再提供 `TLS跳过校验` 和 `TLS Server Name` 配置项。
|
||||
- 后端固定 `TLSSkipVerify=true`(默认不校验证书)。
|
||||
可选:单独应用运行参数(日志级别/系统日志表开关):
|
||||
|
||||
保存后点击“测试连接”。
|
||||
```bash
|
||||
sudo CH_LOG_LEVEL=warning ./setup_clickhouse.sh runtime
|
||||
```
|
||||
|
||||
## 6. Fluent Bit 配置方式
|
||||
## 3. ClickHouse 安装后关键目录
|
||||
|
||||
推荐平台托管模式(在线安装/升级 Node、DNS 时自动下发):
|
||||
- 配置目录:`/etc/clickhouse-server/`
|
||||
- 客户端配置目录:`/etc/clickhouse-client/`
|
||||
- 数据目录:`/var/lib/clickhouse/`
|
||||
- 日志目录:`/var/log/clickhouse-server/`
|
||||
- HTTPS 覆盖配置:`/etc/clickhouse-server/config.d/waf-https.xml`
|
||||
- 运行参数覆盖配置:`/etc/clickhouse-server/config.d/waf-runtime.xml`
|
||||
- HTTPS 证书和私钥:`/etc/clickhouse-server/server.crt`、`/etc/clickhouse-server/server.key`
|
||||
- 证书生成中间文件目录:`/etc/clickhouse-server/pki/`
|
||||
|
||||
- `/etc/fluent-bit/fluent-bit.conf`
|
||||
- `/etc/fluent-bit/.edge-managed.env`
|
||||
- `/etc/fluent-bit/.edge-managed.json`
|
||||
## 4. 管理平台配置(EdgeAdmin)
|
||||
|
||||
检查状态:
|
||||
页面路径:
|
||||
- 左侧菜单:`系统设置` -> `高级设置`
|
||||
- 顶部标签:`日志数据库(ClickHouse)`
|
||||
|
||||
表单填写:
|
||||
- `连接地址(Host)`:ClickHouse 地址(IP 或域名),如 `10.0.0.8` 或 `clickhouse.example.com`
|
||||
- `协议(Scheme)`:`https`
|
||||
- `端口(Port)`:`8443`
|
||||
- `用户名(User)`:`default`(或你自定义的用户名)
|
||||
- `密码(Password)`:对应用户密码
|
||||
- `数据库(Database)`:`default`(或你初始化日志表时使用的库名)
|
||||
|
||||
提交顺序:
|
||||
1. 点“测试连接”
|
||||
2. 连接成功后点“保存”
|
||||
|
||||
## 5. Fluent Bit(两种方式)
|
||||
|
||||
### 5.1 跟随节点在线自动安装(推荐)
|
||||
|
||||
说明:
|
||||
- Node / DNS 在线安装或升级时,平台会自动安装/升级 Fluent Bit 并下发配置。
|
||||
- 默认由平台托管,不需要逐台手改配置文件。
|
||||
|
||||
安装后所在节点关键文件:
|
||||
- `/etc/fluent-bit/fluent-bit.conf`:Fluent Bit 主配置(输入日志路径、输出 ClickHouse、性能参数)。
|
||||
- `/etc/fluent-bit/parsers.conf`:日志解析器定义(当前主要使用 JSON parser)。
|
||||
- `/etc/fluent-bit/.edge-managed.env`:平台下发的 ClickHouse 认证环境变量(`CH_USER`/`CH_PASSWORD`)。
|
||||
- `/etc/fluent-bit/.edge-managed.json`:平台下发的元数据(角色、配置哈希、版本、更新时间)。
|
||||
|
||||
|
||||
说明:
|
||||
- 在线安装时,节点上的 `/etc/fluent-bit/fluent-bit.conf` 会被平台下发覆盖。
|
||||
|
||||
fluent-bit中ClickHouse 账号密码下发与更新逻辑:
|
||||
- 下发来源:管理平台 -日志数据库(ClickHouse)中保存的账号密码。
|
||||
- 落地文件:平台在线安装或升级时写入节点 `/etc/fluent-bit/.edge-managed.env`,内容为 `CH_USER`、`CH_PASSWORD`。
|
||||
- 更新触发:当平台里的 ClickHouse 账号或密码变更后,需触发一次节点安装/升级任务以下发新凭证。
|
||||
|
||||
- 常见问题:只在 ClickHouse 侧改密码、未同步更新平台配置时,Fluent Bit 会出现认证失败(401/unauthorized)。
|
||||
|
||||
高配机器调优(当前默认按 4C8G 参数):
|
||||
- 当前默认参数:`Flush=1`、`storage.backlog.mem_limit=512MB`、`Mem_Buf_Limit=256MB`、`workers=2`。
|
||||
- 机器升配后优先调这 4 个参数:
|
||||
- `storage.backlog.mem_limit`:总缓冲上限(先增大,降低突发堆积丢日志风险)。
|
||||
- `Mem_Buf_Limit`:每个 tail input 的内存缓冲(HTTP 与 DNS 两段都要改)。
|
||||
- `workers`:输出并发写入线程数(HTTP 与 DNS 两段都要改)。
|
||||
- `Flush`:刷盘/发送间隔(值越小越实时,CPU/网络开销更高)。
|
||||
- 8C16G 参考值可按 `deploy/fluent-bit/fluent-bit-sample-8c16g.conf`:
|
||||
- `storage.backlog.mem_limit=1024MB`
|
||||
- `Mem_Buf_Limit=512MB`
|
||||
- `workers=4`
|
||||
- `Refresh_Interval=1`
|
||||
- 修改方法:
|
||||
1. 编辑 `EdgeAPI/internal/installers/fluent_bit.go` 的 `renderManagedConfig()`。
|
||||
2. 按上面参数同步修改 Node/DNS 两段 `[INPUT]` 和 `[OUTPUT]`。
|
||||
3. 重新发布 API 并触发节点安装/升级任务,下发新配置。
|
||||
|
||||
检查:
|
||||
|
||||
```bash
|
||||
sudo systemctl status fluent-bit --no-pager
|
||||
sudo cat /etc/fluent-bit/.edge-managed.json
|
||||
sudo journalctl -u fluent-bit -n 100 --no-pager
|
||||
```
|
||||
|
||||
## 7. 验证与排障
|
||||
### 5.2 手动安装(自动安装失败时)
|
||||
|
||||
查看 Fluent Bit 日志:
|
||||
说明:
|
||||
- 适合节点在线自动安装 Fluent Bit 失败的场景。
|
||||
- 采用在线安装方式,由你手动安装并维护配置。
|
||||
|
||||
步骤:
|
||||
|
||||
1. 在线安装 Fluent Bit。
|
||||
|
||||
Ubuntu 22.04:
|
||||
|
||||
```bash
|
||||
curl https://raw.githubusercontent.com/fluent/fluent-bit/master/install.sh | sh
|
||||
sudo apt-get update -y
|
||||
sudo apt-get install -y fluent-bit
|
||||
```
|
||||
|
||||
AWS 2023:
|
||||
|
||||
```bash
|
||||
curl https://raw.githubusercontent.com/fluent/fluent-bit/master/install.sh | sh
|
||||
sudo dnf makecache -y
|
||||
sudo dnf install -y fluent-bit
|
||||
```
|
||||
|
||||
2. 放置配置文件:
|
||||
|
||||
```bash
|
||||
sudo mkdir -p /etc/fluent-bit
|
||||
sudo cp /opt/waf-platform/deploy/fluent-bit/fluent-bit.conf /etc/fluent-bit/
|
||||
sudo cp /opt/waf-platform/deploy/fluent-bit/clickhouse-upstream.conf /etc/fluent-bit/
|
||||
sudo cp /opt/waf-platform/deploy/fluent-bit/parsers.conf /etc/fluent-bit/
|
||||
```
|
||||
|
||||
3. 修改 `/etc/fluent-bit/clickhouse-upstream.conf` 的 ClickHouse `Host`、`Port`(如 `8443`)。
|
||||
4. 配置认证环境变量(按需):
|
||||
|
||||
```bash
|
||||
sudo tee /etc/fluent-bit/fluent-bit.env >/dev/null <<'EOF'
|
||||
CH_USER=default
|
||||
CH_PASSWORD=YourStrongPassword
|
||||
EOF
|
||||
```
|
||||
|
||||
5. 让 systemd 读取环境变量:
|
||||
|
||||
```bash
|
||||
sudo mkdir -p /etc/systemd/system/fluent-bit.service.d
|
||||
sudo tee /etc/systemd/system/fluent-bit.service.d/override.conf >/dev/null <<'EOF'
|
||||
[Service]
|
||||
EnvironmentFile=/etc/fluent-bit/fluent-bit.env
|
||||
EOF
|
||||
```
|
||||
|
||||
6. 启动并检查:
|
||||
|
||||
```bash
|
||||
sudo systemctl daemon-reload
|
||||
sudo systemctl enable fluent-bit
|
||||
sudo systemctl restart fluent-bit
|
||||
sudo systemctl status fluent-bit --no-pager
|
||||
sudo journalctl -u fluent-bit -n 100 --no-pager
|
||||
```
|
||||
|
||||
## 6. 验证
|
||||
|
||||
```bash
|
||||
sudo journalctl -u fluent-bit -f
|
||||
```
|
||||
|
||||
查看写入:
|
||||
|
||||
```sql
|
||||
SELECT count() FROM default.logs_ingest;
|
||||
SELECT count() FROM default.dns_logs_ingest;
|
||||
```
|
||||
|
||||
常见错误:
|
||||
- `connection refused`:8443 未监听或网络未放行。
|
||||
- `legacy Common Name`:证书缺 SAN,需重签。
|
||||
|
||||
@@ -46,18 +46,12 @@ CH_CERT_CN="${CH_CERT_CN:-$(hostname -f 2>/dev/null || hostname)}"
|
||||
CH_CERT_DNS="${CH_CERT_DNS:-}"
|
||||
CH_CERT_IP="${CH_CERT_IP:-}"
|
||||
CH_CERT_DAYS="${CH_CERT_DAYS:-825}"
|
||||
CH_GENERATE_CA="${CH_GENERATE_CA:-false}"
|
||||
|
||||
SRC_CERT="${SRC_CERT:-}"
|
||||
SRC_KEY="${SRC_KEY:-}"
|
||||
SRC_CA="${SRC_CA:-}"
|
||||
|
||||
CH_DIR="/etc/clickhouse-server"
|
||||
CH_CONFIG_D_DIR="${CH_DIR}/config.d"
|
||||
PKI_DIR="${CH_DIR}/pki"
|
||||
SERVER_CERT="${CH_DIR}/server.crt"
|
||||
SERVER_KEY="${CH_DIR}/server.key"
|
||||
CA_CERT="${CH_DIR}/ca.crt"
|
||||
OVERRIDE_FILE="${CH_CONFIG_D_DIR}/waf-https.xml"
|
||||
|
||||
mkdir -p "${CH_CONFIG_D_DIR}" "${PKI_DIR}"
|
||||
@@ -117,72 +111,13 @@ EOF
|
||||
|
||||
cp -f "${server_crt}" "${SERVER_CERT}"
|
||||
cp -f "${server_key}" "${SERVER_KEY}"
|
||||
rm -f "${CA_CERT}"
|
||||
}
|
||||
|
||||
generate_cert_with_ca() {
|
||||
echo "[INFO] generating local CA and server certificate ..."
|
||||
local ca_key="${PKI_DIR}/ca.key"
|
||||
local ca_crt="${PKI_DIR}/ca.crt"
|
||||
local server_key="${PKI_DIR}/server.key"
|
||||
local server_csr="${PKI_DIR}/server.csr"
|
||||
local server_crt="${PKI_DIR}/server.crt"
|
||||
local ext_file="${PKI_DIR}/server.ext"
|
||||
local san_line
|
||||
san_line="$(build_san_line)"
|
||||
|
||||
openssl genrsa -out "${ca_key}" 4096
|
||||
openssl req -x509 -new -nodes -key "${ca_key}" -sha256 -days 3650 \
|
||||
-out "${ca_crt}" -subj "/CN=ClickHouse Local CA"
|
||||
|
||||
openssl genrsa -out "${server_key}" 2048
|
||||
openssl req -new -key "${server_key}" -out "${server_csr}" -subj "/CN=${CH_CERT_CN}"
|
||||
|
||||
cat >"${ext_file}" <<EOF
|
||||
subjectAltName=${san_line}
|
||||
keyUsage=digitalSignature,keyEncipherment
|
||||
extendedKeyUsage=serverAuth
|
||||
EOF
|
||||
|
||||
openssl x509 -req -in "${server_csr}" -CA "${ca_crt}" -CAkey "${ca_key}" -CAcreateserial \
|
||||
-out "${server_crt}" -days "${CH_CERT_DAYS}" -sha256 -extfile "${ext_file}"
|
||||
|
||||
cp -f "${server_crt}" "${SERVER_CERT}"
|
||||
cp -f "${server_key}" "${SERVER_KEY}"
|
||||
cp -f "${ca_crt}" "${CA_CERT}"
|
||||
}
|
||||
|
||||
if [[ -n "${SRC_CERT}" || -n "${SRC_KEY}" ]]; then
|
||||
if [[ -z "${SRC_CERT}" || -z "${SRC_KEY}" ]]; then
|
||||
echo "[ERROR] SRC_CERT and SRC_KEY must be provided together"
|
||||
exit 1
|
||||
fi
|
||||
echo "[INFO] using provided certificate files ..."
|
||||
cp -f "${SRC_CERT}" "${SERVER_CERT}"
|
||||
cp -f "${SRC_KEY}" "${SERVER_KEY}"
|
||||
if [[ -n "${SRC_CA}" ]]; then
|
||||
cp -f "${SRC_CA}" "${CA_CERT}"
|
||||
else
|
||||
rm -f "${CA_CERT}"
|
||||
fi
|
||||
else
|
||||
case "$(echo "${CH_GENERATE_CA}" | tr '[:upper:]' '[:lower:]')" in
|
||||
1|true|yes|on)
|
||||
generate_cert_with_ca
|
||||
;;
|
||||
*)
|
||||
generate_self_signed_cert
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
|
||||
chown clickhouse:clickhouse "${SERVER_CERT}" "${SERVER_KEY}" || true
|
||||
chmod 0644 "${SERVER_CERT}"
|
||||
chmod 0640 "${SERVER_KEY}"
|
||||
if [[ -f "${CA_CERT}" ]]; then
|
||||
chown clickhouse:clickhouse "${CA_CERT}" || true
|
||||
chmod 0644 "${CA_CERT}"
|
||||
fi
|
||||
|
||||
echo "[INFO] writing ClickHouse HTTPS override config ..."
|
||||
cat >"${OVERRIDE_FILE}" <<EOF
|
||||
@@ -221,7 +156,3 @@ echo "[OK] ClickHouse HTTPS setup finished"
|
||||
echo " HTTPS port : ${CH_HTTPS_PORT}"
|
||||
echo " cert file : ${SERVER_CERT}"
|
||||
echo " key file : ${SERVER_KEY}"
|
||||
if [[ -f "${CA_CERT}" ]]; then
|
||||
echo " CA file : ${CA_CERT}"
|
||||
echo " import this CA file into API/Fluent Bit hosts if tls.verify=On"
|
||||
fi
|
||||
|
||||
50
deploy/clickhouse/configure_clickhouse_runtime.sh
Normal file
50
deploy/clickhouse/configure_clickhouse_runtime.sh
Normal file
@@ -0,0 +1,50 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
if [[ "${EUID}" -ne 0 ]]; then
|
||||
echo "[ERROR] please run as root"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
CH_LOG_LEVEL="${CH_LOG_LEVEL:-warning}"
|
||||
CH_DIR="/etc/clickhouse-server"
|
||||
CH_CONFIG_D_DIR="${CH_DIR}/config.d"
|
||||
OVERRIDE_FILE="${CH_CONFIG_D_DIR}/waf-runtime.xml"
|
||||
|
||||
case "${CH_LOG_LEVEL}" in
|
||||
none|fatal|critical|error|warning|notice|information|debug|trace|test)
|
||||
;;
|
||||
*)
|
||||
echo "[ERROR] invalid CH_LOG_LEVEL: ${CH_LOG_LEVEL}"
|
||||
echo " allowed: none,fatal,critical,error,warning,notice,information,debug,trace,test"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
mkdir -p "${CH_CONFIG_D_DIR}"
|
||||
|
||||
echo "[INFO] writing ClickHouse runtime override config ..."
|
||||
cat >"${OVERRIDE_FILE}" <<EOF
|
||||
<clickhouse>
|
||||
<logger>
|
||||
<level>${CH_LOG_LEVEL}</level>
|
||||
</logger>
|
||||
|
||||
<text_log remove="1"/>
|
||||
<part_log remove="1"/>
|
||||
<metric_log remove="1"/>
|
||||
<asynchronous_metric_log remove="1"/>
|
||||
<trace_log remove="1"/>
|
||||
</clickhouse>
|
||||
EOF
|
||||
|
||||
echo "[INFO] restarting clickhouse-server ..."
|
||||
systemctl restart clickhouse-server
|
||||
sleep 2
|
||||
|
||||
echo "[INFO] service status ..."
|
||||
systemctl --no-pager -l status clickhouse-server | sed -n '1,15p'
|
||||
|
||||
echo "[OK] ClickHouse runtime config applied"
|
||||
echo " file : ${OVERRIDE_FILE}"
|
||||
echo " logger level: ${CH_LOG_LEVEL}"
|
||||
@@ -1,123 +0,0 @@
|
||||
-- =============================================================================
|
||||
-- ClickHouse logs_ingest 表优化脚本
|
||||
--
|
||||
-- 说明:
|
||||
-- - 所有 ALTER 操作均为在线操作,无需停服
|
||||
-- - 建议按阶段顺序执行,每阶段执行后观察 system.parts 确认生效
|
||||
-- - 压缩编解码器变更仅影响新写入的 part,存量数据需等 merge 或手动 OPTIMIZE
|
||||
--
|
||||
-- 执行方式:
|
||||
-- clickhouse-client --host 127.0.0.1 --port 9000 --user default --password 'xxx' < optimize_schema.sql
|
||||
-- =============================================================================
|
||||
|
||||
-- =============================================
|
||||
-- 阶段 1:大字段压缩优化(效果最显著)
|
||||
-- =============================================
|
||||
|
||||
-- 大文本字段改用 ZSTD(3),对 JSON / HTTP 文本压缩率远优于默认 LZ4
|
||||
-- 预期效果:磁盘占用减少 40%-60%
|
||||
ALTER TABLE logs_ingest MODIFY COLUMN request_headers String CODEC(ZSTD(3));
|
||||
ALTER TABLE logs_ingest MODIFY COLUMN request_body String CODEC(ZSTD(3));
|
||||
ALTER TABLE logs_ingest MODIFY COLUMN response_headers String CODEC(ZSTD(3));
|
||||
ALTER TABLE logs_ingest MODIFY COLUMN response_body String CODEC(ZSTD(3));
|
||||
|
||||
-- 中等长度文本字段用 ZSTD(1),平衡压缩率与 CPU 开销
|
||||
ALTER TABLE logs_ingest MODIFY COLUMN ua String CODEC(ZSTD(1));
|
||||
ALTER TABLE logs_ingest MODIFY COLUMN path String CODEC(ZSTD(1));
|
||||
ALTER TABLE logs_ingest MODIFY COLUMN referer String CODEC(ZSTD(1));
|
||||
|
||||
-- 低基数字段改用 LowCardinality(内存+磁盘双降)
|
||||
-- method 的基数极低(GET/POST/PUT/DELETE 等),host 基数取决于站点数量
|
||||
ALTER TABLE logs_ingest MODIFY COLUMN method LowCardinality(String);
|
||||
ALTER TABLE logs_ingest MODIFY COLUMN log_type LowCardinality(String);
|
||||
ALTER TABLE logs_ingest MODIFY COLUMN host LowCardinality(String);
|
||||
|
||||
-- 数值字段使用 Delta + ZSTD 编码(利用相邻行的时间/大小相关性)
|
||||
ALTER TABLE logs_ingest MODIFY COLUMN bytes_in UInt64 CODEC(Delta, ZSTD(1));
|
||||
ALTER TABLE logs_ingest MODIFY COLUMN bytes_out UInt64 CODEC(Delta, ZSTD(1));
|
||||
ALTER TABLE logs_ingest MODIFY COLUMN cost_ms UInt32 CODEC(Delta, ZSTD(1));
|
||||
|
||||
-- =============================================
|
||||
-- 阶段 2:添加 Skipping Index(加速高频过滤查询)
|
||||
-- =============================================
|
||||
|
||||
-- trace_id 精确查找(查看日志详情 FindByTraceId)
|
||||
-- bloom_filter(0.01) = 1% 误判率,GRANULARITY 4 = 每 4 个 granule 一个 bloom block
|
||||
ALTER TABLE logs_ingest ADD INDEX IF NOT EXISTS idx_trace_id trace_id TYPE bloom_filter(0.01) GRANULARITY 4;
|
||||
|
||||
-- IP 精确查找
|
||||
ALTER TABLE logs_ingest ADD INDEX IF NOT EXISTS idx_ip ip TYPE bloom_filter(0.01) GRANULARITY 4;
|
||||
|
||||
-- host 模糊查询支持(tokenbf_v1 对 LIKE '%xxx%' 有效)
|
||||
ALTER TABLE logs_ingest ADD INDEX IF NOT EXISTS idx_host host TYPE tokenbf_v1(10240, 3, 0) GRANULARITY 4;
|
||||
|
||||
-- firewall_policy_id 过滤(HasFirewallPolicy: > 0)
|
||||
ALTER TABLE logs_ingest ADD INDEX IF NOT EXISTS idx_fw_policy firewall_policy_id TYPE minmax GRANULARITY 4;
|
||||
|
||||
-- status 范围过滤(HasError: status >= 400)
|
||||
ALTER TABLE logs_ingest ADD INDEX IF NOT EXISTS idx_status status TYPE minmax GRANULARITY 4;
|
||||
|
||||
-- =============================================
|
||||
-- 阶段 3:物化索引到现有数据(对存量数据生效)
|
||||
-- =============================================
|
||||
-- 注意:MATERIALIZE INDEX 会触发后台 mutation,大表可能需要一定时间
|
||||
-- 可通过 SELECT * FROM system.mutations WHERE is_done = 0 监控进度
|
||||
|
||||
ALTER TABLE logs_ingest MATERIALIZE INDEX idx_trace_id;
|
||||
ALTER TABLE logs_ingest MATERIALIZE INDEX idx_ip;
|
||||
ALTER TABLE logs_ingest MATERIALIZE INDEX idx_host;
|
||||
ALTER TABLE logs_ingest MATERIALIZE INDEX idx_fw_policy;
|
||||
ALTER TABLE logs_ingest MATERIALIZE INDEX idx_status;
|
||||
|
||||
|
||||
-- =============================================================================
|
||||
-- dns_logs_ingest 表优化(DNS 日志表)
|
||||
-- =============================================================================
|
||||
|
||||
-- 大文本字段压缩
|
||||
ALTER TABLE dns_logs_ingest MODIFY COLUMN content_json String CODEC(ZSTD(3));
|
||||
ALTER TABLE dns_logs_ingest MODIFY COLUMN error String CODEC(ZSTD(1));
|
||||
|
||||
-- 低基数字段
|
||||
ALTER TABLE dns_logs_ingest MODIFY COLUMN question_type LowCardinality(String);
|
||||
ALTER TABLE dns_logs_ingest MODIFY COLUMN record_type LowCardinality(String);
|
||||
ALTER TABLE dns_logs_ingest MODIFY COLUMN networking LowCardinality(String);
|
||||
|
||||
-- request_id 精确查找
|
||||
ALTER TABLE dns_logs_ingest ADD INDEX IF NOT EXISTS idx_request_id request_id TYPE bloom_filter(0.01) GRANULARITY 4;
|
||||
|
||||
-- remote_addr 精确查找
|
||||
ALTER TABLE dns_logs_ingest ADD INDEX IF NOT EXISTS idx_remote_addr remote_addr TYPE bloom_filter(0.01) GRANULARITY 4;
|
||||
|
||||
-- question_name 模糊查询
|
||||
ALTER TABLE dns_logs_ingest ADD INDEX IF NOT EXISTS idx_question_name question_name TYPE tokenbf_v1(10240, 3, 0) GRANULARITY 4;
|
||||
|
||||
-- domain_id 过滤
|
||||
ALTER TABLE dns_logs_ingest ADD INDEX IF NOT EXISTS idx_domain_id domain_id TYPE minmax GRANULARITY 4;
|
||||
|
||||
-- 物化索引到现有数据
|
||||
ALTER TABLE dns_logs_ingest MATERIALIZE INDEX idx_request_id;
|
||||
ALTER TABLE dns_logs_ingest MATERIALIZE INDEX idx_remote_addr;
|
||||
ALTER TABLE dns_logs_ingest MATERIALIZE INDEX idx_question_name;
|
||||
ALTER TABLE dns_logs_ingest MATERIALIZE INDEX idx_domain_id;
|
||||
|
||||
|
||||
-- =============================================================================
|
||||
-- 验证命令(执行完上述 ALTER 后运行)
|
||||
-- =============================================================================
|
||||
|
||||
-- 查看列的压缩编解码器
|
||||
-- SELECT name, type, compression_codec FROM system.columns WHERE table = 'logs_ingest' AND database = currentDatabase();
|
||||
|
||||
-- 查看表的压缩率
|
||||
-- SELECT table, formatReadableSize(sum(data_compressed_bytes)) AS compressed, formatReadableSize(sum(data_uncompressed_bytes)) AS uncompressed, round(sum(data_uncompressed_bytes) / sum(data_compressed_bytes), 2) AS ratio FROM system.columns WHERE table IN ('logs_ingest', 'dns_logs_ingest') GROUP BY table;
|
||||
|
||||
-- 查看各列占用的磁盘空间(找出最大的列)
|
||||
-- SELECT name, formatReadableSize(sum(data_compressed_bytes)) AS compressed, formatReadableSize(sum(data_uncompressed_bytes)) AS uncompressed FROM system.columns WHERE table = 'logs_ingest' GROUP BY name ORDER BY sum(data_compressed_bytes) DESC;
|
||||
|
||||
-- 查看 mutation 进度
|
||||
-- SELECT database, table, mutation_id, command, is_done, parts_to_do FROM system.mutations WHERE is_done = 0;
|
||||
|
||||
-- 强制触发 merge(可选,让压缩编解码器变更对存量数据生效)
|
||||
-- OPTIMIZE TABLE logs_ingest FINAL;
|
||||
-- OPTIMIZE TABLE dns_logs_ingest FINAL;
|
||||
108
deploy/clickhouse/setup_clickhouse.sh
Normal file
108
deploy/clickhouse/setup_clickhouse.sh
Normal file
@@ -0,0 +1,108 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
INSTALL_SCRIPT="${SCRIPT_DIR}/install_clickhouse_linux.sh"
|
||||
HTTPS_SCRIPT="${SCRIPT_DIR}/configure_clickhouse_https.sh"
|
||||
RUNTIME_SCRIPT="${SCRIPT_DIR}/configure_clickhouse_runtime.sh"
|
||||
TABLES_SCRIPT="${SCRIPT_DIR}/init_waf_logs_tables.sh"
|
||||
|
||||
usage() {
|
||||
cat <<'EOF'
|
||||
Usage:
|
||||
sudo ./setup_clickhouse.sh [all|install|https|runtime|tables]
|
||||
|
||||
Modes:
|
||||
all Install ClickHouse, configure HTTPS, apply runtime config, init ingest tables (default)
|
||||
install Install ClickHouse only
|
||||
https Configure HTTPS only
|
||||
runtime Apply ClickHouse runtime config only
|
||||
tables Initialize ingest tables only
|
||||
|
||||
Common env vars:
|
||||
CLICKHOUSE_DEFAULT_PASSWORD Default user password set during install
|
||||
CH_HTTPS_PORT HTTPS port (default: 8443)
|
||||
CH_CERT_CN Certificate CN
|
||||
CH_CERT_DNS Certificate SAN DNS list (comma-separated)
|
||||
CH_CERT_IP Certificate SAN IP list (comma-separated)
|
||||
CH_CERT_DAYS Certificate validity days (default: 825)
|
||||
CH_LOG_LEVEL ClickHouse logger level (default: warning)
|
||||
CH_HOST ClickHouse host for table init (default: 127.0.0.1)
|
||||
CH_PORT ClickHouse port for table init (default: 9000)
|
||||
CH_USER ClickHouse user for table init (default: default)
|
||||
CH_PASSWORD ClickHouse password for table init
|
||||
CH_DATABASE Database for table init (default: default)
|
||||
EOF
|
||||
}
|
||||
|
||||
require_script() {
|
||||
local script="$1"
|
||||
if [[ ! -f "${script}" ]]; then
|
||||
echo "[ERROR] required file not found: ${script}"
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
run_install() {
|
||||
echo "[INFO] step 1/3: install ClickHouse ..."
|
||||
bash "${INSTALL_SCRIPT}"
|
||||
}
|
||||
|
||||
run_https() {
|
||||
echo "[INFO] step 2/3: configure ClickHouse HTTPS ..."
|
||||
bash "${HTTPS_SCRIPT}"
|
||||
}
|
||||
|
||||
run_runtime() {
|
||||
echo "[INFO] step 3/4: apply ClickHouse runtime config ..."
|
||||
bash "${RUNTIME_SCRIPT}"
|
||||
}
|
||||
|
||||
run_tables() {
|
||||
echo "[INFO] step 4/4: initialize ingest tables ..."
|
||||
bash "${TABLES_SCRIPT}"
|
||||
}
|
||||
|
||||
MODE="${1:-all}"
|
||||
|
||||
case "${MODE}" in
|
||||
-h|--help|help)
|
||||
usage
|
||||
exit 0
|
||||
;;
|
||||
all|install|https|runtime|tables)
|
||||
;;
|
||||
*)
|
||||
echo "[ERROR] invalid mode: ${MODE}"
|
||||
usage
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
require_script "${INSTALL_SCRIPT}"
|
||||
require_script "${HTTPS_SCRIPT}"
|
||||
require_script "${RUNTIME_SCRIPT}"
|
||||
require_script "${TABLES_SCRIPT}"
|
||||
|
||||
case "${MODE}" in
|
||||
all)
|
||||
run_install
|
||||
run_https
|
||||
run_runtime
|
||||
run_tables
|
||||
;;
|
||||
install)
|
||||
run_install
|
||||
;;
|
||||
https)
|
||||
run_https
|
||||
;;
|
||||
runtime)
|
||||
run_runtime
|
||||
;;
|
||||
tables)
|
||||
run_tables
|
||||
;;
|
||||
esac
|
||||
|
||||
echo "[OK] setup completed: mode=${MODE}"
|
||||
2
deploy/fluent-bit/.gitignore
vendored
2
deploy/fluent-bit/.gitignore
vendored
@@ -1,2 +0,0 @@
|
||||
fluent-bit-windows.conf
|
||||
clickhouse-upstream-windows.conf
|
||||
@@ -1,471 +0,0 @@
|
||||
# 边缘节点日志链路部署(Fluent Bit + ClickHouse)
|
||||
|
||||
与 [日志链路调整方案](../../log-pipeline-migration-plan.md) 配套的配置与部署说明。本文档为 **Fluent Bit 部署教程**,按步骤即可在边缘节点或日志采集机上跑通采集 → ClickHouse 写入。
|
||||
|
||||
---
|
||||
|
||||
## Fluent Bit 跑在哪台机器上?
|
||||
|
||||
**Fluent Bit 应部署在写日志文件的节点机器上**(EdgeNode / EdgeDNS 同机),不要部署在 EdgeAPI 机器上。
|
||||
|
||||
- HTTP 日志文件默认在 `/var/log/edge/edge-node/*.log`,由 **EdgeNode** 本机写入;若配置了公用访问日志策略的文件 `path`,节点会优先复用该 `path` 所在目录。
|
||||
- DNS 日志文件默认在 `/var/log/edge/edge-dns/*.log`,由 **EdgeDNS** 本机写入;若配置了公用访问日志策略的文件 `path`,节点会优先复用该 `path` 所在目录。
|
||||
- Fluent Bit 使用 **tail** 读取本机路径,因此必须运行在这些日志文件所在机器上。
|
||||
- EdgeAPI 机器主要负责查询 ClickHouse/MySQL,不需要承担日志采集。
|
||||
- 多机部署时,每台写日志节点都跑一份 Fluent Bit,上报到同一 ClickHouse 集群。
|
||||
|
||||
---
|
||||
|
||||
## 一、前置条件
|
||||
|
||||
- **边缘节点(EdgeNode)** 已开启本地日志落盘,目录优先取“公用访问日志策略”的文件 `path`(取目录),为空时回退 `EDGE_LOG_DIR`,再回退默认 `/var/log/edge/edge-node`;生成 `access.log`、`waf.log`、`error.log`(JSON Lines)。
|
||||
- **DNS 节点(EdgeDNS)** 已开启本地日志落盘,目录优先取“公用访问日志策略”的文件 `path`(取目录),为空时回退 `EDGE_DNS_LOG_DIR`,再回退默认 `/var/log/edge/edge-dns`;生成 `access.log`(JSON Lines)。
|
||||
- **ClickHouse** 已安装并可访问(单机或集群),且已创建好 `logs_ingest` 表(见下文「五、ClickHouse 建表」)。
|
||||
- 若 Fluent Bit 与 ClickHouse 不在同一台机,需保证网络可达(默认 HTTPS 端口 8443)。
|
||||
- 日志轮转默认由 Node/DNS 内建 `lumberjack` 执行:
|
||||
- `maxSizeMB=256`
|
||||
- `maxBackups=14`
|
||||
- `maxAgeDays=7`
|
||||
- `compress=false`
|
||||
- `localTime=true`
|
||||
可通过公用日志策略 `file.rotate` 调整。
|
||||
|
||||
---
|
||||
|
||||
## 二、安装 Fluent Bit
|
||||
|
||||
### 2.1 Ubuntu / Debian
|
||||
|
||||
```bash
|
||||
# 添加 Fluent Bit 官方源并安装(以 Ubuntu 22.04 为例)
|
||||
curl https://raw.githubusercontent.com/fluent/fluent-bit/master/install.sh | sh
|
||||
sudo apt-get install -y fluent-bit
|
||||
|
||||
# 或使用 TD Agent Bit 源(若需 ClickHouse 等扩展)
|
||||
# 见:https://docs.fluentbit.io/manual/installation/linux/ubuntu
|
||||
```
|
||||
|
||||
### 2.2 CentOS / RHEL / Amazon Linux
|
||||
|
||||
```bash
|
||||
# 使用官方 install 脚本
|
||||
curl https://raw.githubusercontent.com/fluent/fluent-bit/master/install.sh | sh
|
||||
|
||||
# 或 yum/dnf 安装(以提供的仓库为准)
|
||||
# sudo yum install -y fluent-bit
|
||||
```
|
||||
|
||||
### 2.3 使用二进制包
|
||||
|
||||
从 [Fluent Bit 官方 Release](https://github.com/fluent/fluent-bit/releases) 下载对应架构的 tarball,解压后将 `bin/fluent-bit` 放到 PATH,并确保其 **Output 插件支持 ClickHouse**(部分发行版或自编译需启用 `out_clickhouse`)。
|
||||
|
||||
---
|
||||
|
||||
## 三、部署配置文件
|
||||
|
||||
### 3.1 放置配置
|
||||
|
||||
将本目录下配置文件放到同一目录,例如 `/etc/fluent-bit/` 或 `/opt/edge/fluent-bit/`:
|
||||
|
||||
```bash
|
||||
sudo mkdir -p /etc/fluent-bit
|
||||
sudo cp fluent-bit.conf clickhouse-upstream.conf /etc/fluent-bit/
|
||||
```
|
||||
|
||||
两文件需在同一目录,因 `fluent-bit.conf` 中有 `@INCLUDE clickhouse-upstream.conf`。
|
||||
|
||||
### 3.2 修改 ClickHouse 地址(必做)
|
||||
|
||||
编辑 `clickhouse-upstream.conf`,按实际环境填写 ClickHouse 的 Host/Port:
|
||||
|
||||
- **单机**:保留一个 `[NODE]`,改 `Host`、`Port`(默认 8443)。
|
||||
- **集群**:复制多段 `[NODE]`,每段一个节点,例如:
|
||||
|
||||
```ini
|
||||
[UPSTREAM]
|
||||
Name ch_backends
|
||||
|
||||
[NODE]
|
||||
Name node-01
|
||||
Host 192.168.1.10
|
||||
Port 8443
|
||||
|
||||
[NODE]
|
||||
Name node-02
|
||||
Host 192.168.1.11
|
||||
Port 8443
|
||||
```
|
||||
|
||||
### 3.3 ClickHouse 账号密码(有密码时必做)
|
||||
|
||||
不在 `clickhouse-upstream.conf` 里配置密码,而是通过 **环境变量** 传给 Fluent Bit:
|
||||
|
||||
- `CH_USER`:ClickHouse 用户名(如 `default`)。
|
||||
- `CH_PASSWORD`:对应用户的密码。
|
||||
|
||||
在 systemd 或启动脚本中设置(见下文「四、以 systemd 方式运行」)。
|
||||
|
||||
### 3.4 日志路径与 parsers.conf
|
||||
|
||||
- **日志路径**:`fluent-bit.conf` 里已同时配置 HTTP 与 DNS 两类路径:
|
||||
- HTTP:`/var/log/edge/edge-node/*.log`
|
||||
- DNS:`/var/log/edge/edge-dns/*.log`
|
||||
若你配置了公用访问日志策略的文件 `path`,或改了 `EDGE_LOG_DIR` / `EDGE_DNS_LOG_DIR`,请同步修改对应 `Path`。
|
||||
- **Parsers_File**:主配置引用了 `parsers.conf`。若安装包自带(如 `/etc/fluent-bit/parsers.conf`),无需改动;若启动报错找不到文件,可:
|
||||
- 从 Fluent Bit 官方仓库复制 [conf/parsers.conf](https://github.com/fluent/fluent-bit/blob/master/conf/parsers.conf) 到同一目录,或
|
||||
- 在同一目录新建空文件 `parsers.conf`(仅当不使用任何 parser 时)。
|
||||
|
||||
### 3.5 数据与状态目录
|
||||
|
||||
Fluent Bit 会使用配置里的 `storage.path` 和 DB 路径,需保证进程有写权限:
|
||||
|
||||
```bash
|
||||
sudo mkdir -p /var/lib/fluent-bit/storage
|
||||
sudo chown -R <运行 fluent-bit 的用户>:<同组> /var/lib/fluent-bit
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 四、以 systemd 方式运行
|
||||
|
||||
### 4.1 使用自带服务(若安装包已提供)
|
||||
|
||||
若通过 apt/yum 安装,通常已有 `fluent-bit.service`。先改配置路径和环境变量:
|
||||
|
||||
```bash
|
||||
# 编辑服务文件(路径以实际为准,如 /lib/systemd/system/fluent-bit.service)
|
||||
sudo systemctl edit fluent-bit --full
|
||||
```
|
||||
|
||||
在 `[Service]` 中增加或修改:
|
||||
|
||||
- `EnvironmentFile` 指向你的环境变量文件,或直接写:
|
||||
- `Environment="CH_USER=default"`
|
||||
- `Environment="CH_PASSWORD=你的密码"`
|
||||
- `ExecStart` 中的配置文件路径改为你的 `fluent-bit.conf`,例如:
|
||||
- `ExecStart=/opt/fluent-bit/bin/fluent-bit -c /etc/fluent-bit/fluent-bit.conf`
|
||||
|
||||
然后:
|
||||
|
||||
```bash
|
||||
sudo systemctl daemon-reload
|
||||
sudo systemctl enable fluent-bit
|
||||
sudo systemctl start fluent-bit
|
||||
sudo systemctl status fluent-bit
|
||||
```
|
||||
|
||||
### 4.2 自定义 systemd 单元(无自带服务时)
|
||||
|
||||
新建 `/etc/systemd/system/fluent-bit-edge.service`:
|
||||
|
||||
```ini
|
||||
[Unit]
|
||||
Description=Fluent Bit - Edge Node Logs to ClickHouse
|
||||
After=network.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
ExecStart=/usr/bin/fluent-bit -c /etc/fluent-bit/fluent-bit.conf
|
||||
Restart=always
|
||||
RestartSec=5
|
||||
# ClickHouse 认证(按需修改)
|
||||
Environment="CH_USER=default"
|
||||
Environment="CH_PASSWORD=your_clickhouse_password"
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
```
|
||||
|
||||
若密码含特殊字符,建议用 `EnvironmentFile=/etc/fluent-bit/fluent-bit.env`,并在该文件中写:
|
||||
|
||||
```bash
|
||||
CH_USER=default
|
||||
CH_PASSWORD=your_clickhouse_password
|
||||
```
|
||||
|
||||
然后:
|
||||
|
||||
```bash
|
||||
sudo systemctl daemon-reload
|
||||
sudo systemctl enable fluent-bit-edge
|
||||
sudo systemctl start fluent-bit-edge
|
||||
sudo systemctl status fluent-bit-edge
|
||||
```
|
||||
|
||||
### 4.3 前台调试
|
||||
|
||||
不依赖 systemd 时可直接前台跑(便于看日志):
|
||||
|
||||
```bash
|
||||
export CH_USER=default
|
||||
export CH_PASSWORD=your_clickhouse_password
|
||||
fluent-bit -c /etc/fluent-bit/fluent-bit.conf
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 五、ClickHouse 建表
|
||||
|
||||
平台(EdgeAPI)会查询两张表:
|
||||
- HTTP:`logs_ingest`
|
||||
- DNS:`dns_logs_ingest`
|
||||
|
||||
需在 ClickHouse 中先建表。库名默认为 `default`,若使用其它库,需与 EdgeAPI 的 `CLICKHOUSE_DATABASE` 一致。
|
||||
|
||||
在 ClickHouse 中执行(按需改库名或引擎):
|
||||
|
||||
```sql
|
||||
CREATE TABLE IF NOT EXISTS default.logs_ingest
|
||||
(
|
||||
timestamp DateTime,
|
||||
node_id UInt64,
|
||||
cluster_id UInt64,
|
||||
server_id UInt64,
|
||||
host String,
|
||||
ip String,
|
||||
method String,
|
||||
path String,
|
||||
status UInt16,
|
||||
bytes_in UInt64,
|
||||
bytes_out UInt64,
|
||||
cost_ms UInt32,
|
||||
ua String,
|
||||
referer String,
|
||||
log_type String,
|
||||
trace_id String,
|
||||
firewall_policy_id UInt64 DEFAULT 0,
|
||||
firewall_rule_group_id UInt64 DEFAULT 0,
|
||||
firewall_rule_set_id UInt64 DEFAULT 0,
|
||||
firewall_rule_id UInt64 DEFAULT 0,
|
||||
request_headers String DEFAULT '',
|
||||
request_body String DEFAULT '',
|
||||
response_headers String DEFAULT '',
|
||||
response_body String DEFAULT ''
|
||||
)
|
||||
ENGINE = MergeTree()
|
||||
ORDER BY (timestamp, node_id, server_id, trace_id)
|
||||
SETTINGS index_granularity = 8192;
|
||||
```
|
||||
|
||||
DNS 日志建表:
|
||||
|
||||
```sql
|
||||
CREATE TABLE IF NOT EXISTS default.dns_logs_ingest
|
||||
(
|
||||
timestamp DateTime,
|
||||
request_id String,
|
||||
node_id UInt64,
|
||||
cluster_id UInt64,
|
||||
domain_id UInt64,
|
||||
record_id UInt64,
|
||||
remote_addr String,
|
||||
question_name String,
|
||||
question_type String,
|
||||
record_name String,
|
||||
record_type String,
|
||||
record_value String,
|
||||
networking String,
|
||||
is_recursive UInt8,
|
||||
error String,
|
||||
ns_route_codes Array(String),
|
||||
content_json String DEFAULT ''
|
||||
)
|
||||
ENGINE = MergeTree()
|
||||
ORDER BY (timestamp, request_id, node_id)
|
||||
SETTINGS index_granularity = 8192;
|
||||
```
|
||||
|
||||
- **log_type**:`access` / `waf` / `error`;攻击日志同时看 **firewall_rule_id** 或 **firewall_policy_id** 是否大于 0(与原有 MySQL 通过规则 ID 判断攻击日志一致)。
|
||||
- **request_headers / response_headers**:JSON 字符串;**request_body / response_body**:请求/响应体(单条建议限制长度,如 512KB)。
|
||||
- **request_body 为空**:需在管理端为该站点/服务的「访问日志」策略中勾选「请求Body」后才会落盘;默认未勾选。路径大致为:站点/服务 → 访问日志 → 策略 → 记录字段 → 勾选「请求Body」。WAF 拦截且策略开启「记录请求Body」时也会记录。
|
||||
- **response_body 为空**:当前版本未实现(proto 与节点均未支持响应体落盘),表中已预留字段,后续可扩展。
|
||||
- **原有 MySQL 日志同步到 ClickHouse**:见 [mysql-to-clickhouse-migration.md](mysql-to-clickhouse-migration.md)。
|
||||
|
||||
若表已存在且缺少新字段,可执行:
|
||||
|
||||
```sql
|
||||
ALTER TABLE default.logs_ingest ADD COLUMN IF NOT EXISTS firewall_policy_id UInt64 DEFAULT 0;
|
||||
ALTER TABLE default.logs_ingest ADD COLUMN IF NOT EXISTS firewall_rule_group_id UInt64 DEFAULT 0;
|
||||
ALTER TABLE default.logs_ingest ADD COLUMN IF NOT EXISTS firewall_rule_set_id UInt64 DEFAULT 0;
|
||||
ALTER TABLE default.logs_ingest ADD COLUMN IF NOT EXISTS firewall_rule_id UInt64 DEFAULT 0;
|
||||
ALTER TABLE default.logs_ingest ADD COLUMN IF NOT EXISTS request_headers String DEFAULT '';
|
||||
ALTER TABLE default.logs_ingest ADD COLUMN IF NOT EXISTS request_body String DEFAULT '';
|
||||
ALTER TABLE default.logs_ingest ADD COLUMN IF NOT EXISTS response_headers String DEFAULT '';
|
||||
ALTER TABLE default.logs_ingest ADD COLUMN IF NOT EXISTS response_body String DEFAULT '';
|
||||
ALTER TABLE default.dns_logs_ingest ADD COLUMN IF NOT EXISTS content_json String DEFAULT '';
|
||||
```
|
||||
|
||||
Fluent Bit 写入时使用 `json_date_key timestamp` 和 `json_date_format epoch`,会将 JSON 中的 `timestamp`(Unix 秒)转为 DateTime。
|
||||
|
||||
---
|
||||
|
||||
## 六、验证与排错
|
||||
|
||||
1. **看 Fluent Bit 日志**
|
||||
- systemd:`journalctl -u fluent-bit-edge -f`(或你的服务名)
|
||||
- 前台:直接看终端输出。
|
||||
|
||||
2. **看 ClickHouse 是否有数据**
|
||||
```sql
|
||||
SELECT count() FROM default.logs_ingest;
|
||||
SELECT * FROM default.logs_ingest LIMIT 5;
|
||||
SELECT count() FROM default.dns_logs_ingest;
|
||||
SELECT * FROM default.dns_logs_ingest LIMIT 5;
|
||||
```
|
||||
|
||||
3. **常见问题**
|
||||
- **连接被拒**:检查 `clickhouse-upstream.conf` 的 Host/Port、防火墙、ClickHouse 的 `listen_host`。
|
||||
- **认证失败**:检查 `CH_USER`、`CH_PASSWORD` 是否与 ClickHouse 用户一致,环境变量是否被 systemd 正确加载。
|
||||
- **找不到 parsers.conf**:见上文 3.4。
|
||||
- **没有新数据**:确认 EdgeNode/EdgeDNS 已写日志到 `Path` 下,且 Fluent Bit 对目录有读权限;可分别执行 `tail -f /var/log/edge/edge-node/access.log` 与 `tail -f /var/log/edge/edge-dns/access.log`。
|
||||
- **Node 上没有 `/var/log/edge/edge-node/access.log`**:见下文「八、Node 上找不到日志文件」。
|
||||
|
||||
---
|
||||
|
||||
## 七、与其它组件的关系(简要)
|
||||
|
||||
| 组件 | 说明 |
|
||||
|------|------|
|
||||
| **EdgeNode** | 日志落盘路径优先复用公用访问日志策略文件 `path`(取目录);若为空回退 `EDGE_LOG_DIR`,再回退默认 `/var/log/edge/edge-node`;生成 `access.log`、`waf.log`、`error.log`;内建 lumberjack 轮转(默认 256MB/14份/7天,可按策略调整),仍支持 SIGHUP 重建 writer。 |
|
||||
| **EdgeDNS** | DNS 访问日志落盘路径优先复用公用访问日志策略文件 `path`(取目录);若为空回退 `EDGE_DNS_LOG_DIR`,再回退默认 `/var/log/edge/edge-dns`;生成 `access.log`(JSON Lines),由 Fluent Bit 采集写入 `dns_logs_ingest`。 |
|
||||
| **logrotate** | 可选的历史兼容方案(已非必需);默认建议使用节点内建 lumberjack 轮转。 |
|
||||
| **平台(EdgeAPI)** | 配置 ClickHouse 只读连接(`CLICKHOUSE_HOST`、`CLICKHOUSE_PORT`、`CLICKHOUSE_USER`、`CLICKHOUSE_PASSWORD`、`CLICKHOUSE_DATABASE`);当请求带 `Day` 且已配置 ClickHouse 时,访问日志列表查询走 ClickHouse。 |
|
||||
|
||||
---
|
||||
|
||||
## 八、Node 上找不到日志文件
|
||||
|
||||
若在 EdgeNode 机器上执行 `tail -f /var/log/edge/edge-node/access.log` 报 **No such file or directory**,按下面检查:
|
||||
|
||||
1. **EdgeNode 版本**
|
||||
本地日志落盘是较新功能,需使用**包含该功能的 EdgeNode 构建**(当前仓库版本在首次加载配置时会预创建目录和三个空日志文件)。
|
||||
|
||||
2. **预创建目录(可选)**
|
||||
若进程以非 root 运行,可先手动建目录并赋权,避免无权限创建 `/var/log/edge`:
|
||||
```bash
|
||||
sudo mkdir -p /var/log/edge/edge-node
|
||||
sudo chown <运行 edge-node 的用户>:<同组> /var/log/edge/edge-node
|
||||
```
|
||||
|
||||
3. **重启 EdgeNode**
|
||||
新版本在**首次成功加载节点配置后**会调用 `EnsureInit()`,自动创建 `/var/log/edge/edge-node` 及 `access.log`、`waf.log`、`error.log`。重启一次 edge-node 后再看目录下是否已有文件。
|
||||
|
||||
4. **自定义路径**
|
||||
若在管理端设置了公用访问日志策略的文件 `path`,节点会优先使用该目录;否则才使用 `EDGE_LOG_DIR`。Fluent Bit 的 `Path` 需与实际目录一致。
|
||||
|
||||
以上完成即完成 Fluent Bit 的部署与验证。
|
||||
|
||||
---
|
||||
|
||||
## 九、HTTPS 模式(ClickHouse)
|
||||
|
||||
当 ClickHouse 只开放 HTTPS(如 8443)或链路必须加密时,使用本目录新增模板:
|
||||
|
||||
- `fluent-bit-https.conf`:Node+DNS 同机采集(HTTP+DNS 双输入)
|
||||
- `fluent-bit-dns-https.conf`:仅 DNS 节点采集
|
||||
- `fluent-bit-windows-https.conf`:Windows 节点 HTTPS 采集
|
||||
|
||||
### 9.1 什么时候用 HTTPS 模板
|
||||
|
||||
- ClickHouse 仅开放 HTTPS 端口;
|
||||
- 节点到 ClickHouse 跨公网或需要传输加密;
|
||||
- 你希望启用证书校验和 SNI。
|
||||
|
||||
### 9.2 最小切换步骤(Linux)
|
||||
|
||||
1. 备份当前配置:
|
||||
```bash
|
||||
sudo cp /etc/fluent-bit/fluent-bit.conf /etc/fluent-bit/fluent-bit.conf.bak
|
||||
```
|
||||
|
||||
2. 切换为 HTTPS 模板(Node+DNS 同机示例):
|
||||
```bash
|
||||
sudo cp /path/to/fluent-bit-https.conf /etc/fluent-bit/fluent-bit.conf
|
||||
```
|
||||
|
||||
3. 设置账号密码(按你的服务文件方式设置):
|
||||
```bash
|
||||
export CH_USER=default
|
||||
export CH_PASSWORD='your_password'
|
||||
```
|
||||
|
||||
4. 修改模板中的关键项:
|
||||
- `Host` / `Port`(HTTPS 常见端口 `8443`)
|
||||
- `tls.verify`:`On`/`Off`
|
||||
- `tls.ca_file`:自签名证书建议配置 CA 文件
|
||||
- `tls.vhost`:证书 CN/SAN 对应主机名(SNI)
|
||||
|
||||
5. 重启并检查:
|
||||
```bash
|
||||
sudo systemctl restart fluent-bit
|
||||
sudo systemctl status fluent-bit
|
||||
journalctl -u fluent-bit -f
|
||||
```
|
||||
|
||||
### 9.3 验证点
|
||||
|
||||
- `default.logs_ingest` 有新增数据(HTTP)
|
||||
- `default.dns_logs_ingest` 有新增数据(DNS)
|
||||
- Fluent Bit 日志中无 TLS 握手失败(`certificate`, `x509`, `tls`)
|
||||
|
||||
### 9.4 回滚
|
||||
|
||||
TLS 配置错误导致中断时,快速回滚:
|
||||
|
||||
```bash
|
||||
sudo cp /etc/fluent-bit/fluent-bit.conf.bak /etc/fluent-bit/fluent-bit.conf
|
||||
sudo systemctl restart fluent-bit
|
||||
```
|
||||
|
||||
回滚后恢复原 HTTP 模式,不影响平台 API/管理端配置。
|
||||
|
||||
---
|
||||
|
||||
## 十、平台托管模式(推荐)
|
||||
|
||||
从 `v1.4.7` 开始,Node/DNS 在线安装流程会由平台托管 Fluent Bit,默认不再要求逐台手改 `/etc/fluent-bit/fluent-bit.conf`。
|
||||
|
||||
### 10.1 托管行为
|
||||
|
||||
- 安装器优先使用发布包内置离线包(不走 `curl | sh`)。
|
||||
- 首次安装后写入:
|
||||
- `/etc/fluent-bit/fluent-bit.conf`
|
||||
- `/etc/fluent-bit/parsers.conf`
|
||||
- `/etc/fluent-bit/.edge-managed.env`
|
||||
- `/etc/fluent-bit/.edge-managed.json`
|
||||
- 配置发生变化时按 `hash` 幂等更新,仅在内容变化时重启服务。
|
||||
- Node 与 DNS 同机安装时会自动合并角色,输出单份配置。
|
||||
|
||||
### 10.2 托管元数据
|
||||
|
||||
平台会维护 `/etc/fluent-bit/.edge-managed.json`,核心字段:
|
||||
|
||||
- `roles`: 当前机器启用角色(`node`/`dns`)
|
||||
- `hash`: 当前托管配置摘要
|
||||
- `sourceVersion`: 平台版本号
|
||||
- `updatedAt`: 最近更新时间戳
|
||||
|
||||
### 10.3 支持矩阵(离线包)
|
||||
|
||||
当前固定支持以下平台键:
|
||||
|
||||
- `ubuntu22.04-amd64`
|
||||
- `ubuntu22.04-arm64`
|
||||
- `amzn2023-amd64`
|
||||
- `amzn2023-arm64`
|
||||
|
||||
构建阶段会校验矩阵包是否齐全,缺失会直接失败并打印期望文件路径。
|
||||
|
||||
### 10.4 手工配置兼容
|
||||
|
||||
- 若现有 `fluent-bit.conf` 不是平台托管文件(不含 `managed-by-edgeapi` 标记),安装器不会强制覆盖,会返回明确错误提示。
|
||||
- 需要切到托管模式时,先备份旧配置,再由平台触发一次安装/更新任务。
|
||||
|
||||
### 10.5 Resource Profile Notes (New)
|
||||
|
||||
- Managed default is now tuned for `2C4G` nodes (conservative and stable).
|
||||
- Additional sample profiles are provided for larger nodes:
|
||||
- `deploy/fluent-bit/fluent-bit-sample-4c8g.conf`
|
||||
- `deploy/fluent-bit/fluent-bit-sample-8c16g.conf`
|
||||
- These sample files are for benchmark/reference only and are not auto-applied by installer.
|
||||
- To use higher profiles in managed mode, sync those parameters into `EdgeAPI/internal/installers/fluent_bit.go` and then trigger node reinstall/upgrade.
|
||||
@@ -1,69 +0,0 @@
|
||||
# Sample profile for 4C8G nodes (Node + DNS on same host).
|
||||
# Replace Host/Port/URI and credentials according to your ClickHouse deployment.
|
||||
|
||||
[SERVICE]
|
||||
Flush 1
|
||||
Log_Level info
|
||||
Parsers_File parsers.conf
|
||||
storage.path /var/lib/fluent-bit/storage
|
||||
storage.sync normal
|
||||
storage.checksum off
|
||||
storage.backlog.mem_limit 512MB
|
||||
|
||||
[INPUT]
|
||||
Name tail
|
||||
Path /var/log/edge/edge-node/*.log
|
||||
Tag app.http.logs
|
||||
Parser json
|
||||
Refresh_Interval 2
|
||||
Read_from_Head false
|
||||
DB /var/lib/fluent-bit/http-logs.db
|
||||
storage.type filesystem
|
||||
Mem_Buf_Limit 256MB
|
||||
Skip_Long_Lines On
|
||||
|
||||
[INPUT]
|
||||
Name tail
|
||||
Path /var/log/edge/edge-dns/*.log
|
||||
Tag app.dns.logs
|
||||
Parser json
|
||||
Refresh_Interval 2
|
||||
Read_from_Head false
|
||||
DB /var/lib/fluent-bit/dns-logs.db
|
||||
storage.type filesystem
|
||||
Mem_Buf_Limit 256MB
|
||||
Skip_Long_Lines On
|
||||
|
||||
[OUTPUT]
|
||||
Name http
|
||||
Match app.http.logs
|
||||
Host 127.0.0.1
|
||||
Port 8443
|
||||
URI /?query=INSERT%20INTO%20default.logs_ingest%20FORMAT%20JSONEachRow
|
||||
Format json_lines
|
||||
http_user ${CH_USER}
|
||||
http_passwd ${CH_PASSWORD}
|
||||
json_date_key timestamp
|
||||
json_date_format epoch
|
||||
workers 2
|
||||
net.keepalive On
|
||||
Retry_Limit False
|
||||
tls On
|
||||
tls.verify On
|
||||
|
||||
[OUTPUT]
|
||||
Name http
|
||||
Match app.dns.logs
|
||||
Host 127.0.0.1
|
||||
Port 8443
|
||||
URI /?query=INSERT%20INTO%20default.dns_logs_ingest%20FORMAT%20JSONEachRow
|
||||
Format json_lines
|
||||
http_user ${CH_USER}
|
||||
http_passwd ${CH_PASSWORD}
|
||||
json_date_key timestamp
|
||||
json_date_format epoch
|
||||
workers 2
|
||||
net.keepalive On
|
||||
Retry_Limit False
|
||||
tls On
|
||||
tls.verify On
|
||||
@@ -1,69 +0,0 @@
|
||||
# Sample profile for 8C16G nodes (Node + DNS on same host).
|
||||
# Replace Host/Port/URI and credentials according to your ClickHouse deployment.
|
||||
|
||||
[SERVICE]
|
||||
Flush 1
|
||||
Log_Level info
|
||||
Parsers_File parsers.conf
|
||||
storage.path /var/lib/fluent-bit/storage
|
||||
storage.sync normal
|
||||
storage.checksum off
|
||||
storage.backlog.mem_limit 1024MB
|
||||
|
||||
[INPUT]
|
||||
Name tail
|
||||
Path /var/log/edge/edge-node/*.log
|
||||
Tag app.http.logs
|
||||
Parser json
|
||||
Refresh_Interval 1
|
||||
Read_from_Head false
|
||||
DB /var/lib/fluent-bit/http-logs.db
|
||||
storage.type filesystem
|
||||
Mem_Buf_Limit 512MB
|
||||
Skip_Long_Lines On
|
||||
|
||||
[INPUT]
|
||||
Name tail
|
||||
Path /var/log/edge/edge-dns/*.log
|
||||
Tag app.dns.logs
|
||||
Parser json
|
||||
Refresh_Interval 1
|
||||
Read_from_Head false
|
||||
DB /var/lib/fluent-bit/dns-logs.db
|
||||
storage.type filesystem
|
||||
Mem_Buf_Limit 512MB
|
||||
Skip_Long_Lines On
|
||||
|
||||
[OUTPUT]
|
||||
Name http
|
||||
Match app.http.logs
|
||||
Host 127.0.0.1
|
||||
Port 8443
|
||||
URI /?query=INSERT%20INTO%20default.logs_ingest%20FORMAT%20JSONEachRow
|
||||
Format json_lines
|
||||
http_user ${CH_USER}
|
||||
http_passwd ${CH_PASSWORD}
|
||||
json_date_key timestamp
|
||||
json_date_format epoch
|
||||
workers 4
|
||||
net.keepalive On
|
||||
Retry_Limit False
|
||||
tls On
|
||||
tls.verify On
|
||||
|
||||
[OUTPUT]
|
||||
Name http
|
||||
Match app.dns.logs
|
||||
Host 127.0.0.1
|
||||
Port 8443
|
||||
URI /?query=INSERT%20INTO%20default.dns_logs_ingest%20FORMAT%20JSONEachRow
|
||||
Format json_lines
|
||||
http_user ${CH_USER}
|
||||
http_passwd ${CH_PASSWORD}
|
||||
json_date_key timestamp
|
||||
json_date_format epoch
|
||||
workers 4
|
||||
net.keepalive On
|
||||
Retry_Limit False
|
||||
tls On
|
||||
tls.verify On
|
||||
@@ -1,62 +0,0 @@
|
||||
[SERVICE]
|
||||
Flush 1
|
||||
Log_Level info
|
||||
Parsers_File parsers.conf
|
||||
storage.path ./storage
|
||||
storage.sync normal
|
||||
|
||||
[INPUT]
|
||||
Name tail
|
||||
Path E:\var\log\edge\edge-node\*.log
|
||||
Tag app.http.logs
|
||||
Parser json
|
||||
Refresh_Interval 1
|
||||
Read_from_Head true
|
||||
DB ./http-logs.db
|
||||
Mem_Buf_Limit 128MB
|
||||
Skip_Long_Lines On
|
||||
|
||||
[INPUT]
|
||||
Name tail
|
||||
Path E:\var\log\edge\edge-dns\*.log
|
||||
Tag app.dns.logs
|
||||
Parser json
|
||||
Refresh_Interval 1
|
||||
Read_from_Head true
|
||||
DB ./dns-logs.db
|
||||
Mem_Buf_Limit 128MB
|
||||
Skip_Long_Lines On
|
||||
|
||||
[OUTPUT]
|
||||
Name http
|
||||
Match app.http.logs
|
||||
Host 127.0.0.1
|
||||
Port 8443
|
||||
URI /?query=INSERT+INTO+logs_ingest+FORMAT+JSONEachRow
|
||||
Format json_lines
|
||||
http_user ${CH_USER}
|
||||
http_passwd ${CH_PASSWORD}
|
||||
tls On
|
||||
tls.verify On
|
||||
# tls.ca_file C:\\path\\to\\ca.pem
|
||||
# tls.vhost clickhouse.example.com
|
||||
Json_Date_Key timestamp
|
||||
Json_Date_Format epoch
|
||||
Retry_Limit 10
|
||||
|
||||
[OUTPUT]
|
||||
Name http
|
||||
Match app.dns.logs
|
||||
Host 127.0.0.1
|
||||
Port 8443
|
||||
URI /?query=INSERT+INTO+dns_logs_ingest+FORMAT+JSONEachRow
|
||||
Format json_lines
|
||||
http_user ${CH_USER}
|
||||
http_passwd ${CH_PASSWORD}
|
||||
tls On
|
||||
tls.verify On
|
||||
# tls.ca_file C:\\path\\to\\ca.pem
|
||||
# tls.vhost clickhouse.example.com
|
||||
Json_Date_Key timestamp
|
||||
Json_Date_Format epoch
|
||||
Retry_Limit 10
|
||||
@@ -1,20 +0,0 @@
|
||||
# logrotate 示例:边缘节点日志轮转
|
||||
# 安装:放入 /etc/logrotate.d/edge-node 或 include 到主配置
|
||||
|
||||
/var/log/edge/edge-node/*.log {
|
||||
daily
|
||||
rotate 14
|
||||
compress
|
||||
missingok
|
||||
notifempty
|
||||
copytruncate
|
||||
}
|
||||
|
||||
/var/log/edge/edge-dns/*.log {
|
||||
daily
|
||||
rotate 14
|
||||
compress
|
||||
missingok
|
||||
notifempty
|
||||
copytruncate
|
||||
}
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
148
日志策略逻辑梳理与问题清单.md
148
日志策略逻辑梳理与问题清单.md
@@ -1,148 +0,0 @@
|
||||
# 日志策略逻辑梳理与问题清单(当前基线:`E:\AI_PRODUCT\waf-platform`)
|
||||
|
||||
## 1. 结论摘要
|
||||
|
||||
- 当前链路是 **`type` + `writeTargets` 双字段共同决定行为**。
|
||||
- 运行时真正用于读写判断的是 `writeTargets`(`ParseWriteTargetsFromPolicy` 解析结果)。
|
||||
- HTTP 与 DNS 都已接入“公用策略”下发,DNS 也已支持 ClickHouse 读取。
|
||||
- 目前存在多处逻辑不一致,核心风险是:**页面显示、数据库值、实际读写行为可能不同步**。
|
||||
|
||||
## 2. 关键入口文件
|
||||
|
||||
- 类型与组合映射:`EdgeCommon/pkg/serverconfigs/access_log_storages.go`
|
||||
- 写入目标定义/解析:`EdgeCommon/pkg/serverconfigs/access_log_write_targets.go`
|
||||
- 策略创建/更新(Admin):`EdgeAdmin/internal/web/actions/default/servers/accesslogs/createPopup.go`、`EdgeAdmin/internal/web/actions/default/servers/accesslogs/update.go`
|
||||
- 策略保存(API):`EdgeAPI/internal/rpc/services/service_http_access_log_policy_plus.go`
|
||||
- 策略落库(DAO):`EdgeAPI/internal/db/models/http_access_log_policy_dao.go`
|
||||
- 公用策略运行时缓存:`EdgeAPI/internal/accesslogs/storage_manager.go`
|
||||
- HTTP 节点队列:`EdgeNode/internal/nodes/http_access_log_queue.go`
|
||||
- DNS 节点队列:`EdgeDNS/internal/nodes/ns_access_log_queue.go`
|
||||
- 节点配置下发:`EdgeAPI/internal/db/models/node_dao.go`、`EdgeAPI/internal/db/models/ns_node_dao_plus.go`
|
||||
- HTTP 查询服务:`EdgeAPI/internal/rpc/services/service_http_access_log.go`
|
||||
- DNS 查询服务:`EdgeAPI/internal/rpc/services/nameservers/service_ns_access_log.go`
|
||||
- CH 查询实现:`EdgeAPI/internal/clickhouse/logs_ingest_store.go`、`EdgeAPI/internal/clickhouse/ns_logs_ingest_store.go`
|
||||
|
||||
## 3. 数据模型与语义
|
||||
|
||||
`edgeHTTPAccessLogPolicies` 关键字段:
|
||||
|
||||
- `type`:`file` / `file_mysql` / `file_clickhouse` / `file_mysql_clickhouse` / `es` / `tcp` / `syslog` / `command`
|
||||
- `writeTargets`:JSON(`file/mysql/clickhouse` 三个布尔值)
|
||||
- `disableDefaultDB`:停用默认数据库存储(兼容旧语义)
|
||||
|
||||
当前实际规则:
|
||||
|
||||
1. Admin 侧根据下拉 `type` 生成 `writeTargetsJSON`。
|
||||
2. API 原样落库(仅做少量历史 type 别名兼容)。
|
||||
3. 运行时使用 `ParseWriteTargetsFromPolicy(writeTargets, type, disableDefaultDB)` 得到最终写入目标。
|
||||
|
||||
## 4. 端到端链路(当前行为)
|
||||
|
||||
### 4.1 策略创建/更新
|
||||
|
||||
- 创建与更新都会调用 `ParseStorageTypeAndWriteTargets`,并同时提交 `type` 与 `writeTargetsJSON`。
|
||||
- `file_clickhouse` / `file_mysql_clickhouse` 在 UI 上隐藏了手填路径输入,依赖旧值或默认目录回退。
|
||||
- DAO 更新时,只有 `writeTargetsJSON` 非空才会覆盖 `writeTargets` 字段。
|
||||
|
||||
### 4.2 HTTP 写入链路
|
||||
|
||||
- Node 侧:
|
||||
- `needWriteFile = writeTargets == nil || writeTargets.NeedWriteFile()`
|
||||
- `needReportAPI = writeTargets == nil || writeTargets.NeedReportToAPI()`
|
||||
- API 侧:
|
||||
- `CreateHTTPAccessLogs` 里是否写 MySQL 由 `canWriteAccessLogsToDB() -> WriteMySQL()` 决定。
|
||||
- 同时调用 `writeAccessLogsToPolicy()`,把日志再交给公用策略存储引擎处理(如 file/es/tcp/syslog/command)。
|
||||
- 查询侧:
|
||||
- `shouldReadAccessLogsFromClickHouse()` 为真且 CH 配置可用时优先读 CH。
|
||||
- CH 失败后,按 `shouldReadAccessLogsFromMySQL()` 回退 MySQL。
|
||||
|
||||
### 4.3 DNS 写入链路
|
||||
|
||||
- DNS 节点:
|
||||
- `needWriteFile = targets == nil || targets.File || targets.ClickHouse`
|
||||
- `needReportAPI = targets == nil || targets.MySQL`
|
||||
- 即 CH-only 下 DNS 只写本地文件,不上报 API。
|
||||
- DNS API 查询:
|
||||
- 与 HTTP 一样优先 CH,再按策略回退 MySQL。
|
||||
|
||||
### 4.4 节点路径更新机制
|
||||
|
||||
- API 下发公用策略的 `AccessLogFilePath` 与 `AccessLogWriteTargets` 到 HTTP/DNS 节点配置。
|
||||
- Node/DNS 收到新配置后会 `SetDirByPolicyPath(...)` 并 `EnsureInit/Reopen/Close`,可自动切换目录。
|
||||
- 空路径时会回退到:
|
||||
- HTTP:`EDGE_LOG_DIR` 或默认 `/var/log/edge/edge-node`
|
||||
- DNS:`EDGE_DNS_LOG_DIR` 或默认 `/var/log/edge/edge-dns`
|
||||
|
||||
## 5. 行为矩阵(按当前代码)
|
||||
|
||||
- `file`
|
||||
- 写文件:是
|
||||
- 写 MySQL:否(仅当 `writeTargets.mysql=true` 才会写)
|
||||
- 读:优先 CH(若开启),否则按 MySQL 开关
|
||||
- `file_mysql`
|
||||
- 写文件:是
|
||||
- 写 MySQL:是
|
||||
- 读:MySQL 可读;若 CH 也开则优先 CH
|
||||
- `file_clickhouse`
|
||||
- 写文件:是
|
||||
- 写 MySQL:否(理论上)
|
||||
- 读:优先 CH;若 CH 不可用且 mysql=false,则返回空
|
||||
- `file_mysql_clickhouse`
|
||||
- 写文件:是
|
||||
- 写 MySQL:是
|
||||
- 读:优先 CH,失败回退 MySQL
|
||||
- `es/tcp/syslog/command`
|
||||
- 仍会由 `writeTargets` 决定是否 MySQL(当前解析默认给 MySQL=true)
|
||||
- 另外会通过策略引擎输出到对应目标
|
||||
|
||||
## 6. 逻辑问题清单(按优先级)
|
||||
|
||||
### P0:`type` 与 `writeTargets` 双真源,容易漂移
|
||||
|
||||
- 页面展示与回显会参考 `type`,实际写读判断优先看 `writeTargets`。
|
||||
- 一旦两者不一致,会出现“UI 看起来是 ClickHouse,实际还在写/读 MySQL”。
|
||||
|
||||
### P0:`disableDefaultDB` 在新链路中容易失效
|
||||
|
||||
- `WriteMySQL()` 优先看 `writeTargets.MySQL`,只有 `writeTargets` 为空才回退 `disableDefaultDB`。
|
||||
- 由于 Admin 基本总会提交 `writeTargetsJSON`,`disableDefaultDB` 常常不会真正生效。
|
||||
|
||||
### P1:HTTP 与 DNS 在 CH-only 场景上报 API 语义不一致
|
||||
|
||||
- HTTP:`NeedReportToAPI()` = `MySQL || ClickHouse`,CH-only 仍会上报 API。
|
||||
- DNS:CH-only 不上报 API,仅写文件给 Fluent Bit。
|
||||
- 高并发下会带来不必要的 API 压力与行为差异。
|
||||
|
||||
### P1:`file_clickhouse` 可能出现空路径,策略引擎会启动失败
|
||||
|
||||
- `FileStorage.Start()` 要求 `path` 非空。
|
||||
- 但 UI 在 clickhouse 组合类型隐藏路径输入,若 `options.path` 为空,策略引擎会报错(虽然节点本地写文件仍可回退目录工作)。
|
||||
|
||||
### P1:HTTP 可能出现“节点写文件 + API 再写文件”的重复路径
|
||||
|
||||
- `CreateHTTPAccessLogs` 无论是否写 MySQL,都会 `writeAccessLogsToPolicy()`。
|
||||
- 公用策略若为 file*,API 侧 `StorageManager.createStorage()` 会创建 `FileStorage` 并再次落文件。
|
||||
- 若目标是“仅节点写文件供 Fluent Bit 采集”,这会引入额外重复写入。
|
||||
|
||||
### P2:DNS `requestId` 生成算法有重复风险
|
||||
|
||||
- `ns_access_log_queue.go` 里 `timestamp/requestId` 为 `loop()` 局部变量,每轮 tick 重置。
|
||||
- 同秒跨批次可能冲突,影响游标分页与去重。
|
||||
|
||||
### P2:UI 文案分支存在不可达条件
|
||||
|
||||
- `createPopup.html` / `update.html` 在 `file|file_mysql` 区块内嵌了 clickhouse 条件文案分支,实际不会触发。
|
||||
- 不影响功能,但会增加理解成本。
|
||||
|
||||
## 7. 建议修复顺序
|
||||
|
||||
1. 先统一单一真源(建议 API 层统一按 `type` 规范化并覆盖 `writeTargets`)。
|
||||
2. 明确 `disableDefaultDB` 与 `writeTargets` 的优先级,避免“配置项在 UI 可选但不生效”。
|
||||
3. 统一 HTTP/DNS 在 CH-only 的上报语义(建议都走“节点文件 + Fluent Bit”,API 不再接收该流量)。
|
||||
4. 修复 file_clickhouse 空路径策略启动失败(要求路径 or 统一默认路径回填到 options)。
|
||||
5. 修复 DNS requestId 生成(全局原子递增或更高精度时间戳方案)。
|
||||
|
||||
## 8. 当前可用性判断
|
||||
|
||||
- 系统“可运行”,但配置行为存在歧义,且在高并发下会放大成本和排障难度。
|
||||
- 若目标是稳定的高吞吐日志链路,建议优先处理 P0/P1 问题后再继续线上放量。
|
||||
232
编译部署升级策略.md
232
编译部署升级策略.md
@@ -1,232 +0,0 @@
|
||||
# waf-platform 编译、部署、升级策略(WSL Ubuntu 22.04)
|
||||
|
||||
## 1. 适用范围
|
||||
|
||||
- 主基线:`E:\AI_PRODUCT\waf-platform`(不是 `waf-platform-1.4.5/1.4.6`)。
|
||||
- 本手册覆盖:
|
||||
- `EdgeAdmin` / `EdgeAPI` / `EdgeNode` / `EdgeDNS`
|
||||
- HTTP + DNS 访问日志策略
|
||||
- Fluent Bit + ClickHouse 日志链路
|
||||
|
||||
---
|
||||
|
||||
## 2. 关键结论(先看)
|
||||
|
||||
1. 用 `EdgeAdmin/build/build.sh` 编译时,会联动编译 `EdgeAPI`,并由 `EdgeAPI` 联动编译 `EdgeNode`。
|
||||
2. `EdgeDNS` 只有在 `plus` 模式下才会被 `EdgeAPI/build/build.sh` 自动编译并放入 deploy。
|
||||
3. 当前脚本已临时关闭自动 `arm64` 编译,只保留 `amd64` 自动链路。
|
||||
4. 如果你要发布“本次所有改动”(含 DNS/ClickHouse),建议统一用:
   ```bash
   cd /mnt/e/AI_PRODUCT/waf-platform/EdgeAdmin/build
   bash build.sh linux amd64 plus
   ```
5. DNS 节点与 Node 节点分离部署时,两边都要有 Fluent Bit(各自采集本机日志)。
|
||||
|
||||
---
|
||||
|
||||
## 3. 编译前检查
|
||||
|
||||
在 WSL Ubuntu 22.04 执行:
|
||||
|
||||
```bash
|
||||
cd /mnt/e/AI_PRODUCT/waf-platform
|
||||
git rev-parse --short HEAD
|
||||
go version
|
||||
which zip unzip go find sed
|
||||
```
|
||||
|
||||
建议:
|
||||
|
||||
- 线上 Ubuntu 22.04,尽量也在 Ubuntu 22.04 编译,避免 `GLIBC`/`GLIBCXX` 不兼容。
|
||||
- 若 Node plus 使用 cgo/libpcap/libbrotli,请确保构建机依赖完整。
|
||||
|
||||
---
|
||||
|
||||
## 4. 一键编译(推荐)
|
||||
|
||||
```bash
|
||||
cd /mnt/e/AI_PRODUCT/waf-platform/EdgeAdmin/build
|
||||
bash build.sh linux amd64 plus
|
||||
```
|
||||
|
||||
### 4.1 此命令会做什么
|
||||
|
||||
- 编译 `EdgeAdmin`
|
||||
- 自动调用 `EdgeAPI/build/build.sh`
|
||||
- `EdgeAPI` 自动编译并打包 `EdgeNode`(当前仅 linux/amd64)
|
||||
- `plus` 模式下,`EdgeAPI` 自动编译并打包 `EdgeDNS`(当前仅 linux/amd64)
|
||||
- 把 node/dns 包放入 API 的 `deploy` 目录用于远程安装
|
||||
|
||||
### 4.2 主要产物位置
|
||||
|
||||
- Admin 包:`EdgeAdmin/dist/edge-admin-linux-amd64-v*.zip`
|
||||
- API 包:`EdgeAPI/dist/edge-api-linux-amd64-v*.zip`
|
||||
- Node 包:`EdgeNode/dist/edge-node-linux-*.zip`
|
||||
- DNS 包:`EdgeDNS/dist/edge-dns-linux-*.zip`(plus 时)
|
||||
- API deploy 安装包目录:`EdgeAPI/build/deploy/`
|
||||
|
||||
---
|
||||
|
||||
## 5. 是否需要单独编译 API / DNS / Node
|
||||
|
||||
### 5.1 不需要单独编译 API 的场景
|
||||
|
||||
- 你已经执行 `EdgeAdmin/build/build.sh ... plus`,且要发布整套改动。
|
||||
|
||||
### 5.2 需要单独编译的场景
|
||||
|
||||
- 只改了 API,不想重新打 Admin:
|
||||
```bash
|
||||
cd /mnt/e/AI_PRODUCT/waf-platform/EdgeAPI/build
|
||||
bash build.sh linux amd64 plus
|
||||
```
|
||||
- 只改了 Node:
|
||||
```bash
|
||||
cd /mnt/e/AI_PRODUCT/waf-platform/EdgeNode/build
|
||||
bash build.sh linux amd64 plus
|
||||
```
|
||||
- 只改了 DNS:
|
||||
```bash
|
||||
cd /mnt/e/AI_PRODUCT/waf-platform/EdgeDNS/build
|
||||
bash build.sh linux amd64
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 6. 升级顺序(生产建议)
|
||||
|
||||
## 6.1 第一步:先改 ClickHouse(DDL)
|
||||
|
||||
先在 ClickHouse 建/改表,至少包含:
|
||||
|
||||
- `logs_ingest`(HTTP)
|
||||
- `dns_logs_ingest`(DNS)
|
||||
|
||||
先做 DDL 的原因:避免新版本写入时目标表不存在。
|
||||
|
||||
## 6.2 第二步:部署 Fluent Bit 配置
|
||||
|
||||
### Node 节点(HTTP)
|
||||
|
||||
- 配置文件目录一般是 `/etc/fluent-bit/`
|
||||
- 至少更新:
|
||||
- `fluent-bit.conf`(或你实际启用的 `fluent-bit-http.conf`)
|
||||
- `clickhouse-upstream.conf`
|
||||
- `parsers.conf`(通常可复用)
|
||||
|
||||
### DNS 节点(DNS)
|
||||
|
||||
- DNS 节点若之前没装 Fluent Bit,需要先安装并创建 service。
|
||||
- `curl https://raw.githubusercontent.com/fluent/fluent-bit/master/install.sh | sh`
|
||||
- `sudo apt-get update`
|
||||
- `sudo apt-get install -y fluent-bit`
|
||||
- 建议同样用 `/etc/fluent-bit/`,放:
|
||||
- `fluent-bit.conf`(DNS 版本或含 DNS INPUT/OUTPUT 的统一版本)
|
||||
- `clickhouse-upstream.conf`
|
||||
- `parsers.conf`
|
||||
|
||||
重启:
|
||||
|
||||
```bash
|
||||
sudo systemctl restart fluent-bit
|
||||
sudo systemctl status fluent-bit
|
||||
```
|
||||
|
||||
## 6.3 第三步:升级管理面(API + Admin)
|
||||
|
||||
在管理节点更新 `edge-api`、`edge-admin` 包并重启对应服务。

```bash
./bin/edge-api status
./bin/edge-api restart
```
|
||||
|
||||
## 6.4 第四步:升级数据面(Node / DNS)
|
||||
|
||||
- 通过 API 的远程安装/升级流程分批升级 Node、DNS
|
||||
- 或手工替换二进制后重启服务
|
||||
|
||||
## 6.5 第五步:最后切换日志策略
|
||||
|
||||
在页面启用目标策略(MySQL only / ClickHouse only / 双写),并验证读写链路。
|
||||
|
||||
---
|
||||
|
||||
## 7. 日志策略与读写行为(当前实现)
|
||||
|
||||
## 7.1 HTTP / DNS 共用语义
|
||||
|
||||
- `WriteMySQL=true`:写 MySQL(通过 API)
|
||||
- `WriteClickHouse=true`:写本地日志文件,由 Fluent Bit 异步采集进 CH
|
||||
- 两者都开:双写
|
||||
- 两者都关:不写
|
||||
|
||||
## 7.2 查询侧优先级
|
||||
|
||||
- 优先读 ClickHouse(可用且策略允许)
|
||||
- ClickHouse 异常时按策略回退 MySQL
|
||||
- 若两边都不可读,返回空
|
||||
|
||||
## 7.3 关于“日志文件路径”
|
||||
|
||||
- 现在前端已调整:当存储类型包含 ClickHouse 时,创建/编辑页隐藏“日志文件路径”输入。
|
||||
- 但 Fluent Bit 的 `Path` 必须匹配实际日志目录;若你改了日志目录,需要同步改 Fluent Bit 配置并重启。
|
||||
|
||||
---
|
||||
|
||||
## 8. 服务检查与常用命令
|
||||
|
||||
## 8.1 检查 Fluent Bit 服务名
|
||||
|
||||
```bash
|
||||
systemctl list-unit-files | grep -Ei 'fluent|td-agent-bit'
|
||||
systemctl status fluent-bit.service
|
||||
```
|
||||
|
||||
## 8.2 查看 Fluent Bit 实际使用的配置文件
|
||||
|
||||
```bash
|
||||
systemctl status fluent-bit.service
|
||||
```
|
||||
|
||||
重点看 `ExecStart`,例如:
|
||||
|
||||
```text
|
||||
/opt/fluent-bit/bin/fluent-bit -c /etc/fluent-bit/fluent-bit.conf
|
||||
```
|
||||
|
||||
## 8.3 验证 ClickHouse 是否有数据
|
||||
|
||||
```sql
|
||||
SELECT count() FROM default.logs_ingest;
|
||||
SELECT count() FROM default.dns_logs_ingest;
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 9. 回滚策略(最小影响)
|
||||
|
||||
1. 先把页面日志策略切回 MySQL-only。
|
||||
2. 回滚 API/Admin 到上一版本。
|
||||
3. Node/DNS 分批回滚。
|
||||
4. Fluent Bit 保留运行不影响主业务(只停止 CH 写入即可)。
|
||||
|
||||
---
|
||||
|
||||
## 10. 一次发布的最简执行清单
|
||||
|
||||
```bash
|
||||
# 1) 构建
|
||||
cd /mnt/e/AI_PRODUCT/waf-platform/EdgeAdmin/build
|
||||
bash build.sh linux amd64 plus
|
||||
|
||||
# 2) 上传产物
|
||||
# EdgeAdmin/dist/*.zip
|
||||
# EdgeAPI/dist/*.zip
|
||||
# EdgeAPI/build/deploy/* (node/dns installer zip)
|
||||
|
||||
# 3) 线上先执行 CH DDL
|
||||
# 4) 更新 fluent-bit 配置并重启
|
||||
sudo systemctl restart fluent-bit
|
||||
|
||||
# 5) 升级 edge-api / edge-admin 并重启
|
||||
# 6) 升级 edge-node / edge-dns
|
||||
# 7) 切日志策略并验证
|
||||
```
|
||||
---
|
||||
# 访问日志策略配置手册(默认安装 / 仅MySQL / 仅ClickHouse / 双写)
|
||||
|
||||
## 1. 适用范围
|
||||
- 代码基线:`e:\AI_PRODUCT\waf-platform`
|
||||
- 页面入口:`系统设置 -> 访问日志 -> 日志策略`
|
||||
- 查询入口:`网站 -> 站点 -> 日志`(`/servers/server/log`)
|
||||
|
||||
---
|
||||
|
||||
## 2. 默认安装后的行为(什么都不配)
|
||||
|
||||
```mermaid
flowchart TD
    A[EdgeNode 产生日志] --> B["写本地文件 /var/log/edge/edge-node/*.log"]
    A --> C[上报 EdgeAPI]
    C --> D[写 MySQL 访问日志表]
    E[日志查询页] --> D
```
|
||||
|
||||
- 默认即可写日志,不会因为没配 ClickHouse 就停写。
|
||||
- 查询默认走 MySQL。
|
||||
- 是否有“独立日志数据库节点”会影响写到哪个 MySQL:
|
||||
- 有日志库节点:优先写日志库节点池。
|
||||
- 没有日志库节点:回退写默认数据库。
|
||||
|
||||
---
|
||||
|
||||
## 3. 必须设置项(上线最小集)
|
||||
|
||||
### 3.1 基础必需(任何模式都建议)
|
||||
1. `EdgeAPI` 数据库连接可用(`db.yaml` / `.db.yaml`)。
|
||||
2. `EdgeNode` 与 `EdgeAPI` 通信正常(节点在线,可上报日志)。
|
||||
3. 建议创建并启用一个**公用**访问日志策略(避免多环境行为不一致)。
|
||||
|
||||
### 3.2 仅 ClickHouse / MySQL+ClickHouse 额外必需
|
||||
1. `EdgeAPI` 配置 ClickHouse 读取:
|
||||
- `EdgeAPI/configs/api.yaml`:
|
||||
```yaml
clickhouse:
  host: 127.0.0.1
  port: 8123
  user: default
  password: "xxxxxx"
  database: default
```
|
||||
2. Fluent Bit 已部署并运行,采集:
|
||||
- `/var/log/edge/edge-node/*.log`
|
||||
3. ClickHouse 已建表:`logs_ingest`(见 `deploy/fluent-bit/README.md`)。
|
||||
|
||||
### 3.3 本地日志轮转(默认开启)
|
||||
从当前版本开始,EdgeNode / EdgeDNS 使用内建 `lumberjack` 轮转,不再依赖系统 `logrotate`。
|
||||
|
||||
默认值:
|
||||
- `maxSizeMB=256`
|
||||
- `maxBackups=14`
|
||||
- `maxAgeDays=7`
|
||||
- `compress=false`
|
||||
- `localTime=true`
|
||||
|
||||
可在策略 `file.rotate` 中配置,例如:
|
||||
|
||||
```json
{
  "path": "/var/log/web-access-${date}.log",
  "autoCreate": true,
  "rotate": {
    "maxSizeMB": 256,
    "maxBackups": 14,
    "maxAgeDays": 7,
    "compress": false,
    "localTime": true
  }
}
```
|
||||
|
||||
---
|
||||
|
||||
## 4. 三种目标模式怎么配
|
||||
|
||||
## 4.1 只写入 MySQL
|
||||
|
||||
在“日志策略”中:
|
||||
1. 新建或修改策略,`存储类型` 选 **文件+MySQL**。
|
||||
2. 设为 **公用**,并确保 **启用**。
|
||||
3. `日志文件路径` 填一个 API 可写路径(必填校验项):
|
||||
- 示例:`/var/log/edge/edge-api/http-access-${date}.log`
|
||||
|
||||
结果:
|
||||
- 写入:MySQL(主路径)+ Node 本地日志文件
|
||||
- 查询:MySQL
|
||||
- 不依赖 ClickHouse
|
||||
|
||||
---
|
||||
|
||||
## 4.2 只写入 ClickHouse
|
||||
|
||||
在“日志策略”中:
|
||||
1. `存储类型` 选 **文件+ClickHouse**。
|
||||
2. 设为 **公用**,并确保 **启用**。
|
||||
3. `日志文件路径` 仍需填写(策略校验要求):
|
||||
- 示例:`/var/log/edge/edge-api/http-access-${date}.log`
|
||||
4. 确保 Fluent Bit 正在采集 Node 目录并写入 ClickHouse。
|
||||
5. 确保 `EdgeAPI` 的 ClickHouse 连接已配置。
|
||||
|
||||
结果:
|
||||
- 写入:Node 本地文件 -> Fluent Bit -> ClickHouse
|
||||
- API 不写 MySQL
|
||||
- 查询优先 ClickHouse(无 CH 时可能查不到数据)
|
||||
|
||||
---
|
||||
|
||||
## 4.3 同时写入 MySQL + ClickHouse
|
||||
|
||||
在“日志策略”中:
|
||||
1. `存储类型` 选 **文件+MySQL+ClickHouse**。
|
||||
2. 设为 **公用**,并确保 **启用**。
|
||||
3. `日志文件路径` 填写有效路径(同上)。
|
||||
4. ClickHouse + Fluent Bit 同 4.2 要求。
|
||||
|
||||
结果:
|
||||
- 写入:MySQL + ClickHouse(并行)
|
||||
- 查询:优先 ClickHouse,失败可回退 MySQL
|
||||
|
||||
---
|
||||
|
||||
## 5. 配置生效链路图
|
||||
|
||||
```mermaid
flowchart LR
    P["公用日志策略 type/writeTargets"] --> C[EdgeAPI 解析 writeTargets]
    C --> N[下发到 EdgeNode GlobalServerConfig.HTTPAccessLog.WriteTargets]
    N --> W1[NeedWriteFile]
    N --> W2[NeedReportToAPI]
    W1 --> F[Node本地日志文件]
    F --> FB[Fluent Bit]
    FB --> CH[(ClickHouse.logs_ingest)]
    W2 --> API[CreateHTTPAccessLogs]
    API --> MYSQL[(MySQL访问日志表)]
```
|
||||
|
||||
---
|
||||
|
||||
## 6. 验证清单(建议上线前逐项过)
|
||||
|
||||
1. 打开 `/servers/server/log`,持续压测 1~2 分钟。
|
||||
2. 检查最新日志是否持续刷新并出现在列表顶部(而不是停留在旧时间段)。
|
||||
3. 错误日志筛选是否只显示 `status>=400`。
|
||||
4. 仅 CH 模式下,停掉 Fluent Bit 后确认告警和查询表现符合预期。
|
||||
5. MySQL+CH 模式下,临时断 CH,确认页面可回退 MySQL。
|
||||
|
||||
---
|
||||
|
||||
## 7. 常见问题
|
||||
|
||||
### Q1:策略里的“日志文件路径”是干嘛的?
|
||||
- 这是策略 `file` 配置的必填项(API 侧校验)。
|
||||
- 即使你用 ClickHouse 链路,当前实现仍要求该字段有值。
|
||||
- 真正给 Fluent Bit 采集的是 **Node 目录**:`/var/log/edge/edge-node/*.log`。
|
||||
|
||||
### Q2:不勾“停用默认数据库存储”,会不会同时写默认库和独立日志库?
|
||||
- 正常不会双写同一条。
|
||||
- 有独立日志库节点时优先写节点池;节点池不可用时才回退默认库。
|
||||
|
||||
### Q3:修改策略后要不要重启?
|
||||
- 通常 1 分钟内自动刷新生效。
|
||||
- 若要立即生效:重启 `edge-api`,并在需要时重启 `edge-node`、`fluent-bit`。
|
||||
|
||||
---
|
||||
|
||||
## 8. DNS 日志与 HTTP 策略联动(新增)
|
||||
|
||||
从当前版本开始,DNS 访问日志与 HTTP 访问日志共享同一套公用策略语义(`writeTargets`):
|
||||
|
||||
- `WriteMySQL=true`:DNS 节点上报 API,API 写入 MySQL 分表。
|
||||
- `WriteClickHouse=true`:DNS 节点写本地 JSONL,Fluent Bit 采集写入 ClickHouse `dns_logs_ingest`。
|
||||
- 双开即双写;双关即不写(仅保留内存处理,不入库)。
|
||||
|
||||
### 8.1 DNS 写入链路
|
||||
|
||||
```mermaid
flowchart LR
    A[EdgeDNS 产生日志] --> B{writeTargets}
    B -->|MySQL=true| C[CreateNSAccessLogs]
    C --> D[(MySQL edgeNSAccessLogs_YYYYMMDD)]
    B -->|ClickHouse=true| E["/var/log/edge/edge-dns/access.log"]
    E --> F[Fluent Bit]
    F --> G[(ClickHouse dns_logs_ingest)]
```
|
||||
|
||||
### 8.2 DNS 查询链路
|
||||
|
||||
```mermaid
flowchart TD
    Q["/ns/clusters/accessLogs"] --> R{策略是否启用ClickHouse且CH可用}
    R -->|是| CH[(dns_logs_ingest)]
    R -->|否| M{策略是否启用MySQL}
    CH -->|查询失败| M
    M -->|是| MY[(MySQL edgeNSAccessLogs_YYYYMMDD)]
    M -->|否| E[返回空列表]
```
|
||||
|
||||
### 8.3 组合场景说明(DNS)
|
||||
|
||||
| 策略 | 写入 | 读取 |
|------|------|------|
| 仅 MySQL | API -> MySQL | MySQL |
| 仅 ClickHouse | 本地文件 -> Fluent Bit -> ClickHouse | ClickHouse |
| MySQL + ClickHouse | API -> MySQL + 本地文件 -> Fluent Bit -> ClickHouse | 优先 ClickHouse,失败回退 MySQL |
|
||||
|
||||
### 8.4 DNS 相关必须配置
|
||||
|
||||
1. `EdgeAPI` 配置 ClickHouse 连接(仅读 CH 时必须)。
|
||||
2. `deploy/fluent-bit/fluent-bit.conf` 已包含 DNS 输入:`/var/log/edge/edge-dns/*.log`。
|
||||
3. ClickHouse 已创建 `dns_logs_ingest` 表。
|
||||
4. EdgeDNS 运行用户对 `EDGE_DNS_LOG_DIR`(默认 `/var/log/edge/edge-dns`)有写权限。
|
||||