引入lumberjack和fluentbit自动分发
This commit is contained in:
111
deploy/clickhouse/README.md
Normal file
111
deploy/clickhouse/README.md
Normal file
@@ -0,0 +1,111 @@
# ClickHouse + Fluent Bit 使用手册(Ubuntu 22.04 / Amazon Linux 2023)

## 1. 支持范围

- Ubuntu 22.04
- Amazon Linux 2023(AWS)

安装脚本:`install_clickhouse_linux.sh`(自动识别上述系统)。

## 2. 安装 ClickHouse

```bash
cd /path/to/waf-platform/deploy/clickhouse
chmod +x install_clickhouse_linux.sh
sudo ./install_clickhouse_linux.sh
```

可选:安装时初始化 `default` 用户密码:

```bash
sudo CLICKHOUSE_DEFAULT_PASSWORD='YourStrongPassword' ./install_clickhouse_linux.sh
```

## 3. 开启 HTTPS(默认仅 crt+key)

脚本默认生成 `server.crt + server.key`(带 SAN)并启用 8443:

```bash
cd /path/to/waf-platform/deploy/clickhouse
chmod +x configure_clickhouse_https.sh
sudo CH_HTTPS_PORT=8443 \
  CH_CERT_CN=clickhouse.example.com \
  CH_CERT_DNS=clickhouse.example.com \
  CH_CERT_IP=<CLICKHOUSE_IP> \
  ./configure_clickhouse_https.sh
```

使用已有证书:

```bash
sudo SRC_CERT=/path/to/server.crt \
  SRC_KEY=/path/to/server.key \
  CH_HTTPS_PORT=8443 \
  ./configure_clickhouse_https.sh
```

## 4. 初始化日志表(含优化)

```bash
cd /path/to/waf-platform/deploy/clickhouse
chmod +x init_waf_logs_tables.sh
sudo CH_HOST=127.0.0.1 \
  CH_PORT=9000 \
  CH_USER=default \
  CH_PASSWORD='YourStrongPassword' \
  CH_DATABASE=default \
  ./init_waf_logs_tables.sh
```

说明:

- `init_waf_logs_tables.sql` 已内置主要优化(`CODEC`、`LowCardinality`、跳数索引)。
- `optimize_schema.sql` 主要用于历史表补齐优化,不是首次建表必需步骤。

## 5. 平台侧配置(EdgeAdmin)

在 ClickHouse 设置页配置:

- Host:ClickHouse 地址
- Port:`8443`
- Database:`default`
- Scheme:`https`

当前实现说明:

- 前端不再提供 `TLS跳过校验` 和 `TLS Server Name` 配置项。
- 后端固定 `TLSSkipVerify=true`(默认不校验证书)。

保存后点击“测试连接”。

## 6. Fluent Bit 配置方式

推荐平台托管模式(在线安装/升级 Node、DNS 时自动下发):

- `/etc/fluent-bit/fluent-bit.conf`
- `/etc/fluent-bit/.edge-managed.env`
- `/etc/fluent-bit/.edge-managed.json`

检查状态:

```bash
sudo systemctl status fluent-bit --no-pager
sudo cat /etc/fluent-bit/.edge-managed.json
```

## 7. 验证与排障

查看 Fluent Bit 日志:

```bash
sudo journalctl -u fluent-bit -f
```

查看写入:

```sql
SELECT count() FROM default.logs_ingest;
SELECT count() FROM default.dns_logs_ingest;
```

常见错误:

- `connection refused`:8443 未监听或网络未放行。
- `legacy Common Name`:证书缺 SAN,需重签。
227
deploy/clickhouse/configure_clickhouse_https.sh
Normal file
227
deploy/clickhouse/configure_clickhouse_https.sh
Normal file
@@ -0,0 +1,227 @@
|
||||
#!/usr/bin/env bash
# configure_clickhouse_https.sh
# Enable HTTPS on an existing ClickHouse server: generate or install TLS
# material, then write a config.d override that opens the HTTPS port.
# Supported platforms: Ubuntu 22.04 and Amazon Linux 2023 only.
set -euo pipefail

# Root is required: we may install packages and we write under /etc/clickhouse-server.
if [[ "${EUID}" -ne 0 ]]; then
  echo "[ERROR] please run as root"
  exit 1
fi

if [[ ! -f /etc/os-release ]]; then
  echo "[ERROR] /etc/os-release not found"
  exit 1
fi

# shellcheck disable=SC1091
source /etc/os-release
os_id="$(echo "${ID:-}" | tr '[:upper:]' '[:lower:]')"
os_ver="${VERSION_ID:-}"
is_ubuntu22=false
is_amzn2023=false

# Classify the host OS from the normalized id/version pair.
case "${os_id}/${os_ver}" in
  ubuntu/22.04*) is_ubuntu22=true ;;
  amzn/2023*) is_amzn2023=true ;;
esac

if [[ "${is_ubuntu22}" != "true" && "${is_amzn2023}" != "true" ]]; then
  echo "[ERROR] only Ubuntu 22.04 or Amazon Linux 2023 is supported. current: ID=${ID:-unknown}, VERSION_ID=${VERSION_ID:-unknown}"
  exit 1
fi

# Ensure openssl (cert generation) and curl (endpoint smoke test) exist.
if ! command -v openssl >/dev/null 2>&1 || ! command -v curl >/dev/null 2>&1; then
  if [[ "${is_ubuntu22}" == "true" ]]; then
    apt-get update -y
    DEBIAN_FRONTEND=noninteractive apt-get install -y openssl curl ca-certificates
  else
    dnf makecache -y
    dnf install -y openssl curl ca-certificates
  fi
fi

# Tunables — all overridable via the environment.
CH_HTTPS_PORT="${CH_HTTPS_PORT:-8443}"     # HTTPS listen port
CH_LISTEN_HOST="${CH_LISTEN_HOST:-::}"     # listen address written to config
CH_CERT_CN="${CH_CERT_CN:-$(hostname -f 2>/dev/null || hostname)}"
CH_CERT_DNS="${CH_CERT_DNS:-}"             # CSV of SAN DNS names
CH_CERT_IP="${CH_CERT_IP:-}"               # CSV of SAN IP addresses
CH_CERT_DAYS="${CH_CERT_DAYS:-825}"        # server cert validity (days)
CH_GENERATE_CA="${CH_GENERATE_CA:-false}"  # also create a local CA?

# Pre-existing certificate files; when set, generation is skipped.
SRC_CERT="${SRC_CERT:-}"
SRC_KEY="${SRC_KEY:-}"
SRC_CA="${SRC_CA:-}"

# Install locations.
CH_DIR="/etc/clickhouse-server"
CH_CONFIG_D_DIR="${CH_DIR}/config.d"
PKI_DIR="${CH_DIR}/pki"
SERVER_CERT="${CH_DIR}/server.crt"
SERVER_KEY="${CH_DIR}/server.key"
CA_CERT="${CH_DIR}/ca.crt"
OVERRIDE_FILE="${CH_CONFIG_D_DIR}/waf-https.xml"

mkdir -p "${CH_CONFIG_D_DIR}" "${PKI_DIR}"
|
||||
|
||||
# split_csv RAW
# Print each comma-separated item of RAW on its own line with surrounding
# whitespace trimmed; empty items are dropped; empty RAW prints nothing.
split_csv() {
  local raw="$1"
  local -a arr=()   # fix: previously undeclared and leaked into the caller
  local item        # fix: same leak for the loop variable
  if [[ -z "${raw}" ]]; then
    return 0
  fi
  IFS=',' read -r -a arr <<<"${raw}"
  for item in "${arr[@]}"; do
    item="$(echo "${item}" | xargs)"   # trim leading/trailing whitespace
    if [[ -n "${item}" ]]; then
      echo "${item}"
    fi
  done
}

# build_san_line
# Build an OpenSSL subjectAltName value from the CH_CERT_DNS / CH_CERT_IP
# CSV env vars, e.g. "DNS:a.com,DNS:b.com,IP:1.2.3.4". Falls back to
# "DNS:${CH_CERT_CN}" when neither variable contributes an entry.
build_san_line() {
  local san_entries=()
  local dns_item ip_item san_line   # fix: loop vars were leaking globals
  while IFS= read -r dns_item; do
    san_entries+=("DNS:${dns_item}")
  done < <(split_csv "${CH_CERT_DNS}")
  while IFS= read -r ip_item; do
    san_entries+=("IP:${ip_item}")
  done < <(split_csv "${CH_CERT_IP}")

  # A SAN-less certificate is rejected by modern clients; guarantee one entry.
  if [[ ${#san_entries[@]} -eq 0 ]]; then
    san_entries+=("DNS:${CH_CERT_CN}")
  fi

  local san_line
  san_line="$(IFS=,; echo "${san_entries[*]}")"
  echo "${san_line}"
}
|
||||
|
||||
# Create a self-signed server certificate (no CA file) under PKI_DIR and
# install it as ${SERVER_CERT}/${SERVER_KEY}; any stale ${CA_CERT} is removed.
generate_self_signed_cert() {
  echo "[INFO] generating self-signed server certificate (crt+key only) ..."
  local key_file="${PKI_DIR}/server.key"
  local csr_file="${PKI_DIR}/server.csr"
  local crt_file="${PKI_DIR}/server.crt"
  local ext_file="${PKI_DIR}/server.ext"
  local san_line
  san_line="$(build_san_line)"

  # 2048-bit RSA key plus a CSR carrying the requested CN.
  openssl genrsa -out "${key_file}" 2048
  openssl req -new -key "${key_file}" -out "${csr_file}" -subj "/CN=${CH_CERT_CN}"

  # X.509 v3 extensions; SAN is mandatory for modern TLS clients.
  {
    echo "subjectAltName=${san_line}"
    echo "keyUsage=digitalSignature,keyEncipherment"
    echo "extendedKeyUsage=serverAuth"
  } >"${ext_file}"

  openssl x509 -req -in "${csr_file}" -signkey "${key_file}" \
    -out "${crt_file}" -days "${CH_CERT_DAYS}" -sha256 -extfile "${ext_file}"

  cp -f "${crt_file}" "${SERVER_CERT}"
  cp -f "${key_file}" "${SERVER_KEY}"
  rm -f "${CA_CERT}"   # self-signed mode ships no CA file
}
|
||||
|
||||
# Create a local CA plus a CA-signed server certificate under PKI_DIR and
# install them as ${SERVER_CERT}/${SERVER_KEY}/${CA_CERT}.
generate_cert_with_ca() {
  echo "[INFO] generating local CA and server certificate ..."
  local ca_key_file="${PKI_DIR}/ca.key"
  local ca_crt_file="${PKI_DIR}/ca.crt"
  local key_file="${PKI_DIR}/server.key"
  local csr_file="${PKI_DIR}/server.csr"
  local crt_file="${PKI_DIR}/server.crt"
  local ext_file="${PKI_DIR}/server.ext"
  local san_line
  san_line="$(build_san_line)"

  # 10-year self-signed CA with a 4096-bit key.
  openssl genrsa -out "${ca_key_file}" 4096
  openssl req -x509 -new -nodes -key "${ca_key_file}" -sha256 -days 3650 \
    -out "${ca_crt_file}" -subj "/CN=ClickHouse Local CA"

  # Server key and CSR.
  openssl genrsa -out "${key_file}" 2048
  openssl req -new -key "${key_file}" -out "${csr_file}" -subj "/CN=${CH_CERT_CN}"

  # X.509 v3 extensions; SAN is mandatory for modern TLS clients.
  {
    echo "subjectAltName=${san_line}"
    echo "keyUsage=digitalSignature,keyEncipherment"
    echo "extendedKeyUsage=serverAuth"
  } >"${ext_file}"

  openssl x509 -req -in "${csr_file}" -CA "${ca_crt_file}" -CAkey "${ca_key_file}" -CAcreateserial \
    -out "${crt_file}" -days "${CH_CERT_DAYS}" -sha256 -extfile "${ext_file}"

  cp -f "${crt_file}" "${SERVER_CERT}"
  cp -f "${key_file}" "${SERVER_KEY}"
  cp -f "${ca_crt_file}" "${CA_CERT}"
}
|
||||
|
||||
# ---------------------------------------------------------------------------
# Choose the certificate source: caller-provided files win over generation.
# ---------------------------------------------------------------------------
if [[ -n "${SRC_CERT}" || -n "${SRC_KEY}" ]]; then
  if [[ -z "${SRC_CERT}" || -z "${SRC_KEY}" ]]; then
    echo "[ERROR] SRC_CERT and SRC_KEY must be provided together"
    exit 1
  fi
  echo "[INFO] using provided certificate files ..."
  cp -f "${SRC_CERT}" "${SERVER_CERT}"
  cp -f "${SRC_KEY}" "${SERVER_KEY}"
  if [[ -n "${SRC_CA}" ]]; then
    cp -f "${SRC_CA}" "${CA_CERT}"
  else
    rm -f "${CA_CERT}"   # drop any stale CA from a previous run
  fi
else
  # Accept 1/true/yes/on (any case) as "generate a local CA as well".
  generate_ca_flag="$(echo "${CH_GENERATE_CA}" | tr '[:upper:]' '[:lower:]')"
  if [[ "${generate_ca_flag}" == "1" || "${generate_ca_flag}" == "true" \
    || "${generate_ca_flag}" == "yes" || "${generate_ca_flag}" == "on" ]]; then
    generate_cert_with_ca
  else
    generate_self_signed_cert
  fi
fi

# Ownership/permissions: cert world-readable, key restricted to owner/group.
# chown is best-effort in case the clickhouse user does not exist yet.
chown clickhouse:clickhouse "${SERVER_CERT}" "${SERVER_KEY}" || true
chmod 0644 "${SERVER_CERT}"
chmod 0640 "${SERVER_KEY}"
if [[ -f "${CA_CERT}" ]]; then
  chown clickhouse:clickhouse "${CA_CERT}" || true
  chmod 0644 "${CA_CERT}"
fi

echo "[INFO] writing ClickHouse HTTPS override config ..."
cat >"${OVERRIDE_FILE}" <<EOF
<clickhouse>
<https_port>${CH_HTTPS_PORT}</https_port>
<listen_host>${CH_LISTEN_HOST}</listen_host>
<openSSL>
<server>
<certificateFile>${SERVER_CERT}</certificateFile>
<privateKeyFile>${SERVER_KEY}</privateKeyFile>
<verificationMode>none</verificationMode>
<loadDefaultCAFile>true</loadDefaultCAFile>
<cacheSessions>true</cacheSessions>
<disableProtocols>sslv2,sslv3</disableProtocols>
<preferServerCiphers>true</preferServerCiphers>
<invalidCertificateHandler>
<name>RejectCertificateHandler</name>
</invalidCertificateHandler>
</server>
</openSSL>
</clickhouse>
EOF

echo "[INFO] restarting clickhouse-server ..."
systemctl restart clickhouse-server
sleep 2   # give the server a moment to bind its ports

echo "[INFO] service status ..."
systemctl --no-pager -l status clickhouse-server | sed -n '1,15p'

# Best-effort smoke test; -k because the cert may be self-signed. Non-fatal.
echo "[INFO] verifying HTTPS endpoint ..."
curl -sk "https://127.0.0.1:${CH_HTTPS_PORT}/?query=SELECT%201" || true
echo

echo "[OK] ClickHouse HTTPS setup finished"
echo " HTTPS port : ${CH_HTTPS_PORT}"
echo " cert file : ${SERVER_CERT}"
echo " key file : ${SERVER_KEY}"
if [[ -f "${CA_CERT}" ]]; then
  echo " CA file : ${CA_CERT}"
  echo " import this CA file into API/Fluent Bit hosts if tls.verify=On"
fi
|
||||
38
deploy/clickhouse/init_waf_logs_tables.sh
Normal file
38
deploy/clickhouse/init_waf_logs_tables.sh
Normal file
@@ -0,0 +1,38 @@
|
||||
#!/usr/bin/env bash
# init_waf_logs_tables.sh
# Create the target database (if needed) and initialize the WAF log ingest
# tables from init_waf_logs_tables.sql via clickhouse-client.
#
# Env vars (all optional): CH_HOST, CH_PORT, CH_USER, CH_PASSWORD, CH_DATABASE.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
SQL_FILE="${SCRIPT_DIR}/init_waf_logs_tables.sql"

if [[ ! -f "${SQL_FILE}" ]]; then
  echo "[ERROR] SQL file not found: ${SQL_FILE}"
  exit 1
fi

if ! command -v clickhouse-client >/dev/null 2>&1; then
  echo "[ERROR] clickhouse-client not found. Please install ClickHouse client first."
  exit 1
fi

CH_HOST="${CH_HOST:-127.0.0.1}"
CH_PORT="${CH_PORT:-9000}"
CH_USER="${CH_USER:-default}"
CH_PASSWORD="${CH_PASSWORD:-}"
CH_DATABASE="${CH_DATABASE:-default}"

# CH_DATABASE is interpolated into SQL text below; restrict it to a plain
# identifier so a malformed/hostile value cannot inject extra statements.
if [[ ! "${CH_DATABASE}" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]]; then
  echo "[ERROR] invalid CH_DATABASE name: ${CH_DATABASE}"
  exit 1
fi

args=(--host "${CH_HOST}" --port "${CH_PORT}" --user "${CH_USER}")
if [[ -n "${CH_PASSWORD}" ]]; then
  args+=(--password "${CH_PASSWORD}")
fi

echo "[INFO] creating database if not exists: ${CH_DATABASE}"
clickhouse-client "${args[@]}" --query "CREATE DATABASE IF NOT EXISTS ${CH_DATABASE}"

echo "[INFO] initializing tables in database: ${CH_DATABASE}"
clickhouse-client "${args[@]}" --database "${CH_DATABASE}" < "${SQL_FILE}"

echo "[INFO] checking table status ..."
clickhouse-client "${args[@]}" --database "${CH_DATABASE}" --query \
  "SELECT name, engine FROM system.tables WHERE database='${CH_DATABASE}' AND name IN ('logs_ingest','dns_logs_ingest') ORDER BY name"

echo "[OK] ClickHouse ingest tables are ready in database '${CH_DATABASE}'"
|
||||
69
deploy/clickhouse/init_waf_logs_tables.sql
Normal file
69
deploy/clickhouse/init_waf_logs_tables.sql
Normal file
@@ -0,0 +1,69 @@
|
||||
-- Initialize HTTP and DNS ingest tables for GoEdge access logs.
-- Run with:
--   clickhouse-client --database <db_name> < init_waf_logs_tables.sql

-- HTTP access-log ingest table. Daily partitions; ordered by time first so
-- time-range scans stay cheap; trace_id in the key aids detail lookups.
CREATE TABLE IF NOT EXISTS logs_ingest
(
    -- DoubleDelta compresses near-monotonic timestamps very well.
    timestamp DateTime CODEC(DoubleDelta, ZSTD(1)),
    node_id UInt64,
    cluster_id UInt64,
    server_id UInt64,
    host LowCardinality(String),
    ip String,
    method LowCardinality(String),
    path String CODEC(ZSTD(1)),
    status UInt16,
    -- Delta + ZSTD exploits correlation between adjacent rows.
    bytes_in UInt64 CODEC(Delta, ZSTD(1)),
    bytes_out UInt64 CODEC(Delta, ZSTD(1)),
    cost_ms UInt32 CODEC(Delta, ZSTD(1)),
    ua String CODEC(ZSTD(1)),
    referer String CODEC(ZSTD(1)),
    log_type LowCardinality(String),
    trace_id String,
    firewall_policy_id UInt64 DEFAULT 0,
    firewall_rule_group_id UInt64 DEFAULT 0,
    firewall_rule_set_id UInt64 DEFAULT 0,
    firewall_rule_id UInt64 DEFAULT 0,
    -- Large text payloads use ZSTD(3). Fix: the DEFAULT clause must precede
    -- CODEC per ClickHouse's documented column grammar
    -- (name type [DEFAULT expr] [CODEC(...)]); the original order was inverted.
    request_headers String DEFAULT '' CODEC(ZSTD(3)),
    request_body String DEFAULT '' CODEC(ZSTD(3)),
    response_headers String DEFAULT '' CODEC(ZSTD(3)),
    response_body String DEFAULT '' CODEC(ZSTD(3)),
    -- Data-skipping indexes for the dominant filters.
    INDEX idx_trace_id trace_id TYPE bloom_filter(0.01) GRANULARITY 4,
    INDEX idx_ip ip TYPE bloom_filter(0.01) GRANULARITY 4,
    INDEX idx_host host TYPE tokenbf_v1(10240, 3, 0) GRANULARITY 4,
    INDEX idx_fw_policy firewall_policy_id TYPE minmax GRANULARITY 4,
    INDEX idx_status status TYPE minmax GRANULARITY 4
)
ENGINE = MergeTree
PARTITION BY toYYYYMMDD(timestamp)
ORDER BY (timestamp, node_id, server_id, trace_id)
SETTINGS index_granularity = 8192;
|
||||
|
||||
-- DNS query/answer log ingest table, mirroring the layout of logs_ingest.
CREATE TABLE IF NOT EXISTS dns_logs_ingest
(
    -- DoubleDelta compresses near-monotonic timestamps very well.
    timestamp DateTime CODEC(DoubleDelta, ZSTD(1)),
    request_id String,
    node_id UInt64,
    cluster_id UInt64,
    domain_id UInt64,
    record_id UInt64,
    remote_addr String,
    question_name String,
    question_type LowCardinality(String),
    record_name String,
    record_type LowCardinality(String),
    record_value String,
    networking LowCardinality(String),
    is_recursive UInt8,
    error String CODEC(ZSTD(1)),
    ns_route_codes Array(String),
    -- Fix: DEFAULT must precede CODEC per ClickHouse's documented column
    -- grammar (name type [DEFAULT expr] [CODEC(...)]).
    content_json String DEFAULT '' CODEC(ZSTD(3)),
    -- Data-skipping indexes for the dominant filters.
    INDEX idx_request_id request_id TYPE bloom_filter(0.01) GRANULARITY 4,
    INDEX idx_remote_addr remote_addr TYPE bloom_filter(0.01) GRANULARITY 4,
    INDEX idx_question_name question_name TYPE tokenbf_v1(10240, 3, 0) GRANULARITY 4,
    INDEX idx_domain_id domain_id TYPE minmax GRANULARITY 4
)
ENGINE = MergeTree
PARTITION BY toYYYYMMDD(timestamp)
ORDER BY (timestamp, request_id, node_id)
SETTINGS index_granularity = 8192;
|
||||
95
deploy/clickhouse/install_clickhouse_linux.sh
Normal file
95
deploy/clickhouse/install_clickhouse_linux.sh
Normal file
@@ -0,0 +1,95 @@
|
||||
#!/usr/bin/env bash
# install_clickhouse_linux.sh
# Install clickhouse-server + clickhouse-client on Ubuntu 22.04 or
# Amazon Linux 2023 from the official ClickHouse package repositories.
#
# Optional env:
#   CLICKHOUSE_DEFAULT_PASSWORD - when set, the 'default' user password is
#                                 initialized after install (fresh installs).
set -euo pipefail

if [[ "${EUID}" -ne 0 ]]; then
  echo "[ERROR] please run as root"
  exit 1
fi

if [[ ! -f /etc/os-release ]]; then
  echo "[ERROR] /etc/os-release not found"
  exit 1
fi

# shellcheck disable=SC1091
source /etc/os-release
os_id="$(echo "${ID:-}" | tr '[:upper:]' '[:lower:]')"
os_ver="${VERSION_ID:-}"
is_ubuntu22=false
is_amzn2023=false

if [[ "${os_id}" == "ubuntu" && "${os_ver}" == 22.04* ]]; then
  is_ubuntu22=true
fi
if [[ "${os_id}" == "amzn" && "${os_ver}" == 2023* ]]; then
  is_amzn2023=true
fi

if [[ "${is_ubuntu22}" != "true" && "${is_amzn2023}" != "true" ]]; then
  echo "[ERROR] only Ubuntu 22.04 or Amazon Linux 2023 is supported. current: ID=${ID:-unknown}, VERSION_ID=${VERSION_ID:-unknown}"
  exit 1
fi

if [[ "${is_ubuntu22}" == "true" ]]; then
  echo "[INFO] detected Ubuntu 22.04"
  echo "[INFO] installing prerequisites ..."
  apt-get update -y
  DEBIAN_FRONTEND=noninteractive apt-get install -y curl ca-certificates gnupg apt-transport-https lsb-release

  echo "[INFO] configuring ClickHouse apt repository ..."
  install -d -m 0755 /etc/apt/keyrings
  if [[ ! -f /etc/apt/keyrings/clickhouse.gpg ]]; then
    curl -fsSL https://packages.clickhouse.com/CLICKHOUSE-KEY.GPG | gpg --dearmor -o /etc/apt/keyrings/clickhouse.gpg
  fi

  cat >/etc/apt/sources.list.d/clickhouse.list <<'EOF'
deb [signed-by=/etc/apt/keyrings/clickhouse.gpg arch=amd64,arm64] https://packages.clickhouse.com/deb stable main
EOF

  echo "[INFO] installing clickhouse-server and clickhouse-client ..."
  apt-get update -y
  DEBIAN_FRONTEND=noninteractive apt-get install -y clickhouse-server clickhouse-client clickhouse-common-static
fi

if [[ "${is_amzn2023}" == "true" ]]; then
  echo "[INFO] detected Amazon Linux 2023"
  echo "[INFO] installing prerequisites ..."
  dnf makecache -y
  # Fix: AL2023 ships curl-minimal, which conflicts with the full curl
  # package; --allowerasing lets dnf swap it in instead of failing.
  dnf install -y --allowerasing curl ca-certificates gnupg2 dnf-plugins-core

  echo "[INFO] configuring ClickHouse yum repository ..."
  # Fix: the second gpgkey URL is a continuation line and must be indented,
  # otherwise the INI parser rejects the repo file.
  cat >/etc/yum.repos.d/clickhouse.repo <<'EOF'
[clickhouse-stable]
name=ClickHouse Stable Repository
baseurl=https://packages.clickhouse.com/rpm/stable/$basearch
enabled=1
gpgcheck=1
gpgkey=https://packages.clickhouse.com/rpm/stable/repodata/repomd.xml.key
       https://packages.clickhouse.com/rpm/clickhouse-static.key
EOF

  echo "[INFO] installing clickhouse-server and clickhouse-client ..."
  dnf clean all
  dnf makecache -y
  # Some mirrors do not carry clickhouse-common-static; fall back without it.
  if ! dnf install -y clickhouse-server clickhouse-client clickhouse-common-static; then
    dnf install -y clickhouse-server clickhouse-client
  fi
fi

echo "[INFO] enabling clickhouse-server ..."
systemctl enable clickhouse-server >/dev/null 2>&1 || true
systemctl restart clickhouse-server
sleep 2   # give the server a moment to accept connections

if [[ -n "${CLICKHOUSE_DEFAULT_PASSWORD:-}" ]]; then
  echo "[INFO] setting default user password ..."
  # The password is embedded in single quotes below; an embedded quote would
  # break out of the SQL literal, so refuse it.
  if [[ "${CLICKHOUSE_DEFAULT_PASSWORD}" == *"'"* ]]; then
    echo "[ERROR] CLICKHOUSE_DEFAULT_PASSWORD contains single quote, please set password manually with clickhouse-client"
    exit 1
  fi
  # NOTE(review): assumes 'default' has no password yet (fresh install); on a
  # re-run with the password already set, this statement fails to authenticate.
  clickhouse-client --query "ALTER USER default IDENTIFIED WITH plaintext_password BY '${CLICKHOUSE_DEFAULT_PASSWORD}'"
fi

echo "[INFO] health check ..."
# Fix: once a password has been set above, an anonymous connection is
# rejected and 'set -e' aborted the script right after a successful install.
# Authenticate with the freshly set password when we have one.
if [[ -n "${CLICKHOUSE_DEFAULT_PASSWORD:-}" ]]; then
  clickhouse-client --password "${CLICKHOUSE_DEFAULT_PASSWORD}" --query "SELECT version()"
else
  clickhouse-client --query "SELECT version()"
fi
echo "[OK] ClickHouse install completed: ID=${ID:-unknown}, VERSION_ID=${VERSION_ID:-unknown}"
|
||||
123
deploy/clickhouse/optimize_schema.sql
Normal file
123
deploy/clickhouse/optimize_schema.sql
Normal file
@@ -0,0 +1,123 @@
|
||||
-- =============================================================================
-- Schema optimization script for the ClickHouse logs_ingest tables.
--
-- Notes:
--   - Every ALTER below is an online operation; no downtime is required.
--   - Run the stages in order; check system.parts after each stage.
--   - Codec changes only affect newly written parts; existing data is
--     rewritten on merge or via a manual OPTIMIZE.
--
-- Usage:
--   clickhouse-client --host 127.0.0.1 --port 9000 --user default --password 'xxx' < optimize_schema.sql
-- =============================================================================

-- =============================================
-- Stage 1: compression for large columns (biggest win)
-- =============================================

-- Large text columns move to ZSTD(3): far better than default LZ4 for
-- JSON / HTTP text. Expected disk savings: 40%-60%.
ALTER TABLE logs_ingest MODIFY COLUMN request_headers String CODEC(ZSTD(3));
ALTER TABLE logs_ingest MODIFY COLUMN request_body String CODEC(ZSTD(3));
ALTER TABLE logs_ingest MODIFY COLUMN response_headers String CODEC(ZSTD(3));
ALTER TABLE logs_ingest MODIFY COLUMN response_body String CODEC(ZSTD(3));

-- Medium-length text columns use ZSTD(1), balancing ratio against CPU cost.
ALTER TABLE logs_ingest MODIFY COLUMN ua String CODEC(ZSTD(1));
ALTER TABLE logs_ingest MODIFY COLUMN path String CODEC(ZSTD(1));
ALTER TABLE logs_ingest MODIFY COLUMN referer String CODEC(ZSTD(1));

-- Low-cardinality columns switch to LowCardinality (less RAM and disk).
-- method has very few distinct values (GET/POST/PUT/DELETE/...);
-- host cardinality depends on the number of sites.
ALTER TABLE logs_ingest MODIFY COLUMN method LowCardinality(String);
ALTER TABLE logs_ingest MODIFY COLUMN log_type LowCardinality(String);
ALTER TABLE logs_ingest MODIFY COLUMN host LowCardinality(String);

-- Numeric columns use Delta + ZSTD (adjacent rows correlate in time/size).
ALTER TABLE logs_ingest MODIFY COLUMN bytes_in UInt64 CODEC(Delta, ZSTD(1));
ALTER TABLE logs_ingest MODIFY COLUMN bytes_out UInt64 CODEC(Delta, ZSTD(1));
ALTER TABLE logs_ingest MODIFY COLUMN cost_ms UInt32 CODEC(Delta, ZSTD(1));

-- =============================================
-- Stage 2: data-skipping indexes for frequent filters
-- =============================================

-- Exact trace_id lookup (log detail view, FindByTraceId).
-- bloom_filter(0.01) = 1% false positives; GRANULARITY 4 = one bloom block
-- per 4 granules.
ALTER TABLE logs_ingest ADD INDEX IF NOT EXISTS idx_trace_id trace_id TYPE bloom_filter(0.01) GRANULARITY 4;

-- Exact IP lookup.
ALTER TABLE logs_ingest ADD INDEX IF NOT EXISTS idx_ip ip TYPE bloom_filter(0.01) GRANULARITY 4;

-- host substring search (tokenbf_v1 helps LIKE '%xxx%').
ALTER TABLE logs_ingest ADD INDEX IF NOT EXISTS idx_host host TYPE tokenbf_v1(10240, 3, 0) GRANULARITY 4;

-- firewall_policy_id filter (HasFirewallPolicy: > 0).
ALTER TABLE logs_ingest ADD INDEX IF NOT EXISTS idx_fw_policy firewall_policy_id TYPE minmax GRANULARITY 4;

-- status range filter (HasError: status >= 400).
ALTER TABLE logs_ingest ADD INDEX IF NOT EXISTS idx_status status TYPE minmax GRANULARITY 4;

-- =============================================
-- Stage 3: materialize the indexes over existing data
-- =============================================
-- MATERIALIZE INDEX starts a background mutation; large tables take a while.
-- Monitor with: SELECT * FROM system.mutations WHERE is_done = 0

ALTER TABLE logs_ingest MATERIALIZE INDEX idx_trace_id;
ALTER TABLE logs_ingest MATERIALIZE INDEX idx_ip;
ALTER TABLE logs_ingest MATERIALIZE INDEX idx_host;
ALTER TABLE logs_ingest MATERIALIZE INDEX idx_fw_policy;
ALTER TABLE logs_ingest MATERIALIZE INDEX idx_status;


-- =============================================================================
-- dns_logs_ingest optimizations (DNS log table)
-- =============================================================================

-- Large text columns.
ALTER TABLE dns_logs_ingest MODIFY COLUMN content_json String CODEC(ZSTD(3));
ALTER TABLE dns_logs_ingest MODIFY COLUMN error String CODEC(ZSTD(1));

-- Low-cardinality columns.
ALTER TABLE dns_logs_ingest MODIFY COLUMN question_type LowCardinality(String);
ALTER TABLE dns_logs_ingest MODIFY COLUMN record_type LowCardinality(String);
ALTER TABLE dns_logs_ingest MODIFY COLUMN networking LowCardinality(String);

-- Exact request_id lookup.
ALTER TABLE dns_logs_ingest ADD INDEX IF NOT EXISTS idx_request_id request_id TYPE bloom_filter(0.01) GRANULARITY 4;

-- Exact remote_addr lookup.
ALTER TABLE dns_logs_ingest ADD INDEX IF NOT EXISTS idx_remote_addr remote_addr TYPE bloom_filter(0.01) GRANULARITY 4;

-- question_name substring search.
ALTER TABLE dns_logs_ingest ADD INDEX IF NOT EXISTS idx_question_name question_name TYPE tokenbf_v1(10240, 3, 0) GRANULARITY 4;

-- domain_id filter.
ALTER TABLE dns_logs_ingest ADD INDEX IF NOT EXISTS idx_domain_id domain_id TYPE minmax GRANULARITY 4;

-- Materialize the DNS indexes over existing data.
ALTER TABLE dns_logs_ingest MATERIALIZE INDEX idx_request_id;
ALTER TABLE dns_logs_ingest MATERIALIZE INDEX idx_remote_addr;
ALTER TABLE dns_logs_ingest MATERIALIZE INDEX idx_question_name;
ALTER TABLE dns_logs_ingest MATERIALIZE INDEX idx_domain_id;


-- =============================================================================
-- Verification queries (run after the ALTERs above)
-- =============================================================================

-- Column compression codecs:
-- SELECT name, type, compression_codec FROM system.columns WHERE table = 'logs_ingest' AND database = currentDatabase();

-- Table compression ratio:
-- SELECT table, formatReadableSize(sum(data_compressed_bytes)) AS compressed, formatReadableSize(sum(data_uncompressed_bytes)) AS uncompressed, round(sum(data_uncompressed_bytes) / sum(data_compressed_bytes), 2) AS ratio FROM system.columns WHERE table IN ('logs_ingest', 'dns_logs_ingest') GROUP BY table;

-- Disk usage per column (find the largest ones):
-- SELECT name, formatReadableSize(sum(data_compressed_bytes)) AS compressed, formatReadableSize(sum(data_uncompressed_bytes)) AS uncompressed FROM system.columns WHERE table = 'logs_ingest' GROUP BY name ORDER BY sum(data_compressed_bytes) DESC;

-- Mutation progress:
-- SELECT database, table, mutation_id, command, is_done, parts_to_do FROM system.mutations WHERE is_done = 0;

-- Force merges (optional; applies codec changes to existing parts):
-- OPTIMIZE TABLE logs_ingest FINAL;
-- OPTIMIZE TABLE dns_logs_ingest FINAL;
|
||||
Reference in New Issue
Block a user