feat: 自启动与自恢复机制

This commit is contained in:
2026-04-13 21:55:40 +08:00
parent 2f507a7546
commit 25c68530ba
19 changed files with 1151 additions and 451 deletions

View File

@@ -0,0 +1,97 @@
#!/usr/bin/env bash
# Fault-injection helper (invoked as blitz-fault-inject.sh per its usage text).
# Depending on the sub-command it either signals the main process of one of
# the two systemd services named below, or creates/removes a flag file under
# ${BLITZ_RUNTIME_DIR}.
#
# Strict mode: abort on command failure, on unset variables, and on a failure
# in any stage of a pipeline.
set -euo pipefail
# Absolute directory containing this script, so common.sh is found regardless
# of the caller's working directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# The blitz_* helpers and BLITZ_* variables used in this script are not defined
# here; presumably they come from common.sh — confirm against that file.
# shellcheck disable=SC1091
source "${SCRIPT_DIR}/common.sh"
# Label passed as the first argument to every blitz_log call in this script.
STEP="fault-inject"
# systemd units whose main process can be signalled by the *-crash / *-freeze
# sub-commands.
B_SIDE_SERVICE="blitz-b-side-omnid.service"
ROS_SERVICE="blitz-ros-receiver.service"
#######################################
# Ask systemd for the MainPID property of a unit.
# Arguments:
#   $1 - systemd unit name
# Outputs:
#   Whatever `systemctl show --property MainPID --value` prints for the unit.
# Returns:
#   The exit status of systemctl.
#######################################
main_pid_for_service() {
  local -a query=(show --property MainPID --value "$1")
  systemctl "${query[@]}"
}
#######################################
# Print the main PID of a systemd service, or fail if it has none.
#
# Intended to be used inside a command substitution, so stdout carries the
# PID and nothing else: the failure log line is redirected to stderr.
#
# Globals:
#   STEP (read) - label passed to blitz_log
# Arguments:
#   $1 - systemd unit name
# Outputs:
#   stdout: the PID followed by a newline (success only)
#   stderr: anything blitz_log prints for the failure record
# Exits:
#   1 when the lookup fails or does not yield a positive integer (empty and
#   "0" included). Inside $(...) this only terminates the subshell; the
#   caller must check the substitution's status.
#######################################
require_running_pid() {
  local service_name="$1"
  local pid
  # A failing lookup is treated like "no PID" so it is logged below instead of
  # silently aborting under `set -e`.
  pid="$(main_pid_for_service "${service_name}")" || pid=""
  # Accept only a positive decimal integer; rejects "", "0" and any
  # unexpected text.
  if [[ ! "${pid}" =~ ^[1-9][0-9]*$ ]]; then
    blitz_log "${STEP}" "lookup-pid" "failure" "service=${service_name}" 1 >&2
    exit 1
  fi
  printf '%s\n' "${pid}"
}
#######################################
# Create (or overwrite) a fault flag file holding the current epoch seconds.
# Globals:
#   BLITZ_RUNTIME_DIR (read) - directory the flag file is written into
#   STEP (read)              - label passed to blitz_log
# Arguments:
#   $1 - flag file name, relative to BLITZ_RUNTIME_DIR
# Outputs:
#   Writes the flag file, then records a "flag-on" success via blitz_log.
#######################################
write_fault_flag() {
  local marker="${BLITZ_RUNTIME_DIR}/$1"
  # File content is the output of `date +%s` plus a trailing newline.
  printf '%s\n' "$(date +%s)" >"${marker}"
  blitz_log "${STEP}" "flag-on" "success" "path=${marker}" 0
}
#######################################
# Remove a fault flag file; a missing file is not an error (rm -f).
# Globals:
#   BLITZ_RUNTIME_DIR (read) - directory the flag file lives in
#   STEP (read)              - label passed to blitz_log
# Arguments:
#   $1 - flag file name, relative to BLITZ_RUNTIME_DIR
# Outputs:
#   Deletes the flag file, then records a "flag-off" success via blitz_log.
#######################################
clear_fault_flag() {
  local marker="${BLITZ_RUNTIME_DIR}/$1"
  rm -f "${marker}"
  blitz_log "${STEP}" "flag-off" "success" "path=${marker}" 0
}
# ---------------------------------------------------------------------------
# Initialisation and sub-command dispatch.
#
# The three blitz_* calls below are helpers that are not defined in this
# script; judging by their names they load the boot environment, enforce root
# privileges and create the runtime directory — confirm against their
# definitions. They run before argument parsing, so even the usage text is
# only reached once they have succeeded.
# ---------------------------------------------------------------------------
blitz_load_boot_env
blitz_require_root "${STEP}"
blitz_prepare_runtime_dir

#######################################
# Send a signal to the main process of a systemd service.
#
# The PID is resolved in a separate assignment rather than inline in the
# `kill` arguments: require_running_pid reports failure by exiting, which
# inside $(...) only ends the subshell. Checking the assignment's status
# stops the script before `kill` can run with an empty argument.
#
# Arguments:
#   $1 - signal as written after the dash for kill (e.g. 9, STOP)
#   $2 - systemd unit name
# Exits:
#   1 when the service has no resolvable main PID.
#######################################
send_signal_to_service() {
  local signal="$1"
  local service_name="$2"
  local pid
  pid="$(require_running_pid "${service_name}")" || exit 1
  kill "-${signal}" "${pid}"
}

case "${1:-}" in
  bside-crash)
    # SIGKILL is deliberate here: the sub-command is meant to look like a
    # crash, so the process gets no chance to shut down cleanly.
    send_signal_to_service 9 "${B_SIDE_SERVICE}"
    ;;
  bside-process-freeze)
    # SIGSTOP suspends the process without terminating it.
    send_signal_to_service STOP "${B_SIDE_SERVICE}"
    ;;
  bside-video-thread-stall)
    # Only the flag file is written here; whatever reacts to it lives outside
    # this script.
    write_fault_flag "fault-injection-bside-video-thread-stall"
    ;;
  bside-control-thread-stall)
    write_fault_flag "fault-injection-bside-control-thread-stall"
    ;;
  ros-crash)
    send_signal_to_service 9 "${ROS_SERVICE}"
    ;;
  ros-freeze)
    send_signal_to_service STOP "${ROS_SERVICE}"
    ;;
  network-down)
    # Explicit opt-in gate. The `:-` default keeps `set -u` from aborting with
    # "unbound variable" when the variable is not set at all, so the caller
    # gets the explanatory message instead.
    if [[ "${BLITZ_WATCHDOG_ALLOW_FAULT_INJECTION:-}" != "1" ]]; then
      blitz_log "${STEP}" "network-down" "failure" "set BLITZ_WATCHDOG_ALLOW_FAULT_INJECTION=1 first" 1
      exit 1
    fi
    case "${2:-}" in
      on)
        write_fault_flag "fault-injection-network-down"
        ;;
      off)
        clear_fault_flag "fault-injection-network-down"
        ;;
      *)
        echo "usage: $0 network-down on|off" >&2
        exit 2
        ;;
    esac
    ;;
  *)
    # Missing or unknown sub-command: usage goes to stderr (matching the
    # network-down usage message) and the script exits with status 2.
    cat >&2 <<'EOF'
usage:
blitz-fault-inject.sh bside-crash
blitz-fault-inject.sh bside-process-freeze
blitz-fault-inject.sh bside-video-thread-stall
blitz-fault-inject.sh bside-control-thread-stall
blitz-fault-inject.sh ros-crash
blitz-fault-inject.sh ros-freeze
blitz-fault-inject.sh network-down on|off
EOF
    exit 2
    ;;
esac