#!/usr/bin/env bash
#
# Fault-injection helper for the blitz services.
#
# Depending on the first argument this script either
#   * SIGKILLs a service's main process and waits for systemd to report a
#     new MainPID (bside-crash, ros-crash),
#   * SIGSTOPs a service's main process (bside-process-freeze, ros-freeze), or
#   * creates/removes a flag file under ${BLITZ_RUNTIME_DIR}
#     (bside-video-thread-stall, bside-control-thread-stall, network-down).
#
# Helpers prefixed with "blitz_" and the BLITZ_* variables are expected to come
# from common.sh / the boot environment; their exact semantics are not visible
# from this file.
#
# Exit status: 0 on success, 1 on an injection/lookup failure, 2 on usage error.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# shellcheck disable=SC1091
source "${SCRIPT_DIR}/common.sh"

STEP="fault-inject"
B_SIDE_SERVICE="blitz-b-side-omnid.service"
ROS_SERVICE="blitz-ros-receiver.service"
# How long (seconds) a crash injection waits for a new MainPID to appear.
RESTART_TIMEOUT_SEC=10

#######################################
# Print the MainPID systemd reports for a unit.
# Arguments: $1 - systemd unit name
# Outputs:   the MainPID value as printed by `systemctl show` on stdout
# Returns:   exit status of systemctl
#######################################
main_pid_for_service() {
  local service_name="$1"
  systemctl show --property MainPID --value "${service_name}"
}

#######################################
# Poll once per second until the unit's MainPID is non-empty, non-zero and
# different from the previous PID.
# Arguments: $1 - systemd unit name
#            $2 - PID the unit had before the fault was injected
#            $3 - timeout in seconds (default 10)
# Outputs:   the new PID on stdout when a change is observed
# Returns:   0 when a new PID was observed, 1 on timeout
#######################################
wait_for_service_pid_change() {
  local service_name="$1"
  local previous_pid="$2"
  local timeout_sec="${3:-10}"
  local waited=0
  local current_pid=""

  while (( waited < timeout_sec )); do
    current_pid="$(main_pid_for_service "${service_name}")"
    if [[ -n "${current_pid}" && "${current_pid}" != "0" && "${current_pid}" != "${previous_pid}" ]]; then
      printf '%s\n' "${current_pid}"
      return 0
    fi
    sleep 1
    waited=$(( waited + 1 ))
  done

  return 1
}

#######################################
# Print the unit's MainPID, or log a failure and exit 1 when the unit has no
# running main process (empty or "0" MainPID).
# Intended to be called inside a command substitution: the `exit 1` then ends
# the substitution subshell, and `set -e` in the caller aborts the script.
# Arguments: $1 - systemd unit name
# Outputs:   the PID on stdout
#######################################
require_running_pid() {
  local service_name="$1"
  local pid

  pid="$(main_pid_for_service "${service_name}")"
  if [[ -z "${pid}" || "${pid}" == "0" ]]; then
    # stdout is captured by the caller's $(...); route the log call to stderr
    # so the failure record cannot end up inside the captured "PID" and be
    # discarded if blitz_log happens to print to stdout.
    blitz_log "${STEP}" "lookup-pid" "failure" "service=${service_name}" 1 >&2
    exit 1
  fi
  printf '%s\n' "${pid}"
}

#######################################
# Create a fault flag file containing the current Unix timestamp.
# Globals:   BLITZ_RUNTIME_DIR (read)
# Arguments: $1 - flag file name (relative to BLITZ_RUNTIME_DIR)
#######################################
write_fault_flag() {
  local flag_name="$1"
  local flag_path="${BLITZ_RUNTIME_DIR}/${flag_name}"

  printf '%s\n' "$(date +%s)" > "${flag_path}"
  blitz_log "${STEP}" "flag-on" "success" "path=${flag_path}" 0
}

#######################################
# Remove a fault flag file (no error if it does not exist).
# Globals:   BLITZ_RUNTIME_DIR (read)
# Arguments: $1 - flag file name (relative to BLITZ_RUNTIME_DIR)
#######################################
clear_fault_flag() {
  local flag_name="$1"
  local flag_path="${BLITZ_RUNTIME_DIR}/${flag_name}"

  rm -f "${flag_path}"
  blitz_log "${STEP}" "flag-off" "success" "path=${flag_path}" 0
}

#######################################
# SIGKILL a unit's main process and wait for a different MainPID to show up.
# Exits the script with status 1 when no new PID is observed in time.
# Globals:   STEP, RESTART_TIMEOUT_SEC (read)
# Arguments: $1 - systemd unit name
#            $2 - action label used in log records (e.g. "bside-crash")
#######################################
inject_crash() {
  local service_name="$1"
  local action="$2"
  local target_pid restarted_pid

  target_pid="$(require_running_pid "${service_name}")"
  blitz_log "${STEP}" "${action}" "start" "service=${service_name} pid=${target_pid}" 0
  # SIGKILL is deliberate here: the point is to simulate a hard crash.
  kill -9 "${target_pid}"

  if restarted_pid="$(wait_for_service_pid_change "${service_name}" "${target_pid}" "${RESTART_TIMEOUT_SEC}")"; then
    blitz_log "${STEP}" "${action}" "success" "old_pid=${target_pid} new_pid=${restarted_pid}" 0
  else
    blitz_log "${STEP}" "${action}" "failure" "old_pid=${target_pid} restart_not_observed_within=${RESTART_TIMEOUT_SEC}s" 1
    exit 1
  fi
}

#######################################
# SIGSTOP a unit's main process. No attempt is made to resume it.
# Globals:   STEP (read)
# Arguments: $1 - systemd unit name
#            $2 - action label used in log records (e.g. "ros-freeze")
#######################################
inject_freeze() {
  local service_name="$1"
  local action="$2"
  local target_pid

  target_pid="$(require_running_pid "${service_name}")"
  blitz_log "${STEP}" "${action}" "start" "service=${service_name} pid=${target_pid}" 0
  kill -STOP "${target_pid}"
  blitz_log "${STEP}" "${action}" "success" "service=${service_name} pid=${target_pid}" 0
}

blitz_load_boot_env
blitz_require_root "${STEP}"
blitz_prepare_runtime_dir

case "${1:-}" in
  bside-crash)
    inject_crash "${B_SIDE_SERVICE}" "bside-crash"
    ;;
  bside-process-freeze)
    inject_freeze "${B_SIDE_SERVICE}" "bside-process-freeze"
    ;;
  bside-video-thread-stall)
    write_fault_flag "fault-injection-bside-video-thread-stall"
    ;;
  bside-control-thread-stall)
    write_fault_flag "fault-injection-bside-control-thread-stall"
    ;;
  ros-crash)
    inject_crash "${ROS_SERVICE}" "ros-crash"
    ;;
  ros-freeze)
    inject_freeze "${ROS_SERVICE}" "ros-freeze"
    ;;
  network-down)
    # ":-" keeps `set -u` from aborting with "unbound variable" when the
    # opt-in switch is absent; the explicit failure log below is the intended
    # diagnostic in that case.
    if [[ "${BLITZ_WATCHDOG_ALLOW_FAULT_INJECTION:-}" != "1" ]]; then
      blitz_log "${STEP}" "network-down" "failure" "set BLITZ_WATCHDOG_ALLOW_FAULT_INJECTION=1 first" 1
      exit 1
    fi
    case "${2:-}" in
      on)
        write_fault_flag "fault-injection-network-down"
        ;;
      off)
        clear_fault_flag "fault-injection-network-down"
        ;;
      *)
        echo "usage: $0 network-down on|off" >&2
        exit 2
        ;;
    esac
    ;;
  *)
    cat <<'EOF'
usage:
  blitz-fault-inject.sh bside-crash
  blitz-fault-inject.sh bside-process-freeze
  blitz-fault-inject.sh bside-video-thread-stall
  blitz-fault-inject.sh bside-control-thread-stall
  blitz-fault-inject.sh ros-crash
  blitz-fault-inject.sh ros-freeze
  blitz-fault-inject.sh network-down on|off
EOF
    exit 2
    ;;
esac