diff --git a/scripts/boot/blitz-watchdog.sh b/scripts/boot/blitz-watchdog.sh
index e41b4ac..bf8b5ca 100644
--- a/scripts/boot/blitz-watchdog.sh
+++ b/scripts/boot/blitz-watchdog.sh
@@ -243,6 +243,136 @@ resolve_network_interface() {
   [[ -n "${NETWORK_LAST_INTERFACE}" ]]
 }
 
+# Print the route targets, one per line.
+# Globals:
+#   BLITZ_TIME_SERVER_IP    (read, optional) printed first when non-empty
+#   BLITZ_5G_ROUTE_TARGETS  (read, optional) comma/whitespace separated list
+# Outputs:
+#   The time server, then each non-empty list entry that differs from it.
+network_route_targets() {
+  local time_server="${BLITZ_TIME_SERVER_IP:-}"
+  local targets=()
+  local target
+
+  if [[ -n "${time_server}" ]]; then
+    printf '%s\n' "${time_server}"
+  fi
+
+  # Split into an array rather than via unquoted expansion: entries are never
+  # glob-expanded, the split does not depend on the caller's IFS, and an
+  # unset list is safe under set -u. read reaches EOF before the NUL
+  # delimiter, so its non-zero status is expected and ignored.
+  IFS=$', \t\n' read -r -d '' -a targets <<< "${BLITZ_5G_ROUTE_TARGETS:-}" || true
+
+  # The ${arr[@]+...} form keeps an empty array from tripping set -u on
+  # bash releases older than 4.4.
+  for target in ${targets[@]+"${targets[@]}"}; do
+    if [[ -n "${target}" && "${target}" != "${time_server}" ]]; then
+      printf '%s\n' "${target}"
+    fi
+  done
+}
+
+# Print the gateway to use for routes through an interface.
+# Arguments:
+#   $1 - interface name
+# Globals:
+#   BLITZ_5G_GATEWAY (read, optional) fallback when no default route has one
+# Outputs:
+#   The address following "via" in the interface's first default route,
+#   otherwise BLITZ_5G_GATEWAY.
+# Returns:
+#   0 when a gateway was printed, 1 when none could be determined.
+resolve_network_gateway() {
+  local interface_name="${1:-}"
+  local default_route
+  local gateway=""
+  local tokens=()
+  local index
+
+  # Best effort: a missing interface or route just leaves this empty.
+  default_route="$(ip -o route show default dev "${interface_name}" 2>/dev/null | head -n 1 || true)"
+  if [[ -n "${default_route}" ]]; then
+    # Pin IFS so tokenising does not depend on the caller's IFS.
+    IFS=$' \t' read -r -a tokens <<< "${default_route}"
+    # Stop one short of the end so tokens[index + 1] always exists.
+    for (( index = 0; index < ${#tokens[@]} - 1; index++ )); do
+      if [[ "${tokens[index]}" == "via" ]]; then
+        gateway="${tokens[index + 1]}"
+        break
+      fi
+    done
+  fi
+
+  if [[ -n "${gateway}" ]]; then
+    printf '%s\n' "${gateway}"
+    return 0
+  fi
+  if [[ -n "${BLITZ_5G_GATEWAY:-}" ]]; then
+    printf '%s\n' "${BLITZ_5G_GATEWAY}"
+    return 0
+  fi
+  return 1
+}
+
+# Re-pin the host routes for every route target through one interface.
+# Arguments:
+#   $1 - interface name
+# Globals:
+#   BLITZ_TIME_SERVER_IP (read) host used for the probe and the post-check
+#   STEP (read) passed through to blitz_log
+# Returns:
+#   0 when every route was replaced and blitz_route_ready printed a route,
+#   the exit status of ip route replace when a replace fails, 1 otherwise.
+repair_network_routes() {
+  local interface_name="${1:-}"
+  local probe_target="${BLITZ_TIME_SERVER_IP:-}"
+  local gateway=""
+  local target
+  local route_output
+  local rc
+
+  if [[ -z "${interface_name}" ]]; then
+    return 1
+  fi
+
+  # Only repair routes when the probe host answers through this interface.
+  # An unset or empty time server counts as a failed probe instead of
+  # aborting under set -u or pinging an empty host name.
+  if [[ -z "${probe_target}" ]] ||
+    ! ping -I "${interface_name}" -c 1 -W 2 "${probe_target}" >/dev/null 2>&1; then
+    blitz_log "${STEP}" "route-repair-probe" "failure" "interface=${interface_name} target=${probe_target}" 1
+    return 1
+  fi
+
+  gateway="$(resolve_network_gateway "${interface_name}" || true)"
+  if [[ -z "${gateway}" ]]; then
+    blitz_log "${STEP}" "route-repair-gateway" "failure" "interface=${interface_name}" 1
+    return 1
+  fi
+
+  while IFS= read -r target; do
+    [[ -n "${target}" ]] || continue
+    if ip route replace "${target}/32" via "${gateway}" dev "${interface_name}"; then
+      blitz_log "${STEP}" "route-repair-target" "success" "target=${target} interface=${interface_name} gateway=${gateway}" 0
+    else
+      rc=$?
+      blitz_log "${STEP}" "route-repair-target" "failure" "target=${target} interface=${interface_name} gateway=${gateway}" "${rc}"
+      return "${rc}"
+    fi
+  done < <(network_route_targets)
+
+  # blitz_route_ready's output is the success signal; its status is ignored.
+  route_output="$(blitz_route_ready "${probe_target}" "${interface_name}" || true)"
+  if [[ -z "${route_output}" ]]; then
+    blitz_log "${STEP}" "route-repair-postcheck" "failure" "interface=${interface_name} gateway=${gateway}" 1
+    return 1
+  fi
+
+  blitz_log "${STEP}" "route-repair-postcheck" "success" "interface=${interface_name} gateway=${gateway} route=${route_output}" 0
+  return 0
+}
+
 network_is_healthy() {
   local route_output
 
@@ -283,6 +413,15 @@ wait_for_network_recovery() {
 perform_network_recovery() {
   local rc=0
 
+  if resolve_network_interface && repair_network_routes "${NETWORK_LAST_INTERFACE}"; then
+    set_last_action "route-repair"
+    RECOVERY_ACTION_TAKEN=1
+    NETWORK_COOLDOWN_UNTIL=$(( $(now_epoch_sec) + BLITZ_NETWORK_RECOVERY_COOLDOWN_SEC ))
+    NETWORK_FAIL_COUNT=0
+    blitz_log "${STEP}" "network-recovery" "success" "mode=route-repair interface=${NETWORK_LAST_INTERFACE}" 0
+    return 0
+  fi
+
   set_last_action "network-recovery"
   RECOVERY_ACTION_TAKEN=1
   blitz_log "${STEP}" "network-recovery" "start" "fail_count=${NETWORK_FAIL_COUNT}" 0