diff --git a/scripts/boot/blitz-watchdog.sh b/scripts/boot/blitz-watchdog.sh
index da75343..e41b4ac 100644
--- a/scripts/boot/blitz-watchdog.sh
+++ b/scripts/boot/blitz-watchdog.sh
@@ -172,6 +172,7 @@ record_full_restart() {
 restart_bside_targeted() {
   local fault_key="$1"
   local reason="$2"
+  local rc
 
   if register_targeted_restart "${fault_key}"; then
     blitz_log "${STEP}" "escalate-full-restart" "start" "reason=${reason}" 0
@@ -184,7 +185,9 @@ restart_bside_targeted() {
   blitz_log "${STEP}" "restart-bside" "start" "reason=${reason}" 0
   if systemctl restart "${B_SIDE_SERVICE}"; then
     blitz_log "${STEP}" "restart-bside" "success" "reason=${reason}" 0
+    return 0
   else
+    # Read $? first thing in this branch; after `fi` it no longer holds systemctl's status.
     rc=$?
     blitz_log "${STEP}" "restart-bside" "failure" "reason=${reason}" "${rc}"
     return "${rc}"
@@ -203,28 +206,32 @@ full_restart_stack() {
   blitz_log "${STEP}" "full-restart-stop-bside" "start" "reason=${reason}" 0
   systemctl stop "${B_SIDE_SERVICE}" || true
 
-  if ! systemctl restart "${ROS_SERVICE}"; then
+  if systemctl restart "${ROS_SERVICE}"; then
+    blitz_log "${STEP}" "full-restart-restart-ros" "success" "reason=${reason}" 0
+  else
     rc=$?
     blitz_log "${STEP}" "full-restart-restart-ros" "failure" "reason=${reason}" "${rc}"
     record_full_restart
     return "${rc}"
   fi
-  blitz_log "${STEP}" "full-restart-restart-ros" "success" "reason=${reason}" 0
 
-  if ! bash "${SCRIPT_DIR}/wait-for-unix-socket.sh" --step "${STEP}" --timeout "${BLITZ_ROS_SOCKET_WAIT_SEC}"; then
+  if bash "${SCRIPT_DIR}/wait-for-unix-socket.sh" --step "${STEP}" --timeout "${BLITZ_ROS_SOCKET_WAIT_SEC}"; then
+    :
+  else
     rc=$?
     blitz_log "${STEP}" "full-restart-wait-socket" "failure" "reason=${reason}" "${rc}"
     record_full_restart
     return "${rc}"
   fi
 
-  if ! systemctl start "${B_SIDE_SERVICE}"; then
+  if systemctl start "${B_SIDE_SERVICE}"; then
+    blitz_log "${STEP}" "full-restart-start-bside" "success" "reason=${reason}" 0
+  else
     rc=$?
     blitz_log "${STEP}" "full-restart-start-bside" "failure" "reason=${reason}" "${rc}"
     record_full_restart
     return "${rc}"
   fi
-  blitz_log "${STEP}" "full-restart-start-bside" "success" "reason=${reason}" 0
 
   record_full_restart
 }
@@ -282,13 +289,17 @@ perform_network_recovery() {
   blitz_log "${STEP}" "network-recovery" "start" "fail_count=${NETWORK_FAIL_COUNT}" 0
   systemctl stop "${B_SIDE_SERVICE}" || true
 
-  if ! bash "${SCRIPT_DIR}/5g-dial.sh"; then
+  if bash "${SCRIPT_DIR}/5g-dial.sh"; then
+    :
+  else
     rc=$?
-    blitz_log "${STEP}" "network-redial" "failure" "fail_count=${NETWORK_FAIL_COUNT}" "${rc}"
+    blitz_log "${STEP}" "network-redial" "failure" "fail_count=${NETWORK_FAIL_COUNT} script=${SCRIPT_DIR}/5g-dial.sh" "${rc}"
     return "${rc}"
   fi
 
-  if ! wait_for_network_recovery "${BLITZ_5G_ROUTE_WAIT_SEC}"; then
+  if wait_for_network_recovery "${BLITZ_5G_ROUTE_WAIT_SEC}"; then
+    :
+  else
     rc=$?
     blitz_log "${STEP}" "network-recovery" "failure" "fail_count=${NETWORK_FAIL_COUNT} interface=${NETWORK_LAST_INTERFACE:-unresolved}" "${rc}"
     return "${rc}"
diff --git a/scripts/boot/common.sh b/scripts/boot/common.sh
index bf9256a..467b6f3 100644
--- a/scripts/boot/common.sh
+++ b/scripts/boot/common.sh
@@ -204,14 +204,18 @@ blitz_route_ready() {
   return 0
 }
 
-blitz_resolve_5g_interface() {
-  local explicit_interface="${BLITZ_5G_INTERFACE:-}"
-  local info_json="${BLITZ_5G_INFO_JSON:-}"
+blitz_interface_exists() {
+  local interface_name="${1:-}"
 
-  if [[ -n "${explicit_interface}" ]]; then
-    printf '%s\n' "${explicit_interface}"
-    return 0
+  if [[ -z "${interface_name}" ]]; then
+    return 1
   fi
+  ip link show dev "${interface_name}" >/dev/null 2>&1
+}
+
+blitz_read_5g_info_interface() {
+  local info_json="$1"
+
   if [[ -z "${info_json}" || ! -f "${info_json}" ]]; then
     return 1
   fi
@@ -236,6 +240,156 @@ print(interface)
 PY
 }
 
+blitz_detect_5g_interface_from_subnet() {
+  local modem_subnet="${1:-${BLITZ_5G_MODEM_SUBNET:-}}"
+
+  if [[ -z "${modem_subnet}" ]]; then
+    return 1
+  fi
+
+  python3 - "${modem_subnet}" <<'PY'
+import ipaddress
+import json
+import subprocess
+import sys
+
+subnet = ipaddress.ip_network(sys.argv[1], strict=False)
+skip = {"lo", "docker0", "l4tbr0"}
+
+def priority(name: str) -> tuple:
+    if name.startswith("enx"):
+        return (0, name)
+    if name.startswith("wwan"):
+        return (1, name)
+    if name.startswith("usb"):
+        return (2, name)
+    if name.startswith("eth"):
+        return (3, name)
+    return (9, name)
+
+try:
+    output = subprocess.check_output(["ip", "-j", "-4", "addr", "show"], text=True)
+    payload = json.loads(output)
+except Exception:
+    raise SystemExit(1)
+
+candidates = []
+for item in payload:
+    ifname = str(item.get("ifname") or "").strip()
+    if not ifname or ifname in skip:
+        continue
+    for addr in item.get("addr_info") or []:
+        if addr.get("family") != "inet":
+            continue
+        local = addr.get("local")
+        prefixlen = addr.get("prefixlen")
+        if not local or prefixlen is None:
+            continue
+        try:
+            iface = ipaddress.ip_interface(f"{local}/{prefixlen}")
+        except ValueError:
+            continue
+        if iface.ip in subnet:
+            candidates.append((priority(ifname), ifname))
+            break
+
+if not candidates:
+    raise SystemExit(1)
+
+candidates.sort(key=lambda item: item[0])
+print(candidates[0][1])
+PY
+}
+
+blitz_refresh_5g_info_json() {
+  local interface_name="$1"
+  local info_json="${2:-${BLITZ_5G_INFO_JSON:-}}"
+
+  if [[ -z "${interface_name}" || -z "${info_json}" ]]; then
+    return 1
+  fi
+
+  python3 - "${interface_name}" "${info_json}" <<'PY'
+import json
+import os
+import subprocess
+import sys
+
+interface_name = sys.argv[1]
+path = sys.argv[2]
+
+try:
+    output = subprocess.check_output(["ip", "-j", "addr", "show", "dev", interface_name], text=True)
+    payload = json.loads(output)
+except Exception:
+    raise SystemExit(1)
+
+if not payload:
+    raise SystemExit(1)
+
+item = payload[0]
+ipv4 = []
+ipv6 = []
+for addr in item.get("addr_info") or []:
+    local = addr.get("local")
+    prefixlen = addr.get("prefixlen")
+    family = addr.get("family")
+    if not local or prefixlen is None:
+        continue
+    entry = f"{local}/{prefixlen}"
+    if family == "inet":
+        ipv4.append(entry)
+    elif family == "inet6":
+        ipv6.append(entry)
+
+data = {
+    "interface": interface_name,
+    "ipv4": ipv4,
+    "ipv6": ipv6,
+}
+
+parent = os.path.dirname(path)
+if parent:
+    os.makedirs(parent, exist_ok=True)
+temp_path = f"{path}.tmp.{os.getpid()}"
+with open(temp_path, "w", encoding="utf-8") as handle:
+    json.dump(data, handle, ensure_ascii=False, indent=2)
+os.replace(temp_path, path)
+PY
+}
+
+blitz_resolve_5g_interface() {
+  local explicit_interface="${BLITZ_5G_INTERFACE:-}"
+  local info_json="${BLITZ_5G_INFO_JSON:-}"
+  local recorded_interface=""
+  local detected_interface=""
+
+  if [[ -n "${explicit_interface}" ]]; then
+    if blitz_interface_exists "${explicit_interface}"; then
+      printf '%s\n' "${explicit_interface}"
+      return 0
+    fi
+    return 1
+  fi
+
+  recorded_interface="$(blitz_read_5g_info_interface "${info_json}" || true)"
+  if [[ -n "${recorded_interface}" ]] && blitz_interface_exists "${recorded_interface}"; then
+    printf '%s\n' "${recorded_interface}"
+    return 0
+  fi
+
+  detected_interface="$(blitz_detect_5g_interface_from_subnet || true)"
+  if [[ -n "${detected_interface}" ]]; then
+    if [[ "${detected_interface}" != "${recorded_interface}" ]]; then
+      blitz_refresh_5g_info_json "${detected_interface}" "${info_json}" >/dev/null 2>&1 || true
+    fi
+    printf '%s\n' "${detected_interface}"
+    return 0
+  fi
+
+  return 1
+}
+
 blitz_prepare_runtime_dir() {
   local runtime_dir