From ebb047c7b57033f77ab5b81989b6547486f64a91 Mon Sep 17 00:00:00 2001
From: nnbcccscdscdsc <2709767634@qq.com>
Date: Tue, 14 Apr 2026 15:16:11 +0800
Subject: [PATCH] =?UTF-8?q?feat=EF=BC=9A=E6=96=B0=E5=A2=9Egps=E7=9B=91?=
 =?UTF-8?q?=E6=8E=A7=E6=9C=8D=E5=8A=A1=EF=BC=88=E6=96=AD=E5=BC=80=E9=87=8D?=
 =?UTF-8?q?=E8=BF=9E=EF=BC=89?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 scripts/boot/README.md               |   7 ++
 scripts/boot/blitz-watchdog.sh       | 164 ++++++++++++++++++++++++++-
 scripts/boot/common.sh               |   4 +
 scripts/boot/modem_network_info.json |   6 +-
 scripts/boot/robot-boot.env          |   4 +
 5 files changed, 180 insertions(+), 5 deletions(-)

diff --git a/scripts/boot/README.md b/scripts/boot/README.md
index 9f3318d..a10a5bb 100644
--- a/scripts/boot/README.md
+++ b/scripts/boot/README.md
@@ -77,6 +77,10 @@ BLITZ_HEALTH_STALE_SEC="15"
 BLITZ_OMNID_THREAD_HEARTBEAT_TIMEOUT_SEC="15"
 BLITZ_NETWORK_FAIL_THRESHOLD="3"
 BLITZ_NETWORK_RECOVERY_COOLDOWN_SEC="30"
+BLITZ_GPS_MONITOR_ENABLED="1"
+BLITZ_GPS_DEVICE_GLOB="/dev/ttyCH341USB*"
+BLITZ_GPS_CHECK_INTERVAL_SEC="10"
+BLITZ_GPS_RESTART_UNITS="gpsd.socket gpsd.service"
 BLITZ_WATCHDOG_ALLOW_FAULT_INJECTION="0"
 ```
 
@@ -157,6 +161,8 @@ Key files:
 - `ros-receiver.status.json`
 - `watchdog.status.json`
 
+`watchdog.status.json` now also records `gps_ok` and `gps_device_present` so you can quickly tell whether the GPS USB serial node is currently visible and whether the last `gpsd` reconnect attempt succeeded.
+
 Pretty-print them:
 
 ```bash
@@ -201,6 +207,7 @@ sudo bash scripts/boot/blitz-fault-inject.sh network-down off
   - wait for unix socket
   - start `b_side`
 - If network checks fail repeatedly, watchdog stops `b_side`, runs `5g-dial.sh`, waits for route recovery, and then restores services.
+- If GPS monitoring is enabled, watchdog checks `BLITZ_GPS_DEVICE_GLOB` every `BLITZ_GPS_CHECK_INTERVAL_SEC` seconds. When the GPS serial device disappears and later reappears, watchdog restarts the units in `BLITZ_GPS_RESTART_UNITS` so `gpsd` can bind to the new device node again.
 - Camera disappearance is logged as degraded state. Reappearance triggers a `b_side` restart after the device is stable.
 
 ## Notes
diff --git a/scripts/boot/blitz-watchdog.sh b/scripts/boot/blitz-watchdog.sh
index bf8b5ca..2ebd9b1 100644
--- a/scripts/boot/blitz-watchdog.sh
+++ b/scripts/boot/blitz-watchdog.sh
@@ -22,6 +22,10 @@ LAST_ACTION_EPOCH_MS=0
 FULL_RESTART_WINDOW_START=0
 FULL_RESTART_WINDOW_COUNT=0
 NETWORK_LAST_INTERFACE=""
+GPS_LAST_CHECK_SEC=0
+GPS_DEVICE_PRESENT_PREV=-1
+GPS_DEVICE_PRESENT_STATE=1
+GPS_STACK_ACTIVE_STATE=1
 declare -A TARGETED_RESTART_WINDOW_START=()
 declare -A TARGETED_RESTART_WINDOW_COUNT=()
 
@@ -37,6 +41,133 @@ service_is_active() {
   systemctl is-active --quiet "$1"
 }
 
+# Succeeds only when BLITZ_GPS_MONITOR_ENABLED is exactly "1".
+gps_monitor_enabled() {
+  [[ "${BLITZ_GPS_MONITOR_ENABLED:-0}" == "1" ]]
+}
+
+# Succeeds when at least one unit listed in BLITZ_GPS_RESTART_UNITS is active;
+# fails when the list is empty or no listed unit is active.
+gps_stack_active() {
+  local units=()
+  local unit
+
+  read -r -a units <<< "${BLITZ_GPS_RESTART_UNITS:-}"
+  if (( ${#units[@]} == 0 )); then
+    return 1
+  fi
+
+  for unit in "${units[@]}"; do
+    if service_is_active "${unit}"; then
+      return 0
+    fi
+  done
+  return 1
+}
+
+# Restart every unit listed in BLITZ_GPS_RESTART_UNITS with one systemctl call.
+# Arguments: $1 - reason tag for the log, $2 - device summary for the log.
+# Globals:   sets GPS_STACK_ACTIVE_STATE to 1 on success and 0 on failure.
+# Returns:   0 on success, 1 when no units are configured, otherwise the
+#            exit status of the failed `systemctl restart`.
+restart_gps_stack() {
+  local reason="$1"
+  local devices="$2"
+  local units=()
+  local rc
+
+  read -r -a units <<< "${BLITZ_GPS_RESTART_UNITS:-}"
+  if (( ${#units[@]} == 0 )); then
+    GPS_STACK_ACTIVE_STATE=0
+    blitz_log "${STEP}" "gps-reconnect" "failure" "reason=${reason} devices=${devices} units=empty" 1
+    return 1
+  fi
+
+  set_last_action "gps-reconnect"
+  blitz_log "${STEP}" "gps-reconnect" "start" "reason=${reason} devices=${devices} units=${BLITZ_GPS_RESTART_UNITS}" 0
+  # Capture the status on the failing command itself: after a bare
+  # `if cmd; then ...; fi` whose condition failed, $? is already 0.
+  rc=0
+  systemctl restart "${units[@]}" || rc=$?
+  if (( rc == 0 )); then
+    GPS_STACK_ACTIVE_STATE=1
+    blitz_log "${STEP}" "gps-reconnect" "success" "reason=${reason} devices=${devices} units=${BLITZ_GPS_RESTART_UNITS}" 0
+    return 0
+  fi
+
+  GPS_STACK_ACTIVE_STATE=0
+  blitz_log "${STEP}" "gps-reconnect" "failure" "reason=${reason} devices=${devices} units=${BLITZ_GPS_RESTART_UNITS}" "${rc}"
+  return "${rc}"
+}
+
+# Throttled GPS health probe, called once per main-loop iteration.
+# Arguments: $1 - current time in epoch seconds.
+# Globals:   reads BLITZ_GPS_CHECK_INTERVAL_SEC and BLITZ_GPS_DEVICE_GLOB;
+#            updates GPS_LAST_CHECK_SEC, GPS_DEVICE_PRESENT_PREV,
+#            GPS_DEVICE_PRESENT_STATE and GPS_STACK_ACTIVE_STATE.
+# Returns:   0 when monitoring is disabled, or the device is present and the
+#            GPS units are active or were restarted successfully; 1 otherwise.
+check_gps_health() {
+  local now_sec="$1"
+  local check_interval_sec="${BLITZ_GPS_CHECK_INTERVAL_SEC:-10}"
+  local device_glob="${BLITZ_GPS_DEVICE_GLOB:-}"
+  local previous_present="${GPS_DEVICE_PRESENT_PREV}"
+  local recovery_reason=""
+  local device_summary=""
+  local -a devices=()
+
+  if ! gps_monitor_enabled; then
+    GPS_DEVICE_PRESENT_STATE=1
+    GPS_STACK_ACTIVE_STATE=1
+    return 0
+  fi
+
+  if (( check_interval_sec < 1 )); then
+    check_interval_sec=1
+  fi
+  # Between probes, report the cached result of the last real check.
+  if (( GPS_LAST_CHECK_SEC != 0 && now_sec - GPS_LAST_CHECK_SEC < check_interval_sec )); then
+    if (( GPS_DEVICE_PRESENT_STATE == 1 && GPS_STACK_ACTIVE_STATE == 1 )); then
+      return 0
+    fi
+    return 1
+  fi
+  GPS_LAST_CHECK_SEC="${now_sec}"
+
+  mapfile -t devices < <(compgen -G "${device_glob}" || true)
+  if (( ${#devices[@]} == 0 )); then
+    GPS_DEVICE_PRESENT_STATE=0
+    GPS_STACK_ACTIVE_STATE=0
+    if (( previous_present != 0 )); then
+      blitz_log "${STEP}" "gps-device-check" "failure" "state=missing glob=${device_glob}" 1
+    fi
+    GPS_DEVICE_PRESENT_PREV=0
+    return 1
+  fi
+
+  device_summary="$(IFS=,; printf '%s' "${devices[*]}")"
+  GPS_DEVICE_PRESENT_STATE=1
+  GPS_DEVICE_PRESENT_PREV=1
+
+  # Restart the GPS units when the device node came back or no unit is active.
+  if (( previous_present == 0 )); then
+    blitz_log "${STEP}" "gps-device-check" "success" "state=reappeared devices=${device_summary}" 0
+    recovery_reason="device-reappeared"
+  elif ! gps_stack_active; then
+    recovery_reason="gpsd-inactive"
+  fi
+
+  if [[ -n "${recovery_reason}" ]]; then
+    if restart_gps_stack "${recovery_reason}" "${device_summary}"; then
+      return 0
+    fi
+    return 1
+  fi
+
+  GPS_STACK_ACTIVE_STATE=1
+  return 0
+}
+
 status_file_fresh() {
   local path="$1"
   local max_age_sec="$2"
@@ -97,6 +228,8 @@ write_watchdog_status() {
   local camera_ok="$4"
   local ros_ok="$5"
   local bside_ok="$6"
+  local gps_ok="$7"
+  local gps_device_present="$8"
   local tmp_file
 
   tmp_file="${WATCHDOG_STATUS_FILE}.tmp.$$"
@@ -109,6 +242,8 @@ write_watchdog_status() {
   "camera_ok": ${camera_ok},
   "ros_ok": ${ros_ok},
   "bside_ok": ${bside_ok},
+  "gps_ok": ${gps_ok},
+  "gps_device_present": ${gps_device_present},
   "network_fail_count": ${NETWORK_FAIL_COUNT},
   "targeted_restart_count": $(targeted_restart_total),
   "full_restart_count": ${FULL_RESTART_WINDOW_COUNT},
@@ -427,13 +562,22 @@ while true; do
   camera_ok=1
   ros_ok=1
   bside_ok=1
+  gps_ok=1
+  gps_device_present=1
   RECOVERY_ACTION_TAKEN=0
   now_sec="$(now_epoch_sec)"
 
+  if gps_monitor_enabled; then
+    gps_device_present="${GPS_DEVICE_PRESENT_STATE}"
+    if (( GPS_DEVICE_PRESENT_STATE == 0 || GPS_STACK_ACTIVE_STATE == 0 )); then
+      gps_ok=0
+    fi
+  fi
+
   if (( BACKOFF_UNTIL > now_sec )); then
     fault_reason="backoff"
     recovery_state="backoff"
-    write_watchdog_status "${fault_reason}" "${recovery_state}" 0 0 0 0
+    write_watchdog_status "${fault_reason}" "${recovery_state}" 0 0 0 0 "${gps_ok}" "${gps_device_present}"
     sleep "${BLITZ_WATCHDOG_INTERVAL_SEC}"
     continue
   fi
@@ -453,6 +597,22 @@ while true; do
     NETWORK_FAIL_COUNT=0
   fi
 
+  if check_gps_health "${now_sec}"; then
+    gps_ok=1
+  else
+    gps_ok=0
+    gps_device_present="${GPS_DEVICE_PRESENT_STATE}"
+    if [[ "${fault_reason}" == "none" ]]; then
+      if (( GPS_DEVICE_PRESENT_STATE == 0 )); then
+        fault_reason="gps_device_missing"
+      else
+        fault_reason="gps_reconnect_failed"
+      fi
+      recovery_state="degraded"
+    fi
+  fi
+  gps_device_present="${GPS_DEVICE_PRESENT_STATE}"
+
   if [[ ! -e "${OMNI_CAMERA_DEVICE}" ]]; then
     camera_ok=0
     fault_reason="camera_missing"
@@ -486,6 +646,6 @@ while true; do
     full_restart_stack "ros-unhealthy" || true
   fi
 
-  write_watchdog_status "${fault_reason}" "${recovery_state}" "${network_ok}" "${camera_ok}" "${ros_ok}" "${bside_ok}"
+  write_watchdog_status "${fault_reason}" "${recovery_state}" "${network_ok}" "${camera_ok}" "${ros_ok}" "${bside_ok}" "${gps_ok}" "${gps_device_present}"
   sleep "${BLITZ_WATCHDOG_INTERVAL_SEC}"
 done
diff --git a/scripts/boot/common.sh b/scripts/boot/common.sh
index 467b6f3..faa57fd 100644
--- a/scripts/boot/common.sh
+++ b/scripts/boot/common.sh
@@ -73,6 +73,10 @@ blitz_load_boot_env() {
   export BLITZ_OMNID_THREAD_HEARTBEAT_TIMEOUT_SEC="${BLITZ_OMNID_THREAD_HEARTBEAT_TIMEOUT_SEC:-15}"
   export BLITZ_NETWORK_FAIL_THRESHOLD="${BLITZ_NETWORK_FAIL_THRESHOLD:-3}"
   export BLITZ_NETWORK_RECOVERY_COOLDOWN_SEC="${BLITZ_NETWORK_RECOVERY_COOLDOWN_SEC:-30}"
+  export BLITZ_GPS_MONITOR_ENABLED="${BLITZ_GPS_MONITOR_ENABLED:-1}"
+  export BLITZ_GPS_DEVICE_GLOB="${BLITZ_GPS_DEVICE_GLOB:-/dev/ttyCH341USB*}"
+  export BLITZ_GPS_CHECK_INTERVAL_SEC="${BLITZ_GPS_CHECK_INTERVAL_SEC:-10}"
+  export BLITZ_GPS_RESTART_UNITS="${BLITZ_GPS_RESTART_UNITS:-gpsd.socket gpsd.service}"
   export BLITZ_WATCHDOG_ALLOW_FAULT_INJECTION="${BLITZ_WATCHDOG_ALLOW_FAULT_INJECTION:-0}"
   export BLITZ_BOOT_ENV_LOADED="1"
 }
diff --git a/scripts/boot/modem_network_info.json b/scripts/boot/modem_network_info.json
index 09739b6..e7b4a1e 100644
--- a/scripts/boot/modem_network_info.json
+++ b/scripts/boot/modem_network_info.json
@@ -1,9 +1,9 @@
 {
-  "interface": "enx08711b726c22",
+  "interface": "enxd41a57335f9d",
   "ipv4": [
-    "192.168.225.66/22"
+    "192.168.225.83/22"
   ],
   "ipv6": [
-    "fe80::86e0:4771:425d:8b20/64"
+    "fe80::18c1:e89d:e033:9857/64"
   ]
 }
\ No newline at end of file
diff --git a/scripts/boot/robot-boot.env b/scripts/boot/robot-boot.env
index a670f73..cce0160 100644
--- a/scripts/boot/robot-boot.env
+++ b/scripts/boot/robot-boot.env
@@ -27,6 +27,10 @@ BLITZ_HEALTH_STALE_SEC="15"
 BLITZ_OMNID_THREAD_HEARTBEAT_TIMEOUT_SEC="15"
 BLITZ_NETWORK_FAIL_THRESHOLD="3"
 BLITZ_NETWORK_RECOVERY_COOLDOWN_SEC="30"
+BLITZ_GPS_MONITOR_ENABLED="1"
+BLITZ_GPS_DEVICE_GLOB="/dev/ttyCH341USB*"
+BLITZ_GPS_CHECK_INTERVAL_SEC="10"
+BLITZ_GPS_RESTART_UNITS="gpsd.socket gpsd.service"
 BLITZ_WATCHDOG_ALLOW_FAULT_INJECTION="0"
 
 # Boot units run b_side_omnid as root directly, so nested sudo must stay off.