feat:新增gps监控服务(断开重连)
This commit is contained in:
@@ -22,6 +22,10 @@ LAST_ACTION_EPOCH_MS=0
|
||||
FULL_RESTART_WINDOW_START=0
|
||||
FULL_RESTART_WINDOW_COUNT=0
|
||||
NETWORK_LAST_INTERFACE=""
|
||||
GPS_LAST_CHECK_SEC=0
|
||||
GPS_DEVICE_PRESENT_PREV=-1
|
||||
GPS_DEVICE_PRESENT_STATE=1
|
||||
GPS_STACK_ACTIVE_STATE=1
|
||||
declare -A TARGETED_RESTART_WINDOW_START=()
|
||||
declare -A TARGETED_RESTART_WINDOW_COUNT=()
|
||||
|
||||
@@ -37,6 +41,113 @@ service_is_active() {
|
||||
systemctl is-active --quiet "$1"
|
||||
}
|
||||
|
||||
#######################################
# Report whether GPS monitoring is switched on.
# Globals:
#   BLITZ_GPS_MONITOR_ENABLED (read) - monitoring is on only when this is
#                                      exactly "1"; unset/empty counts as "0"
# Returns:
#   0 when monitoring is enabled, 1 otherwise.
#######################################
gps_monitor_enabled() {
  case "${BLITZ_GPS_MONITOR_ENABLED:-0}" in
    1) return 0 ;;
    *) return 1 ;;
  esac
}
|
||||
|
||||
#######################################
# Check whether any configured GPS unit is currently active.
# Globals:
#   BLITZ_GPS_RESTART_UNITS (read) - whitespace-separated list of unit names
# Returns:
#   0 as soon as service_is_active succeeds for one of the units;
#   1 when the list is empty or no unit is reported active.
#######################################
gps_stack_active() {
  local -a gps_units=()
  local idx

  read -r -a gps_units <<< "${BLITZ_GPS_RESTART_UNITS:-}"

  # Nothing configured means there is nothing that could be active.
  (( ${#gps_units[@]} > 0 )) || return 1

  for (( idx = 0; idx < ${#gps_units[@]}; idx++ )); do
    if service_is_active "${gps_units[idx]}"; then
      return 0
    fi
  done

  return 1
}
|
||||
|
||||
#######################################
# Restart the systemd units listed in BLITZ_GPS_RESTART_UNITS and record
# the outcome.
# Globals:
#   BLITZ_GPS_RESTART_UNITS (read)    - whitespace-separated unit names
#   GPS_STACK_ACTIVE_STATE  (written) - 1 after a successful restart, else 0
#   STEP                    (read)    - passed through to blitz_log
# Arguments:
#   $1 - reason for the restart, recorded in the log entries
#   $2 - device summary, recorded in the log entries
# Returns:
#   0 when `systemctl restart` succeeds; 1 when no units are configured;
#   otherwise the non-zero exit status of `systemctl restart`.
#######################################
restart_gps_stack() {
  local reason="$1"
  local devices="$2"
  local -a units=()
  local rc=0

  read -r -a units <<< "${BLITZ_GPS_RESTART_UNITS:-}"
  if (( ${#units[@]} == 0 )); then
    GPS_STACK_ACTIVE_STATE=0
    blitz_log "${STEP}" "gps-reconnect" "failure" "reason=${reason} devices=${devices} units=empty" 1
    return 1
  fi

  set_last_action "gps-reconnect"
  blitz_log "${STEP}" "gps-reconnect" "start" "reason=${reason} devices=${devices} units=${BLITZ_GPS_RESTART_UNITS}" 0

  # Capture the status on the failing command itself. Reading $? after an
  # `if ...; fi` whose condition was false always yields 0, which would make
  # a failed restart look like a success to the caller.
  systemctl restart "${units[@]}" || rc=$?

  if (( rc == 0 )); then
    GPS_STACK_ACTIVE_STATE=1
    blitz_log "${STEP}" "gps-reconnect" "success" "reason=${reason} devices=${devices} units=${BLITZ_GPS_RESTART_UNITS}" 0
    return 0
  fi

  GPS_STACK_ACTIVE_STATE=0
  blitz_log "${STEP}" "gps-reconnect" "failure" "reason=${reason} devices=${devices} units=${BLITZ_GPS_RESTART_UNITS}" "${rc}"
  return "${rc}"
}
|
||||
|
||||
#######################################
# Check the GPS device and its service stack, restarting the stack when the
# device has just reappeared or no configured unit is active.
# The real check runs at most once per check interval; calls in between
# answer from the state cached by the previous check.
# Globals:
#   BLITZ_GPS_CHECK_INTERVAL_SEC (read) - seconds between real checks
#                                         (default 10, minimum 1)
#   BLITZ_GPS_DEVICE_GLOB        (read) - glob matched against device paths
#   GPS_LAST_CHECK_SEC           (read/written)
#   GPS_DEVICE_PRESENT_PREV      (read/written)
#   GPS_DEVICE_PRESENT_STATE     (read/written)
#   GPS_STACK_ACTIVE_STATE       (read/written)
#   STEP                         (read) - passed through to blitz_log
# Arguments:
#   $1 - current time in epoch seconds
# Returns:
#   0 when monitoring is disabled, or the device is present and the stack is
#   active (possibly after a successful restart); 1 otherwise.
#######################################
check_gps_health() {
  local now_sec="$1"
  local check_interval_sec="${BLITZ_GPS_CHECK_INTERVAL_SEC:-10}"
  local device_glob="${BLITZ_GPS_DEVICE_GLOB:-}"
  local previous_present="${GPS_DEVICE_PRESENT_PREV}"
  local recovery_reason=""
  local device_summary=""
  local elapsed_sec
  local -a devices=()

  if ! gps_monitor_enabled; then
    GPS_DEVICE_PRESENT_STATE=1
    GPS_STACK_ACTIVE_STATE=1
    return 0
  fi

  # A non-integer interval (e.g. "10s") would make the arithmetic tests below
  # error out and silently disable throttling; fall back to the default.
  if [[ ! "${check_interval_sec}" =~ ^[0-9]+$ ]]; then
    check_interval_sec=10
  fi
  # Force base 10 so a value such as "08" is not parsed as invalid octal.
  check_interval_sec=$(( 10#${check_interval_sec} ))
  if (( check_interval_sec < 1 )); then
    check_interval_sec=1
  fi

  # A negative elapsed time means the clock was stepped backwards since the
  # last check. Re-check right away in that case instead of serving the
  # cached state until the clock catches up again.
  elapsed_sec=$(( now_sec - GPS_LAST_CHECK_SEC ))
  if (( GPS_LAST_CHECK_SEC != 0 && elapsed_sec >= 0 && elapsed_sec < check_interval_sec )); then
    if (( GPS_DEVICE_PRESENT_STATE == 1 && GPS_STACK_ACTIVE_STATE == 1 )); then
      return 0
    fi
    return 1
  fi
  GPS_LAST_CHECK_SEC="${now_sec}"

  # compgen -G exits non-zero when nothing matches; `|| true` keeps that from
  # being treated as an error, leaving the array empty.
  mapfile -t devices < <(compgen -G "${device_glob}" || true)
  if (( ${#devices[@]} == 0 )); then
    GPS_DEVICE_PRESENT_STATE=0
    GPS_STACK_ACTIVE_STATE=0
    # Log only on the transition to "missing", not on every check.
    if (( previous_present != 0 )); then
      blitz_log "${STEP}" "gps-device-check" "failure" "state=missing glob=${device_glob}" 1
    fi
    GPS_DEVICE_PRESENT_PREV=0
    return 1
  fi

  device_summary="$(IFS=,; printf '%s' "${devices[*]}")"
  GPS_DEVICE_PRESENT_STATE=1
  GPS_DEVICE_PRESENT_PREV=1

  if (( previous_present == 0 )); then
    blitz_log "${STEP}" "gps-device-check" "success" "state=reappeared devices=${device_summary}" 0
    recovery_reason="device-reappeared"
  elif ! gps_stack_active; then
    recovery_reason="gpsd-inactive"
  fi

  if [[ -n "${recovery_reason}" ]]; then
    if restart_gps_stack "${recovery_reason}" "${device_summary}"; then
      return 0
    fi
    return 1
  fi

  GPS_STACK_ACTIVE_STATE=1
  return 0
}
|
||||
|
||||
status_file_fresh() {
|
||||
local path="$1"
|
||||
local max_age_sec="$2"
|
||||
@@ -97,6 +208,8 @@ write_watchdog_status() {
|
||||
local camera_ok="$4"
|
||||
local ros_ok="$5"
|
||||
local bside_ok="$6"
|
||||
local gps_ok="$7"
|
||||
local gps_device_present="$8"
|
||||
local tmp_file
|
||||
|
||||
tmp_file="${WATCHDOG_STATUS_FILE}.tmp.$$"
|
||||
@@ -109,6 +222,8 @@ write_watchdog_status() {
|
||||
"camera_ok": ${camera_ok},
|
||||
"ros_ok": ${ros_ok},
|
||||
"bside_ok": ${bside_ok},
|
||||
"gps_ok": ${gps_ok},
|
||||
"gps_device_present": ${gps_device_present},
|
||||
"network_fail_count": ${NETWORK_FAIL_COUNT},
|
||||
"targeted_restart_count": $(targeted_restart_total),
|
||||
"full_restart_count": ${FULL_RESTART_WINDOW_COUNT},
|
||||
@@ -427,13 +542,22 @@ while true; do
|
||||
camera_ok=1
|
||||
ros_ok=1
|
||||
bside_ok=1
|
||||
gps_ok=1
|
||||
gps_device_present=1
|
||||
RECOVERY_ACTION_TAKEN=0
|
||||
now_sec="$(now_epoch_sec)"
|
||||
|
||||
if gps_monitor_enabled; then
|
||||
gps_device_present="${GPS_DEVICE_PRESENT_STATE}"
|
||||
if (( GPS_DEVICE_PRESENT_STATE == 0 || GPS_STACK_ACTIVE_STATE == 0 )); then
|
||||
gps_ok=0
|
||||
fi
|
||||
fi
|
||||
|
||||
if (( BACKOFF_UNTIL > now_sec )); then
|
||||
fault_reason="backoff"
|
||||
recovery_state="backoff"
|
||||
write_watchdog_status "${fault_reason}" "${recovery_state}" 0 0 0 0
|
||||
write_watchdog_status "${fault_reason}" "${recovery_state}" 0 0 0 0 "${gps_ok}" "${gps_device_present}"
|
||||
sleep "${BLITZ_WATCHDOG_INTERVAL_SEC}"
|
||||
continue
|
||||
fi
|
||||
@@ -453,6 +577,22 @@ while true; do
|
||||
NETWORK_FAIL_COUNT=0
|
||||
fi
|
||||
|
||||
if check_gps_health "${now_sec}"; then
|
||||
gps_ok=1
|
||||
else
|
||||
gps_ok=0
|
||||
gps_device_present="${GPS_DEVICE_PRESENT_STATE}"
|
||||
if [[ "${fault_reason}" == "none" ]]; then
|
||||
if (( GPS_DEVICE_PRESENT_STATE == 0 )); then
|
||||
fault_reason="gps_device_missing"
|
||||
else
|
||||
fault_reason="gps_reconnect_failed"
|
||||
fi
|
||||
recovery_state="degraded"
|
||||
fi
|
||||
fi
|
||||
gps_device_present="${GPS_DEVICE_PRESENT_STATE}"
|
||||
|
||||
if [[ ! -e "${OMNI_CAMERA_DEVICE}" ]]; then
|
||||
camera_ok=0
|
||||
fault_reason="camera_missing"
|
||||
@@ -486,6 +626,6 @@ while true; do
|
||||
full_restart_stack "ros-unhealthy" || true
|
||||
fi
|
||||
|
||||
write_watchdog_status "${fault_reason}" "${recovery_state}" "${network_ok}" "${camera_ok}" "${ros_ok}" "${bside_ok}"
|
||||
write_watchdog_status "${fault_reason}" "${recovery_state}" "${network_ok}" "${camera_ok}" "${ros_ok}" "${bside_ok}" "${gps_ok}" "${gps_device_present}"
|
||||
sleep "${BLITZ_WATCHDOG_INTERVAL_SEC}"
|
||||
done
|
||||
|
||||
Reference in New Issue
Block a user