fix:5G不通走其他网络,5G通了走5G
This commit is contained in:
@@ -207,6 +207,8 @@ sudo bash scripts/boot/blitz-fault-inject.sh network-down off
|
||||
- wait for unix socket
|
||||
- start `b_side`
|
||||
- If network checks fail repeatedly and the fallback path is also unreachable, watchdog stops `b_side`, runs `5g-dial.sh`, waits for route recovery, and then restores services.
|
||||
- While 5G is healthy, watchdog keeps every host route listed by `BLITZ_TIME_SERVER_IP` and `BLITZ_5G_ROUTE_TARGETS` pinned to the resolved 5G interface. When 5G becomes unhealthy, watchdog deletes those host routes so traffic can fall back to the remaining default network path. If that fallback path is still reachable, watchdog keeps `b_side_omnid` running instead of treating it as a full network outage.
|
||||
- Whenever watchdog changes or restores those host routes, it logs `route-path` lines for each target so you can see which interface Linux currently chooses for `81.70.156.140`, `106.55.173.235`, and any other configured 5G-pinned target.
|
||||
- If GPS monitoring is enabled, watchdog checks `BLITZ_GPS_DEVICE_GLOB` every `BLITZ_GPS_CHECK_INTERVAL_SEC` seconds. When the GPS serial device disappears and later reappears, watchdog restarts the units in `BLITZ_GPS_RESTART_UNITS` so `gpsd` can bind to the new device node again.
|
||||
- Camera disappearance is logged as degraded state. Reappearance triggers a `b_side` restart after the device is stable.
|
||||
|
||||
|
||||
@@ -22,6 +22,8 @@ LAST_ACTION_EPOCH_MS=0
|
||||
FULL_RESTART_WINDOW_START=0
|
||||
FULL_RESTART_WINDOW_COUNT=0
|
||||
NETWORK_LAST_INTERFACE=""
|
||||
NETWORK_ROUTE_INTERFACE_LAST_KNOWN=""
|
||||
NETWORK_PRIMARY_LAST_RETRY_SEC=0
|
||||
GPS_LAST_CHECK_SEC=0
|
||||
GPS_DEVICE_PRESENT_PREV=-1
|
||||
GPS_DEVICE_PRESENT_STATE=1
|
||||
@@ -329,7 +331,7 @@ full_restart_stack() {
|
||||
return "${rc}"
|
||||
fi
|
||||
|
||||
if bash "${SCRIPT_DIR}/wait-for-unix-socket.sh" --step "${STEP}" --timeout "${BLITZ_ROS_SOCKET_WAIT_SEC}"; then
|
||||
if bash "${BOOT_SCRIPT_DIR}/wait-for-unix-socket.sh" --step "${STEP}" --timeout "${BLITZ_ROS_SOCKET_WAIT_SEC}"; then
|
||||
:
|
||||
else
|
||||
rc=$?
|
||||
@@ -355,7 +357,11 @@ network_fault_injected() {
|
||||
|
||||
# Resolve the current 5G interface name and cache it.
#
# Globals:
#   NETWORK_LAST_INTERFACE             (written) output of blitz_resolve_5g_interface,
#                                      or empty when it printed nothing
#   NETWORK_ROUTE_INTERFACE_LAST_KNOWN (written) updated only when a name was resolved,
#                                      so the previous name survives a failed lookup
# Returns:
#   0 when a non-empty interface name was resolved, 1 otherwise.
resolve_network_interface() {
  # "|| true": a failing resolver must not abort the caller; emptiness is the signal.
  NETWORK_LAST_INTERFACE="$(blitz_resolve_5g_interface || true)"

  if [[ -n "${NETWORK_LAST_INTERFACE}" ]]; then
    NETWORK_ROUTE_INTERFACE_LAST_KNOWN="${NETWORK_LAST_INTERFACE}"
    return 0
  fi
  return 1
}
|
||||
|
||||
network_route_targets() {
|
||||
@@ -371,6 +377,84 @@ network_route_targets() {
|
||||
done
|
||||
}
|
||||
|
||||
# Log, for every target printed by network_route_targets, the first line of
# `ip route get <target>`.
#
# Globals:
#   STEP (read) - passed through to blitz_log as the step name
# Arguments:
#   $1 - free-form action label recorded in each log line (e.g. "sync-to-5g")
# Outputs:
#   One blitz_log "route-path" entry per non-empty target.
log_target_route_paths() {
  local action="$1"
  local target
  local route_output

  while IFS= read -r target; do
    [[ -n "${target}" ]] || continue

    # stderr is folded in on purpose so a lookup error text ends up in the log;
    # only the first line is kept.
    route_output="$(ip route get "${target}" 2>&1 | head -n 1 || true)"
    if [[ -z "${route_output}" ]]; then
      route_output="unresolved"
    fi

    blitz_log "${STEP}" "route-path" "info" "action=${action} target=${target} route=${route_output}" 0
  done < <(network_route_targets)
}
|
||||
|
||||
# Test whether a line of `ip route` output names the given device.
#
# Arguments:
#   $1 - one line of route output
#   $2 - interface name to look for
# Returns:
#   0 when the line contains " dev <name>" followed by a space or end of line,
#   1 otherwise (including when the interface name is empty).
route_output_uses_interface() {
  local route_output="$1"
  local interface_name="$2"

  [[ -n "${interface_name}" ]] || return 1

  # Two patterns so that "dev wwan0" does not match a line using "dev wwan01".
  [[ "${route_output}" == *" dev ${interface_name} "* || "${route_output}" == *" dev ${interface_name}" ]]
}
|
||||
|
||||
# Test whether a line of `ip route` output goes through the given gateway.
#
# Arguments:
#   $1 - one line of route output
#   $2 - gateway address to look for
# Returns:
#   0 when the line contains "via <gateway>" as whole space-delimited tokens,
#   1 otherwise (including when the gateway is empty).
route_output_uses_gateway() {
  local route_output="$1"
  local gateway="$2"

  [[ -n "${gateway}" ]] || return 1

  # Pad both ends with a space so the gateway must match as a complete token:
  # "via 192.168.225.1" must not be satisfied by "via 192.168.225.10".
  [[ " ${route_output} " == *" via ${gateway} "* ]]
}
|
||||
|
||||
# Test whether a route line already matches the wanted interface AND gateway.
#
# Arguments:
#   $1 - one line of route output
#   $2 - wanted interface name
#   $3 - wanted gateway address
# Returns:
#   0 only when both route_output_uses_interface and route_output_uses_gateway
#   succeed for the line; non-zero otherwise.
route_is_desired_target_route() {
  local route_output="$1"
  local interface_name="$2"
  local gateway="$3"

  route_output_uses_interface "${route_output}" "${interface_name}" \
    && route_output_uses_gateway "${route_output}" "${gateway}"
}
|
||||
|
||||
# Decide whether a host route looks like one this watchdog pinned to 5G.
#
# A route counts as managed when ANY of these hold:
#   - it uses the given interface,
#   - it uses the given gateway,
#   - it uses the gateway configured in BLITZ_5G_GATEWAY.
#
# Globals:
#   BLITZ_5G_GATEWAY (read, optional)
# Arguments:
#   $1 - one line of route output
#   $2 - interface name (optional; empty never matches)
#   $3 - gateway address (optional; empty never matches)
# Returns:
#   0 when the route is considered managed, 1 otherwise.
route_is_managed_5g_route() {
  local route_output="$1"
  local interface_name="${2:-}"
  local gateway="${3:-}"

  route_output_uses_interface "${route_output}" "${interface_name}" \
    || route_output_uses_gateway "${route_output}" "${gateway}" \
    || route_output_uses_gateway "${route_output}" "${BLITZ_5G_GATEWAY:-}"
}
|
||||
|
||||
# Print the interface name to use when cleaning up pinned host routes.
#
# Candidates are tried in order and the first non-empty one wins:
#   1. NETWORK_LAST_INTERFACE
#   2. NETWORK_ROUTE_INTERFACE_LAST_KNOWN
#   3. whatever blitz_read_5g_info_interface prints for BLITZ_5G_INFO_JSON
#
# Globals:
#   NETWORK_LAST_INTERFACE, NETWORK_ROUTE_INTERFACE_LAST_KNOWN (read)
#   BLITZ_5G_INFO_JSON (read, optional)
# Outputs:
#   The chosen interface name on stdout, followed by a newline.
# Returns:
#   0 when a name was printed, 1 when no candidate produced one.
resolve_route_cleanup_interface() {
  local interface_name=""
  local info_json="${BLITZ_5G_INFO_JSON:-}"

  if [[ -n "${NETWORK_LAST_INTERFACE}" ]]; then
    printf '%s\n' "${NETWORK_LAST_INTERFACE}"
    return 0
  fi

  if [[ -n "${NETWORK_ROUTE_INTERFACE_LAST_KNOWN}" ]]; then
    printf '%s\n' "${NETWORK_ROUTE_INTERFACE_LAST_KNOWN}"
    return 0
  fi

  # Last resort; a failing reader is tolerated and simply yields an empty name.
  interface_name="$(blitz_read_5g_info_interface "${info_json}" || true)"
  if [[ -n "${interface_name}" ]]; then
    printf '%s\n' "${interface_name}"
    return 0
  fi

  return 1
}
|
||||
|
||||
resolve_network_gateway() {
|
||||
local interface_name="$1"
|
||||
local default_route
|
||||
@@ -400,18 +484,96 @@ resolve_network_gateway() {
|
||||
return 1
|
||||
}
|
||||
|
||||
# Pin every configured target to the 5G link with a /32 host route.
#
# Targets come from network_route_targets. A target whose existing /32 route
# already uses the wanted interface and gateway is left untouched, so calling
# this on every healthy loop iteration does not rewrite routes needlessly.
#
# Globals:
#   STEP (read)
#   NETWORK_ROUTE_INTERFACE_LAST_KNOWN (written when at least one route changed)
# Arguments:
#   $1 - interface to route through (required)
#   $2 - gateway on that interface (optional; looked up with
#        resolve_network_gateway when empty)
# Returns:
#   0 when every target already had, or now has, the desired route;
#   1 when the interface is empty or no gateway could be determined;
#   otherwise the exit status of the failing `ip route replace`.
sync_target_routes_to_5g() {
  local interface_name="$1"
  local gateway="${2:-}"
  local route_output=""
  local updated=0
  local target
  local rc

  if [[ -z "${interface_name}" ]]; then
    return 1
  fi

  if [[ -z "${gateway}" ]]; then
    gateway="$(resolve_network_gateway "${interface_name}" || true)"
  fi
  if [[ -z "${gateway}" ]]; then
    blitz_log "${STEP}" "route-sync-gateway" "failure" "interface=${interface_name}" 1
    return 1
  fi

  while IFS= read -r target; do
    [[ -n "${target}" ]] || continue

    # Skip targets that are already pinned exactly as wanted.
    route_output="$(ip route show "${target}/32" 2>/dev/null | head -n 1 || true)"
    if [[ -n "${route_output}" ]] && route_is_desired_target_route "${route_output}" "${interface_name}" "${gateway}"; then
      continue
    fi

    if ip route replace "${target}/32" via "${gateway}" dev "${interface_name}"; then
      updated=1
      blitz_log "${STEP}" "route-sync-target" "success" "target=${target} interface=${interface_name} gateway=${gateway}" 0
    else
      rc=$?
      blitz_log "${STEP}" "route-sync-target" "failure" "target=${target} interface=${interface_name} gateway=${gateway}" "${rc}"
      return "${rc}"
    fi
  done < <(network_route_targets)

  # Only log the resulting paths when something actually changed.
  if (( updated == 1 )); then
    NETWORK_ROUTE_INTERFACE_LAST_KNOWN="${interface_name}"
    log_target_route_paths "sync-to-5g"
  fi
  return 0
}
|
||||
|
||||
# Remove the /32 host routes that pin configured targets to the 5G link, so
# traffic to them falls back to whatever route the kernel picks next.
#
# Only routes recognised by route_is_managed_5g_route are deleted; any other
# /32 route for a target is left alone. A failed delete no longer aborts the
# loop: the remaining targets are still processed, so one stale entry cannot
# keep the other targets pinned to an unhealthy link.
#
# Globals:
#   STEP (read), BLITZ_5G_GATEWAY (read, optional)
# Arguments:
#   $1 - interface the routes were pinned to (optional; resolved with
#        resolve_route_cleanup_interface when empty)
#   $2 - gateway the routes were pinned to (optional; resolved with
#        resolve_network_gateway, then BLITZ_5G_GATEWAY, when empty)
# Returns:
#   0 when no delete failed; otherwise the exit status of the first failing
#   `ip route del`.
clear_target_routes_from_5g() {
  local interface_name="${1:-}"
  local gateway="${2:-}"
  local route_output=""
  local target
  local removed_any=0
  local first_failure_rc=0
  local rc

  if [[ -z "${interface_name}" ]]; then
    interface_name="$(resolve_route_cleanup_interface || true)"
  fi
  if [[ -z "${gateway}" && -n "${interface_name}" ]]; then
    gateway="$(resolve_network_gateway "${interface_name}" || true)"
  fi
  if [[ -z "${gateway}" ]]; then
    gateway="${BLITZ_5G_GATEWAY:-}"
  fi

  while IFS= read -r target; do
    [[ -n "${target}" ]] || continue

    route_output="$(ip route show "${target}/32" 2>/dev/null | head -n 1 || true)"
    if [[ -z "${route_output}" ]] || ! route_is_managed_5g_route "${route_output}" "${interface_name}" "${gateway}"; then
      continue
    fi

    if ip route del "${target}/32"; then
      removed_any=1
      blitz_log "${STEP}" "route-clear-target" "success" "target=${target} interface=${interface_name:-unknown} gateway=${gateway:-unknown}" 0
    else
      rc=$?
      blitz_log "${STEP}" "route-clear-target" "failure" "target=${target} interface=${interface_name:-unknown} gateway=${gateway:-unknown}" "${rc}"
      # Remember the first failure but keep clearing the other targets.
      if (( first_failure_rc == 0 )); then
        first_failure_rc="${rc}"
      fi
    fi
  done < <(network_route_targets)

  if (( removed_any == 1 )); then
    # The overall "success" entry is reserved for a run with no failed delete.
    if (( first_failure_rc == 0 )); then
      blitz_log "${STEP}" "route-clear" "success" "interface=${interface_name:-unknown} gateway=${gateway:-unknown}" 0
    fi
    log_target_route_paths "clear-from-5g"
  fi
  return "${first_failure_rc}"
}
|
||||
|
||||
repair_network_routes() {
|
||||
local interface_name="$1"
|
||||
local gateway=""
|
||||
local route_output
|
||||
|
||||
if [[ -z "${interface_name}" ]]; then
|
||||
return 1
|
||||
fi
|
||||
|
||||
@@ -421,23 +583,24 @@ repair_network_routes() {
|
||||
return 1
|
||||
fi
|
||||
|
||||
while IFS= read -r target; do
|
||||
[[ -n "${target}" ]] || continue
|
||||
if ip route replace "${target}/32" via "${gateway}" dev "${interface_name}"; then
|
||||
blitz_log "${STEP}" "route-repair-target" "success" "target=${target} interface=${interface_name} gateway=${gateway}" 0
|
||||
else
|
||||
rc=$?
|
||||
blitz_log "${STEP}" "route-repair-target" "failure" "target=${target} interface=${interface_name} gateway=${gateway}" "${rc}"
|
||||
return "${rc}"
|
||||
fi
|
||||
done < <(network_route_targets)
|
||||
if ! sync_target_routes_to_5g "${interface_name}" "${gateway}"; then
|
||||
clear_target_routes_from_5g "${interface_name}" "${gateway}" || true
|
||||
return 1
|
||||
fi
|
||||
|
||||
route_output="$(blitz_route_ready "${BLITZ_TIME_SERVER_IP}" "${interface_name}" || true)"
|
||||
if [[ -z "${route_output}" ]]; then
|
||||
clear_target_routes_from_5g "${interface_name}" "${gateway}" || true
|
||||
blitz_log "${STEP}" "route-repair-postcheck" "failure" "interface=${interface_name} gateway=${gateway}" 1
|
||||
return 1
|
||||
fi
|
||||
|
||||
if ! ping -I "${interface_name}" -c 1 -W 2 "${BLITZ_TIME_SERVER_IP}" >/dev/null 2>&1; then
|
||||
clear_target_routes_from_5g "${interface_name}" "${gateway}" || true
|
||||
blitz_log "${STEP}" "route-repair-probe" "failure" "interface=${interface_name} target=${BLITZ_TIME_SERVER_IP}" 1
|
||||
return 1
|
||||
fi
|
||||
|
||||
blitz_log "${STEP}" "route-repair-postcheck" "success" "interface=${interface_name} gateway=${gateway} route=${route_output}" 0
|
||||
return 0
|
||||
}
|
||||
@@ -459,6 +622,21 @@ network_is_healthy() {
|
||||
ping -I "${NETWORK_LAST_INTERFACE}" -c 1 -W 2 "${BLITZ_TIME_SERVER_IP}" >/dev/null 2>&1
|
||||
}
|
||||
|
||||
# Check whether BLITZ_TIME_SERVER_IP is reachable over whatever route the
# system currently offers (no interface is forced).
#
# Globals:
#   BLITZ_TIME_SERVER_IP (read)
# Returns:
#   1 when BLITZ_TIME_SERVER_IP is unset/empty or blitz_route_ready prints
#   nothing for it; otherwise the exit status of a single 2-second ping.
fallback_network_is_healthy() {
  local route_output

  if [[ -z "${BLITZ_TIME_SERVER_IP:-}" ]]; then
    return 1
  fi

  # Called without an interface argument, unlike the 5G-specific checks.
  route_output="$(blitz_route_ready "${BLITZ_TIME_SERVER_IP}" || true)"
  if [[ -z "${route_output}" ]]; then
    return 1
  fi

  ping -c 1 -W 2 "${BLITZ_TIME_SERVER_IP}" >/dev/null 2>&1
}
|
||||
|
||||
wait_for_network_recovery() {
|
||||
local timeout_sec="$1"
|
||||
local waited=0
|
||||
@@ -496,11 +674,11 @@ perform_network_recovery() {
|
||||
blitz_log "${STEP}" "network-recovery" "start" "fail_count=${NETWORK_FAIL_COUNT}" 0
|
||||
systemctl stop "${B_SIDE_SERVICE}" || true
|
||||
|
||||
if bash "${SCRIPT_DIR}/5g-dial.sh"; then
|
||||
if bash "${BOOT_SCRIPT_DIR}/5g-dial.sh"; then
|
||||
:
|
||||
else
|
||||
rc=$?
|
||||
blitz_log "${STEP}" "network-redial" "failure" "fail_count=${NETWORK_FAIL_COUNT} script=${SCRIPT_DIR}/5g-dial.sh" "${rc}"
|
||||
blitz_log "${STEP}" "network-redial" "failure" "fail_count=${NETWORK_FAIL_COUNT} script=${BOOT_SCRIPT_DIR}/5g-dial.sh" "${rc}"
|
||||
return "${rc}"
|
||||
fi
|
||||
|
||||
@@ -565,16 +743,36 @@ while true; do
|
||||
if (( NETWORK_COOLDOWN_UNTIL > now_sec )); then
|
||||
recovery_state="recovering"
|
||||
elif ! network_is_healthy; then
|
||||
network_ok=0
|
||||
NETWORK_FAIL_COUNT=$(( NETWORK_FAIL_COUNT + 1 ))
|
||||
fault_reason="network_or_robot_unreachable"
|
||||
recovery_state="recovering"
|
||||
blitz_log "${STEP}" "network-check" "failure" "count=${NETWORK_FAIL_COUNT} interface=${NETWORK_LAST_INTERFACE:-unresolved}" 1
|
||||
if (( NETWORK_FAIL_COUNT >= BLITZ_NETWORK_FAIL_THRESHOLD )); then
|
||||
perform_network_recovery || true
|
||||
clear_target_routes_from_5g || true
|
||||
if fallback_network_is_healthy; then
|
||||
NETWORK_FAIL_COUNT=0
|
||||
fault_reason="network_fallback_active"
|
||||
recovery_state="degraded"
|
||||
blitz_log "${STEP}" "network-check" "fallback" "interface=${NETWORK_LAST_INTERFACE:-unresolved} target=${BLITZ_TIME_SERVER_IP}" 0
|
||||
if (( NETWORK_PRIMARY_LAST_RETRY_SEC == 0 || now_sec - NETWORK_PRIMARY_LAST_RETRY_SEC >= 10 )); then
|
||||
NETWORK_PRIMARY_LAST_RETRY_SEC="${now_sec}"
|
||||
if resolve_network_interface && repair_network_routes "${NETWORK_LAST_INTERFACE}"; then
|
||||
NETWORK_PRIMARY_LAST_RETRY_SEC=0
|
||||
fault_reason="none"
|
||||
recovery_state="ok"
|
||||
blitz_log "${STEP}" "network-check" "primary-restored" "interface=${NETWORK_LAST_INTERFACE} target=${BLITZ_TIME_SERVER_IP}" 0
|
||||
log_target_route_paths "primary-restored"
|
||||
fi
|
||||
fi
|
||||
else
|
||||
network_ok=0
|
||||
NETWORK_FAIL_COUNT=$(( NETWORK_FAIL_COUNT + 1 ))
|
||||
fault_reason="network_or_robot_unreachable"
|
||||
recovery_state="recovering"
|
||||
blitz_log "${STEP}" "network-check" "failure" "count=${NETWORK_FAIL_COUNT} interface=${NETWORK_LAST_INTERFACE:-unresolved}" 1
|
||||
if (( NETWORK_FAIL_COUNT >= BLITZ_NETWORK_FAIL_THRESHOLD )); then
|
||||
perform_network_recovery || true
|
||||
fi
|
||||
fi
|
||||
else
|
||||
NETWORK_PRIMARY_LAST_RETRY_SEC=0
|
||||
NETWORK_FAIL_COUNT=0
|
||||
sync_target_routes_to_5g "${NETWORK_LAST_INTERFACE}" || true
|
||||
fi
|
||||
|
||||
if check_gps_health "${now_sec}"; then
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
{
|
||||
"interface": "enxd41a57335f9d",
|
||||
"interface": "enxb8f72c9e179a",
|
||||
"ipv4": [
|
||||
"192.168.225.83/22"
|
||||
"192.168.225.160/22"
|
||||
],
|
||||
"ipv6": [
|
||||
"fe80::18c1:e89d:e033:9857/64"
|
||||
"fe80::52ae:a1c8:a9bb:a9a8/64"
|
||||
]
|
||||
}
|
||||
Reference in New Issue
Block a user