From 14ce3d4e1d2dd5d6ae2017bf02565e4d2bb457c3 Mon Sep 17 00:00:00 2001 From: Mock Date: Sat, 11 Apr 2026 16:06:51 +0800 Subject: [PATCH] =?UTF-8?q?feat:=205G=E8=87=AA=E5=8A=A8=E6=8B=A8=E5=8F=B7?= =?UTF-8?q?=E3=80=81=E8=BD=AF=E4=BB=B6=E6=97=B6=E9=92=9F=E5=90=8C=E6=AD=A5?= =?UTF-8?q?=E3=80=81=E6=9C=BA=E5=99=A8=E4=BA=BA=E7=AB=AF=E6=8E=A7=E5=88=B6?= =?UTF-8?q?=E7=A8=8B=E5=BA=8F=E8=87=AA=E5=90=AF=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/boot/5g-dial.sh | 78 ++++ scripts/boot/README.md | 379 ++++++++++++++++++ scripts/boot/boot-gate.sh | 15 + scripts/boot/common.sh | 188 +++++++++ scripts/boot/install-systemd.sh | 53 +++ scripts/boot/robot-boot.env | 25 ++ scripts/boot/start-b-side-omnid-service.sh | 18 + scripts/boot/start-ros-receiver-service.sh | 17 + scripts/boot/systemd/blitz-5g-dial.service.in | 14 + .../systemd/blitz-b-side-omnid.service.in | 15 + .../boot/systemd/blitz-boot-gate.service.in | 14 + scripts/boot/systemd/blitz-robot.target.in | 11 + .../systemd/blitz-ros-receiver.service.in | 17 + .../boot/systemd/blitz-time-sync.service.in | 14 + scripts/boot/time-sync.sh | 113 ++++++ scripts/boot/wait-for-unix-socket.sh | 49 +++ scripts/dev/start-b-side-omnid.sh | 4 + scripts/dev/start-ros-receiver.sh | 8 + 18 files changed, 1032 insertions(+) create mode 100644 scripts/boot/5g-dial.sh create mode 100644 scripts/boot/README.md create mode 100644 scripts/boot/boot-gate.sh create mode 100644 scripts/boot/common.sh create mode 100644 scripts/boot/install-systemd.sh create mode 100644 scripts/boot/robot-boot.env create mode 100644 scripts/boot/start-b-side-omnid-service.sh create mode 100644 scripts/boot/start-ros-receiver-service.sh create mode 100644 scripts/boot/systemd/blitz-5g-dial.service.in create mode 100644 scripts/boot/systemd/blitz-b-side-omnid.service.in create mode 100644 scripts/boot/systemd/blitz-boot-gate.service.in create mode 100644 scripts/boot/systemd/blitz-robot.target.in 
create mode 100644 scripts/boot/systemd/blitz-ros-receiver.service.in create mode 100644 scripts/boot/systemd/blitz-time-sync.service.in create mode 100644 scripts/boot/time-sync.sh create mode 100644 scripts/boot/wait-for-unix-socket.sh diff --git a/scripts/boot/5g-dial.sh b/scripts/boot/5g-dial.sh new file mode 100644 index 0000000..1801794 --- /dev/null +++ b/scripts/boot/5g-dial.sh @@ -0,0 +1,78 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# shellcheck disable=SC1091 +source "${SCRIPT_DIR}/common.sh" + +STEP="5g-dial" + +wait_for_serial() { + local serial_port="$1" + local timeout_sec="$2" + local waited=0 + + while (( waited < timeout_sec )); do + if [[ -e "${serial_port}" ]]; then + blitz_log "${STEP}" "wait-serial" "success" "serial_port=${serial_port} waited_sec=${waited}" 0 + return 0 + fi + if (( waited == 0 || waited % 5 == 0 )); then + blitz_log "${STEP}" "wait-serial" "waiting" "serial_port=${serial_port} waited_sec=${waited}" 0 + fi + sleep 1 + waited=$(( waited + 1 )) + done + + blitz_log "${STEP}" "wait-serial" "failure" "serial_port=${serial_port} timeout_sec=${timeout_sec}" 1 + return 1 +} + +wait_for_route() { + local target_ip="$1" + local timeout_sec="$2" + local waited=0 + local route_output + + while (( waited < timeout_sec )); do + route_output="$(blitz_route_ready "${target_ip}" || true)" + if [[ -n "${route_output}" ]]; then + blitz_log "${STEP}" "route-check" "success" "target_ip=${target_ip} route=${route_output}" 0 + return 0 + fi + if (( waited == 0 || waited % 5 == 0 )); then + blitz_log "${STEP}" "route-check" "waiting" "target_ip=${target_ip} waited_sec=${waited}" 0 + fi + sleep 1 + waited=$(( waited + 1 )) + done + + blitz_log "${STEP}" "route-check" "failure" "target_ip=${target_ip} timeout_sec=${timeout_sec}" 1 + return 1 +} + +blitz_load_boot_env +blitz_require_root "${STEP}" +blitz_require_command ip "${STEP}" +blitz_require_command python3 "${STEP}" 
+blitz_require_file "${BLITZ_5G_DIAL_DIR}/rndis_dial.py" "${STEP}" + +if [[ -z "${BLITZ_TIME_SERVER_IP}" ]]; then + blitz_log "${STEP}" "precheck" "failure" "BLITZ_TIME_SERVER_IP is empty and no fallback could be derived" 1 + exit 1 +fi + +route_output="$(blitz_route_ready "${BLITZ_TIME_SERVER_IP}" || true)" +if [[ -n "${route_output}" ]]; then + blitz_log "${STEP}" "dial" "already_up" "target_ip=${BLITZ_TIME_SERVER_IP} route=${route_output}" 0 + exit 0 +fi + +wait_for_serial "${BLITZ_5G_SERIAL_PORT}" "${BLITZ_5G_SERIAL_WAIT_SEC}" + +pushd "${BLITZ_5G_DIAL_DIR}" >/dev/null +blitz_run "${STEP}" "dial" python3 rndis_dial.py --serial-port "${BLITZ_5G_SERIAL_PORT}" +popd >/dev/null + +wait_for_route "${BLITZ_TIME_SERVER_IP}" "${BLITZ_5G_ROUTE_WAIT_SEC}" +blitz_log "${STEP}" "complete" "success" "5G dial completed and route is ready" 0 diff --git a/scripts/boot/README.md b/scripts/boot/README.md new file mode 100644 index 0000000..0091586 --- /dev/null +++ b/scripts/boot/README.md @@ -0,0 +1,379 @@ +# 机器人 B 端开机自启说明 + +这个目录是给机器人端做开机自启用的。 + +你看到这里多了不少脚本和 `systemd` 单元,不是为了让你手工一条条执行,而是为了把开机流程拆开管理: + +1. 固定启动顺序 +2. 某一步失败时可单独重试 +3. 所有动作统一写到一个本地日志文件 +4. 后面如果要把“固定延时 30 秒”换成“等待机器人原有自检完成”,只改最前面的闸门即可 + +所以平时真正需要人工执行的,通常只有这两步: + +```bash +sudo bash scripts/boot/install-systemd.sh +sudo systemctl start blitz-robot.target +``` + +以后机器人重启时,就不需要你再手工执行这些脚本了。 + +## 启动顺序 + +当前开机链路如下: + +1. `blitz-boot-gate.service` +2. `blitz-5g-dial.service` +3. `blitz-time-sync.service` +4. `blitz-ros-receiver.service` +5. `blitz-b-side-omnid.service` + +对应业务顺序就是: + +1. 先固定等待 30 秒,给机器人原有自检/自启程序让路 +2. 运行 5G 自动拨号 +3. 运行时钟同步 +4. 启动 `start-ros-receiver.sh` +5. 
启动 `start-b-side-omnid.sh` + +## 日志文件 + +所有关键操作都会统一写到这个本地文件: + +```text +/var/log/blitz-robot/startup.log +``` + +每一行日志格式如下: + +```text +timestamp | step | action | result | details | exit_code +``` + +日志里会记录: + +- 做了什么 +- 实际执行了什么命令 +- 前置检查是否通过 +- 成功还是失败 +- 失败原因 +- 退出码 +- 是否发生了重试 + +## 这些文件分别是干什么的 + +- `robot-boot.env`:开机自启默认配置 +- `robot-boot.env.local`:本机覆盖配置,建议把你自己的配置写这里 +- `common.sh`:公共环境加载和统一日志函数 +- `boot-gate.sh`:启动闸门,当前逻辑是固定等待 30 秒 +- `5g-dial.sh`:等待 5G 串口出现,执行 `rndis_dial.py`,并检查路由是否真的起来 +- `time-sync.sh`:把 `chrony` 指向白名单服务器 IP 和端口,并执行一次同步 +- `start-ros-receiver-service.sh`:开机版 ROS receiver 启动包装 +- `wait-for-unix-socket.sh`:等待 ROS receiver 建好本地 unix socket +- `start-b-side-omnid-service.sh`:开机版 `b_side_omnid` 启动包装 +- `install-systemd.sh`:把 `systemd` 单元安装到 `/etc/systemd/system` +- `systemd/*.service.in`、`systemd/*.target.in`:`systemd` 模板文件 + +## 前置条件 + +你前面说过,除了时钟同步以外,其他程序环境都应该已经配好了。按这个前提,这里只强调必须确认的前置条件。 + +### 1. 机器人侧必须已有的条件 + +默认认为下面这些已经具备: + +- 系统是 Ubuntu,且使用 `systemd` +- `OmniSocketGo` 仓库已经放在机器人上 +- `scripts/dev/start-ros-receiver.sh` 原本就能正常启动 +- `scripts/dev/start-b-side-omnid.sh` 原本就能正常启动 +- `bin/b_side_omnid` 已经提前编译好 +- 5G 拨号脚本存在:`/home/nvidia/5g-test/5G/rndis_dial.py` +- 5G 串口设备是:`/dev/ttyUSB7` + +注意: + +- 开机模式下不会自动编译 `b_side_omnid` +- 如果 `bin/b_side_omnid` 不存在,服务会直接报错并写日志 + +### 2. 时钟同步需要的前置安装 + +时钟同步这一步依赖 `chrony`。 + +如果机器人侧没有安装,请先安装: + +```bash +sudo apt update +sudo apt install -y chrony +``` + +安装后建议确认: + +```bash +systemctl status chrony +chronyc tracking +``` + +### 3. 
云服务器侧需要的前置条件 + +因为你的 5G 是白名单网络,所以时钟同步不能依赖公网域名或默认 NTP 池,必须只用你的白名单云服务器 IP。 + +云服务器侧需要满足: + +- 服务器上运行 `chronyd` +- 安全组 / 防火墙放通你实际使用的 UDP 端口 +- 机器人能访问这台服务器的 IP + +如果云服务器还没有安装 `chrony`,可以参考: + +```bash +sudo apt update +sudo apt install -y chrony +sudo systemctl enable chrony +sudo systemctl restart chrony +``` + +如果你不能使用标准的 `123/udp`,完全可以改成你自己的端口,例如 `10910/udp`。 + +例如云服务器 `/etc/chrony/chrony.conf` 里改成监听 10910: + +```conf +port 10910 +allow 0/0 +``` + +然后重启: + +```bash +sudo systemctl restart chrony +``` + +机器人端则在 `robot-boot.env.local` 里配置: + +```bash +BLITZ_TIME_SERVER_IP="你的云服务器IP" +BLITZ_TIME_SERVER_PORT="10910" +``` + +这样 `time-sync.sh` 会自动生成: + +```conf +server 你的云服务器IP port 10910 iburst +``` + +注意:这里必须是你自己可控的 `chronyd` 服务端。公网标准 NTP 服务通常只监听 `123/udp`,不能要求它们改到 `10910`。 + +## 需要改哪些配置 + +不要直接改 `robot-boot.env`,更推荐新建: + +```text +scripts/boot/robot-boot.env.local +``` + +常见要改的是这些: + +```bash +BLITZ_BOOT_DELAY_SEC="30" +BLITZ_LOG_FILE="/var/log/blitz-robot/startup.log" + +BLITZ_5G_DIAL_DIR="/home/nvidia/5g-test/5G" +BLITZ_5G_SERIAL_PORT="/dev/ttyUSB7" + +BLITZ_TIME_SERVER_IP="你的白名单云服务器IP" +BLITZ_TIME_SERVER_PORT="10910" + +BLITZ_ROS_USER="nvidia" +``` + +如果 `BLITZ_TIME_SERVER_IP` 留空,脚本会自动回退到 `ROBOT_SIDE_OMNISOCKET_SERVER_ADDR` 的 IP 部分。 + +## 如何安装和使用 + +下面假设你当前目录就在 `OmniSocketGo` 仓库根目录。 + +### 第一步:准备本机配置 + +建议先创建: + +```bash +cp scripts/boot/robot-boot.env scripts/boot/robot-boot.env.local +``` + +然后编辑: + +```bash +vim scripts/boot/robot-boot.env.local +``` + +至少确认这几个值是对的: + +- `BLITZ_5G_DIAL_DIR` +- `BLITZ_5G_SERIAL_PORT` +- `BLITZ_TIME_SERVER_IP` +- `BLITZ_TIME_SERVER_PORT` +- `BLITZ_ROS_USER` + +### 第二步:安装 systemd 单元 + +执行: + +```bash +sudo bash scripts/boot/install-systemd.sh +``` + +这个安装脚本会做这些事情: + +1. 创建日志目录和日志文件 +2. 渲染 `systemd` 模板 +3. 把 unit 文件复制到 `/etc/systemd/system` +4. 执行 `systemctl daemon-reload` +5. 
执行 `systemctl enable blitz-robot.target` + +### 第三步:立刻启动一次 + +执行: + +```bash +sudo systemctl start blitz-robot.target +``` + +### 第四步:以后重启自动生效 + +因为安装脚本已经做了 `enable`,所以后续机器人重启时会自动拉起,不需要你再手工执行。 + +如果想手工确认,也可以执行: + +```bash +sudo systemctl enable blitz-robot.target +``` + +## 如何查看是否正常 + +### 看总日志文件 + +最直接: + +```bash +tail -f /var/log/blitz-robot/startup.log +``` + +### 看各个服务状态 + +```bash +systemctl status blitz-robot.target +systemctl status blitz-boot-gate.service +systemctl status blitz-5g-dial.service +systemctl status blitz-time-sync.service +systemctl status blitz-ros-receiver.service +systemctl status blitz-b-side-omnid.service +``` + +### 看 journal + +```bash +journalctl -u blitz-robot.target -u blitz-boot-gate.service -u blitz-5g-dial.service \ + -u blitz-time-sync.service -u blitz-ros-receiver.service -u blitz-b-side-omnid.service -f +``` + +## 当前时钟同步会做什么 + +`time-sync.sh` 当前逻辑是: + +1. 读取 `BLITZ_TIME_SERVER_IP` +2. 读取 `BLITZ_TIME_SERVER_PORT` +3. 修改 `/etc/chrony/chrony.conf` +4. 注释掉原有的 `pool` 和 `server` 项 +5. 保留一个备份文件:`/etc/chrony/chrony.conf.blitz-bak` +6. 写入: + +```text +/etc/chrony/sources.d/blitz-robot.sources +``` + +7. 生成类似下面这一行: + +```conf +server 你的云服务器IP port 10910 iburst +``` + +8. 重启 `chrony` +9. 执行 `chronyc burst` +10. 执行 `chronyc waitsync` + +注意: + +- 如果同步超时,会记日志为 `soft_fail` +- 但不会阻塞后面的 ROS 和 `b_side_omnid` 启动 + +## 常见问题 + +### 1. 为什么会突然多出这么多脚本? + +因为把开机流程拆成了多个稳定的小步骤: + +- 更容易排查哪一步失败 +- 更容易让 `systemd` 自动重启 +- 更容易记录完整日志 +- 后续更容易替换“30 秒延时”为真正的机器人 ready 条件 + +你平时不需要手工逐个执行这些脚本。 + +### 2. 我是不是要手工跑 `5g-dial.sh`、`time-sync.sh`、`start-ros-receiver-service.sh`? + +正常情况下不用。 + +你只需要: + +```bash +sudo bash scripts/boot/install-systemd.sh +sudo systemctl start blitz-robot.target +``` + +### 3. 如果时钟同步失败怎么办? 
+
+先看:
+
+```bash
+tail -f /var/log/blitz-robot/startup.log
+systemctl status blitz-time-sync.service
+chronyc sources -v
+chronyc tracking
+```
+
+优先检查:
+
+- `BLITZ_TIME_SERVER_IP` 是否填对
+- `BLITZ_TIME_SERVER_PORT` 是否填对
+- 云服务器是否真的跑了 `chronyd`
+- 云服务器防火墙 / 安全组是否放通你配置的 UDP 端口,例如 `10910`
+- 5G 白名单是否确实允许访问这个服务器 IP
+
+### 4. 如果 ROS receiver 没起来怎么办?
+
+先看:
+
+```bash
+systemctl status blitz-ros-receiver.service
+tail -f /var/log/blitz-robot/startup.log
+```
+
+再检查:
+
+- `/opt/ros/${ROS_DISTRO}/setup.bash` 是否存在
+- `${ROS_CONTROL_PY_DIR}/install/setup.bash` 是否存在
+- `ROBOT_RECEIVER_LOCAL_SOCKET_PATH` 对应的 socket 是否出现
+
+### 5. 如果 b_side_omnid 没起来怎么办?
+
+先看:
+
+```bash
+systemctl status blitz-b-side-omnid.service
+tail -f /var/log/blitz-robot/startup.log
+```
+
+再检查:
+
+- `bin/b_side_omnid` 是否已经提前编译好
+- 摄像头设备是否存在
+- `robot-remote.env` / `robot-boot.env.local` 里的地址配置是否正确
diff --git a/scripts/boot/boot-gate.sh b/scripts/boot/boot-gate.sh
new file mode 100644
index 0000000..ef22e0f
--- /dev/null
+++ b/scripts/boot/boot-gate.sh
@@ -0,0 +1,15 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+# shellcheck disable=SC1091
+source "${SCRIPT_DIR}/common.sh"
+
+STEP="boot-gate"
+
+blitz_load_boot_env
+
+blitz_log "${STEP}" "start" "start" "delay_sec=${BLITZ_BOOT_DELAY_SEC}" 0
+blitz_log "${STEP}" "delay" "start" "sleep ${BLITZ_BOOT_DELAY_SEC}s before starting Blitz services" 0
+sleep "${BLITZ_BOOT_DELAY_SEC}"
+blitz_log "${STEP}" "delay" "success" "boot gate released after ${BLITZ_BOOT_DELAY_SEC}s" 0
diff --git a/scripts/boot/common.sh b/scripts/boot/common.sh
new file mode 100644
index 0000000..5974a96
--- /dev/null
+++ b/scripts/boot/common.sh
@@ -0,0 +1,214 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+BOOT_SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+DEV_SCRIPT_DIR="$(cd "${BOOT_SCRIPT_DIR}/../dev" && pwd)"
+
+source_with_nounset_off() {
+  set +u
+  # shellcheck disable=SC1090
+  source "$1"
+  set -u
+}
+
+# Print the host part of "host:port" or "[ipv6]:port".
+# Returns 1 without output when the argument is empty.
+blitz_host_from_addr() {
+  local value="${1:-}"
+
+  if [[ -z "${value}" ]]; then
+    return 1
+  fi
+  if [[ "${value}" == \[*\]:* ]]; then
+    value="${value#\[}"
+    printf '%s\n' "${value%%]:*}"
+    return 0
+  fi
+  printf '%s\n' "${value%%:*}"
+}
+
+# Source dev/load-env.sh, then robot-boot.env and robot-boot.env.local (the
+# later file overrides the earlier one), and export every BLITZ_* setting
+# with a default. A second call in the same process returns immediately.
+blitz_load_boot_env() {
+  local env_file
+  local default_time_server
+
+  if [[ "${BLITZ_BOOT_ENV_LOADED:-0}" == "1" ]]; then
+    return 0
+  fi
+
+  # shellcheck disable=SC1091
+  source "${DEV_SCRIPT_DIR}/load-env.sh"
+
+  for env_file in \
+    "${BOOT_SCRIPT_DIR}/robot-boot.env" \
+    "${BOOT_SCRIPT_DIR}/robot-boot.env.local"
+  do
+    if [[ -f "${env_file}" ]]; then
+      set -a
+      # shellcheck disable=SC1090
+      source "${env_file}"
+      set +a
+    fi
+  done
+
+  default_time_server="$(blitz_host_from_addr "${ROBOT_SIDE_OMNISOCKET_SERVER_ADDR:-}" || true)"
+
+  export BLITZ_BOOT_DELAY_SEC="${BLITZ_BOOT_DELAY_SEC:-30}"
+  export BLITZ_LOG_FILE="${BLITZ_LOG_FILE:-/var/log/blitz-robot/startup.log}"
+  export BLITZ_5G_DIAL_DIR="${BLITZ_5G_DIAL_DIR:-/home/nvidia/5g-test/5G}"
+  export BLITZ_5G_SERIAL_PORT="${BLITZ_5G_SERIAL_PORT:-/dev/ttyUSB7}"
+  export BLITZ_5G_SERIAL_WAIT_SEC="${BLITZ_5G_SERIAL_WAIT_SEC:-60}"
+  export BLITZ_5G_ROUTE_WAIT_SEC="${BLITZ_5G_ROUTE_WAIT_SEC:-30}"
+  export BLITZ_TIME_SERVER_IP="${BLITZ_TIME_SERVER_IP:-${default_time_server}}"
+  export BLITZ_TIME_SERVER_PORT="${BLITZ_TIME_SERVER_PORT:-123}"
+  export BLITZ_TIME_SYNC_WAIT_SEC="${BLITZ_TIME_SYNC_WAIT_SEC:-60}"
+  export BLITZ_TIME_SYNC_MAX_OFFSET_SEC="${BLITZ_TIME_SYNC_MAX_OFFSET_SEC:-0.002}"
+  export BLITZ_TIME_SYNC_INTERVAL_SEC="${BLITZ_TIME_SYNC_INTERVAL_SEC:-1}"
+  export BLITZ_ROS_USER="${BLITZ_ROS_USER:-nvidia}"
+  export BLITZ_ROS_SOCKET_WAIT_SEC="${BLITZ_ROS_SOCKET_WAIT_SEC:-20}"
+  export BLITZ_BOOT_ENV_LOADED="1"
+}
+
+# Print the current local time with its UTC offset.
+blitz_timestamp() {
+  date '+%Y-%m-%d %H:%M:%S%z'
+}
+
+# Collapse newlines and carriage returns so a log record stays on one line.
+blitz_sanitize_detail() {
+  local detail="${1:-}"
+
+  detail="${detail//$'\n'/ ; }"
+  detail="${detail//$'\r'/ }"
+  printf '%s' "${detail}"
+}
+
+# Print one log record to stdout in the form
+#   timestamp | step | action | result | details | exit_code
+blitz_log() {
+  local step="${1:-unknown-step}"
+  local action="${2:-unknown-action}"
+  local result="${3:-info}"
+  local details="${4:-}"
+  local exit_code="${5:-0}"
+
+  printf '%s | %s | %s | %s | %s | %s\n' \
+    "$(blitz_timestamp)" \
+    "${step}" \
+    "${action}" \
+    "${result}" \
+    "$(blitz_sanitize_detail "${details}")" \
+    "${exit_code}"
+}
+
+# Print the arguments as a single shell-quoted string, for logging commands.
+blitz_join_cmd() {
+  local cmd=()
+  local arg
+
+  for arg in "$@"; do
+    cmd+=("$(printf '%q' "${arg}")")
+  done
+  printf '%s' "${cmd[*]}"
+}
+
+# Log and return 0 if the command is on PATH; otherwise log and return 127.
+blitz_require_command() {
+  local command_name="$1"
+  local step="${2:-precheck}"
+
+  if command -v "${command_name}" >/dev/null 2>&1; then
+    blitz_log "${step}" "require-command" "success" "command=${command_name}" 0
+    return 0
+  fi
+
+  blitz_log "${step}" "require-command" "failure" "missing command: ${command_name}" 127
+  return 127
+}
+
+# Log and return 0 if the path is a regular file; otherwise log and return 1.
+blitz_require_file() {
+  local path="$1"
+  local step="${2:-precheck}"
+
+  if [[ -f "${path}" ]]; then
+    blitz_log "${step}" "require-file" "success" "path=${path}" 0
+    return 0
+  fi
+
+  blitz_log "${step}" "require-file" "failure" "missing file: ${path}" 1
+  return 1
+}
+
+# Log and return 0 if the path is executable; otherwise log and return 1.
+blitz_require_executable() {
+  local path="$1"
+  local step="${2:-precheck}"
+
+  if [[ -x "${path}" ]]; then
+    blitz_log "${step}" "require-executable" "success" "path=${path}" 0
+    return 0
+  fi
+
+  blitz_log "${step}" "require-executable" "failure" "missing executable: ${path}" 1
+  return 1
+}
+
+# Log and return 0 when running as root (EUID 0); otherwise log and return 1.
+blitz_require_root() {
+  local step="${1:-precheck}"
+
+  if [[ "${EUID}" -eq 0 ]]; then
+    blitz_log "${step}" "require-root" "success" "uid=${EUID}" 0
+    return 0
+  fi
+
+  blitz_log "${step}" "require-root" "failure" "root privileges are required" 1
+  return 1
+}
+
+# Log a command, run it, and log its outcome.
+# Arguments: $1 - step name, $2 - action name, $3... - command and arguments
+# Returns:   the exit status of the command
+blitz_run() {
+  local step="$1"
+  local action="$2"
+  local rc=0
+  shift 2
+
+  blitz_log "${step}" "${action}" "start" "$(blitz_join_cmd "$@")" 0
+  # Capture the status right here. Reading $? after `if cmd; then ...; fi`
+  # yields 0 when the condition failed, which would hide every failure.
+  "$@" || rc=$?
+  if (( rc == 0 )); then
+    blitz_log "${step}" "${action}" "success" "$(blitz_join_cmd "$@")" 0
+    return 0
+  fi
+
+  blitz_log "${step}" "${action}" "failure" "$(blitz_join_cmd "$@")" "${rc}"
+  return "${rc}"
+}
+
+# Print the `ip route get` result for an address when a usable route exists.
+# Returns 1 without output when ip fails or reports unreachable/prohibit.
+blitz_route_ready() {
+  local target_ip="$1"
+  local route_output
+
+  # A non-zero exit from ip (no route, malformed address, ...) means there is
+  # no route; its error text must not be mistaken for a route description.
+  if ! route_output="$(ip route get "${target_ip}" 2>&1)"; then
+    return 1
+  fi
+  if [[ -z "${route_output}" ]]; then
+    return 1
+  fi
+  if [[ "${route_output}" == *"unreachable"* || "${route_output}" == *"prohibit"* ]]; then
+    return 1
+  fi
+
+  printf '%s\n' "${route_output}"
+  return 0
+}
diff --git a/scripts/boot/install-systemd.sh b/scripts/boot/install-systemd.sh
new file mode 100644
index 0000000..91744cf
--- /dev/null
+++ b/scripts/boot/install-systemd.sh
@@ -0,0 +1,53 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+# shellcheck disable=SC1091
+source "${SCRIPT_DIR}/common.sh"
+
+SYSTEMD_TEMPLATE_DIR="${SCRIPT_DIR}/systemd"
+SYSTEMD_DEST_DIR="/etc/systemd/system"
+
+render_template() {
+  local template_path="$1"
+  local output_path="$2"
+
+  sed \
+    -e "s|@OMNISOCKETGO_ROOT@|${OMNISOCKETGO_ROOT}|g" \
+    -e "s|@BLITZ_LOG_FILE@|${BLITZ_LOG_FILE}|g" \
+    -e "s|@BLITZ_ROS_USER@|${BLITZ_ROS_USER}|g" \
+    "${template_path}" > "${output_path}"
+}
+
+install_unit() {
+  local template_name="$1"
+  local temp_output
+
+  temp_output="$(mktemp)"
+  render_template "${SYSTEMD_TEMPLATE_DIR}/${template_name}" "${temp_output}"
+  install -m 0644 "${temp_output}" "${SYSTEMD_DEST_DIR}/${template_name%.in}"
+  rm -f "${temp_output}"
+  blitz_log "install" "install-unit" "success" "unit=${SYSTEMD_DEST_DIR}/${template_name%.in}" 0
+}
+
+blitz_load_boot_env
+blitz_require_root "install"
+blitz_require_command install "install"
+blitz_require_command systemctl "install"
+
+mkdir -p "${SYSTEMD_DEST_DIR}"
+install -d -m 0755 "$(dirname "${BLITZ_LOG_FILE}")"
+touch "${BLITZ_LOG_FILE}"
+chmod 0644 "${BLITZ_LOG_FILE}"
+blitz_log "install" "prepare-log-file" "success" "log_file=${BLITZ_LOG_FILE}" 0
+
+install_unit "blitz-boot-gate.service.in"
+install_unit "blitz-5g-dial.service.in"
+install_unit "blitz-time-sync.service.in"
+install_unit "blitz-ros-receiver.service.in"
+install_unit "blitz-b-side-omnid.service.in"
+install_unit "blitz-robot.target.in"
+
+blitz_run "install" "daemon-reload" systemctl daemon-reload
+blitz_run "install" "enable-target" systemctl enable blitz-robot.target
+blitz_log "install" "complete" "success" "run systemctl start blitz-robot.target to launch immediately" 0
diff --git a/scripts/boot/robot-boot.env b/scripts/boot/robot-boot.env
new file mode 100644
index 0000000..3d3b547
--- /dev/null
+++ b/scripts/boot/robot-boot.env
@@ -0,0 +1,25 @@
+# Boot-time settings for the robot-side autostart chain.
+# Override machine-specific values in robot-boot.env.local.
+
+BLITZ_BOOT_DELAY_SEC="30"
+BLITZ_LOG_FILE="/var/log/blitz-robot/startup.log"
+
+BLITZ_5G_DIAL_DIR="/home/nvidia/5g-test/5G"
+BLITZ_5G_SERIAL_PORT="/dev/ttyUSB7"
+# Wait at most 60 s for the 5G modem's serial device (e.g. /dev/ttyUSB7) to appear.
+BLITZ_5G_SERIAL_WAIT_SEC="60"
+# After the dial command finishes, wait at most 30 s more for a route to the target server IP to come up.
+BLITZ_5G_ROUTE_WAIT_SEC="30"
+
+# Leave empty to fall back to the host part of ROBOT_SIDE_OMNISOCKET_SERVER_ADDR.
+BLITZ_TIME_SERVER_IP="81.70.156.140"
+BLITZ_TIME_SERVER_PORT="10910"
+BLITZ_TIME_SYNC_WAIT_SEC="30"
+BLITZ_TIME_SYNC_MAX_OFFSET_SEC="0.002"
+BLITZ_TIME_SYNC_INTERVAL_SEC="1"
+
+BLITZ_ROS_USER="nvidia"
+BLITZ_ROS_SOCKET_WAIT_SEC="20"
+
+# Boot units run b_side_omnid as root directly, so nested sudo must stay off.
+B_SIDE_OMNID_USE_SUDO="0"
diff --git a/scripts/boot/start-b-side-omnid-service.sh b/scripts/boot/start-b-side-omnid-service.sh
new file mode 100644
index 0000000..d61d553
--- /dev/null
+++ b/scripts/boot/start-b-side-omnid-service.sh
@@ -0,0 +1,18 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+# shellcheck disable=SC1091
+source "${SCRIPT_DIR}/common.sh"
+
+STEP="b-side-omnid"
+
+blitz_load_boot_env
+
+blitz_require_executable "${OMNISOCKETGO_ROOT}/bin/b_side_omnid" "${STEP}"
+
+export OMNI_BOOT_MODE="1"
+export B_SIDE_OMNID_USE_SUDO="0"
+
+blitz_log "${STEP}" "start" "start" "exec bash ${OMNISOCKETGO_ROOT}/scripts/dev/start-b-side-omnid.sh" 0
+exec bash "${OMNISOCKETGO_ROOT}/scripts/dev/start-b-side-omnid.sh"
diff --git a/scripts/boot/start-ros-receiver-service.sh b/scripts/boot/start-ros-receiver-service.sh
new file mode 100644
index 0000000..2541e66
--- /dev/null
+++ b/scripts/boot/start-ros-receiver-service.sh
@@ -0,0 +1,17 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+# shellcheck disable=SC1091
+source "${SCRIPT_DIR}/common.sh"
+
+STEP="ros-receiver"
+
+blitz_load_boot_env
+
+blitz_require_file "/opt/ros/${ROS_DISTRO}/setup.bash" "${STEP}"
+blitz_require_file "${ROS_CONTROL_PY_DIR}/install/setup.bash" "${STEP}"
+
+export OMNI_BOOT_MODE="1"
+blitz_log "${STEP}" "start" "start" "exec bash ${OMNISOCKETGO_ROOT}/scripts/dev/start-ros-receiver.sh" 0
+exec bash "${OMNISOCKETGO_ROOT}/scripts/dev/start-ros-receiver.sh"
diff --git a/scripts/boot/systemd/blitz-5g-dial.service.in b/scripts/boot/systemd/blitz-5g-dial.service.in
new file mode 100644
index 0000000..a9ce2f0
--- /dev/null
+++ b/scripts/boot/systemd/blitz-5g-dial.service.in
@@ -0,0 +1,14 @@
+[Unit]
+Description=Blitz robot 5G dial
+After=blitz-boot-gate.service
+Requires=blitz-boot-gate.service
+
+[Service]
+Type=oneshot
+RemainAfterExit=yes
+ExecStart=/bin/bash @OMNISOCKETGO_ROOT@/scripts/boot/5g-dial.sh
+StandardOutput=append:@BLITZ_LOG_FILE@
+StandardError=append:@BLITZ_LOG_FILE@
+
+[Install]
+WantedBy=blitz-robot.target
diff --git a/scripts/boot/systemd/blitz-b-side-omnid.service.in b/scripts/boot/systemd/blitz-b-side-omnid.service.in
new file mode 100644
index 0000000..93d3502
--- /dev/null
+++ b/scripts/boot/systemd/blitz-b-side-omnid.service.in
@@ -0,0 +1,15 @@
+[Unit]
+Description=Blitz robot b-side omnid
+After=blitz-time-sync.service blitz-ros-receiver.service
+Requires=blitz-time-sync.service blitz-ros-receiver.service
+
+[Service]
+Type=simple
+ExecStart=/bin/bash @OMNISOCKETGO_ROOT@/scripts/boot/start-b-side-omnid-service.sh
+Restart=always
+RestartSec=2
+StandardOutput=append:@BLITZ_LOG_FILE@
+StandardError=append:@BLITZ_LOG_FILE@
+
+[Install]
+WantedBy=blitz-robot.target
diff --git a/scripts/boot/systemd/blitz-boot-gate.service.in b/scripts/boot/systemd/blitz-boot-gate.service.in
new file mode 100644
index 0000000..6c1b1ab
--- /dev/null
+++ b/scripts/boot/systemd/blitz-boot-gate.service.in
@@ -0,0 +1,14 @@
+[Unit]
+Description=Blitz robot boot gate
+After=multi-user.target network-online.target
+Wants=network-online.target
+
+[Service]
+Type=oneshot
+RemainAfterExit=yes
+ExecStart=/bin/bash @OMNISOCKETGO_ROOT@/scripts/boot/boot-gate.sh
+StandardOutput=append:@BLITZ_LOG_FILE@
+StandardError=append:@BLITZ_LOG_FILE@
+
+[Install]
+WantedBy=blitz-robot.target
diff --git a/scripts/boot/systemd/blitz-robot.target.in b/scripts/boot/systemd/blitz-robot.target.in
new file mode 100644
index 0000000..299220b
--- /dev/null
+++ b/scripts/boot/systemd/blitz-robot.target.in
@@ -0,0 +1,11 @@
+[Unit]
+Description=Blitz robot boot chain
+Wants=blitz-boot-gate.service
+Wants=blitz-5g-dial.service
+Wants=blitz-time-sync.service
+Wants=blitz-ros-receiver.service
+Wants=blitz-b-side-omnid.service
+After=multi-user.target
+
+[Install]
+WantedBy=multi-user.target
diff --git a/scripts/boot/systemd/blitz-ros-receiver.service.in b/scripts/boot/systemd/blitz-ros-receiver.service.in
new file mode 100644
index 0000000..317121b
--- /dev/null
+++ b/scripts/boot/systemd/blitz-ros-receiver.service.in
@@ -0,0 +1,17 @@
+[Unit]
+Description=Blitz robot ROS receiver
+After=blitz-time-sync.service
+Requires=blitz-time-sync.service
+
+[Service]
+Type=simple
+User=@BLITZ_ROS_USER@
+ExecStart=/bin/bash @OMNISOCKETGO_ROOT@/scripts/boot/start-ros-receiver-service.sh
+ExecStartPost=/bin/bash @OMNISOCKETGO_ROOT@/scripts/boot/wait-for-unix-socket.sh --step ros-receiver
+Restart=always
+RestartSec=2
+StandardOutput=append:@BLITZ_LOG_FILE@
+StandardError=append:@BLITZ_LOG_FILE@
+
+[Install]
+WantedBy=blitz-robot.target
diff --git a/scripts/boot/systemd/blitz-time-sync.service.in b/scripts/boot/systemd/blitz-time-sync.service.in
new file mode 100644
index 0000000..3f87363
--- /dev/null
+++ b/scripts/boot/systemd/blitz-time-sync.service.in
@@ -0,0 +1,14 @@
+[Unit]
+Description=Blitz robot private chrony sync
+After=blitz-5g-dial.service
+Requires=blitz-5g-dial.service
+
+[Service]
+Type=oneshot
+RemainAfterExit=yes
+ExecStart=/bin/bash @OMNISOCKETGO_ROOT@/scripts/boot/time-sync.sh
+StandardOutput=append:@BLITZ_LOG_FILE@
+StandardError=append:@BLITZ_LOG_FILE@
+
+[Install]
+WantedBy=blitz-robot.target
diff --git a/scripts/boot/time-sync.sh b/scripts/boot/time-sync.sh
new file mode 100644
index 0000000..396d82c
--- /dev/null
+++ b/scripts/boot/time-sync.sh
@@ -0,0 +1,124 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+# shellcheck disable=SC1091
+source "${SCRIPT_DIR}/common.sh"
+
+STEP="time-sync"
+CHRONY_SOURCES_DIR="/etc/chrony/sources.d"
+CHRONY_SOURCE_FILE="${CHRONY_SOURCES_DIR}/blitz-robot.sources"
+CHRONY_MAIN_CONF="/etc/chrony/chrony.conf"
+CHRONY_MAIN_CONF_BAK="/etc/chrony/chrony.conf.blitz-bak"
+
+# Print the installed chrony unit name; falls back to chrony.service.
+chrony_unit_name() {
+  if systemctl list-unit-files chrony.service --no-legend 2>/dev/null | grep -q '^chrony\.service'; then
+    printf '%s\n' "chrony.service"
+    return 0
+  fi
+  if systemctl list-unit-files chronyd.service --no-legend 2>/dev/null | grep -q '^chronyd\.service'; then
+    printf '%s\n' "chronyd.service"
+    return 0
+  fi
+  printf '%s\n' "chrony.service"
+}
+
+# Back up chrony.conf once, comment out its pool/server lines, and make sure
+# it has a `sourcedir /etc/chrony/sources.d` line. Rewrites only on change.
+ensure_chrony_main_conf() {
+  local temp_file
+
+  blitz_require_file "${CHRONY_MAIN_CONF}" "${STEP}"
+  mkdir -p "${CHRONY_SOURCES_DIR}"
+
+  if [[ ! -f "${CHRONY_MAIN_CONF_BAK}" ]]; then
+    cp -a "${CHRONY_MAIN_CONF}" "${CHRONY_MAIN_CONF_BAK}"
+    blitz_log "${STEP}" "backup-config" "success" "backup=${CHRONY_MAIN_CONF_BAK}" 0
+  fi
+
+  temp_file="$(mktemp)"
+  awk '
+    /^[[:space:]]*#/ { print; next }
+    /^[[:space:]]*(pool|server)[[:space:]]+/ {
+      print "# blitz-managed-disabled " $0
+      next
+    }
+    { print }
+  ' "${CHRONY_MAIN_CONF}" > "${temp_file}"
+
+  if ! grep -Eq '^[[:space:]]*sourcedir[[:space:]]+/etc/chrony/sources\.d([[:space:]]|$)' "${temp_file}"; then
+    printf '\n# blitz-managed\nsourcedir /etc/chrony/sources.d\n' >> "${temp_file}"
+  fi
+
+  if ! cmp -s "${temp_file}" "${CHRONY_MAIN_CONF}"; then
+    cp "${temp_file}" "${CHRONY_MAIN_CONF}"
+    blitz_log "${STEP}" "rewrite-main-config" "success" "commented non-Blitz pool/server entries in ${CHRONY_MAIN_CONF}" 0
+  else
+    blitz_log "${STEP}" "rewrite-main-config" "success" "main config already matches Blitz expectations" 0
+  fi
+
+  rm -f "${temp_file}"
+}
+
+# Write the single Blitz `server ... iburst` line to the sources.d file.
+write_chrony_source_file() {
+  local temp_file
+
+  temp_file="$(mktemp)"
+  cat <<EOF > "${temp_file}"
+# blitz-managed
+server ${BLITZ_TIME_SERVER_IP} port ${BLITZ_TIME_SERVER_PORT} iburst
+EOF
+
+  if [[ ! -f "${CHRONY_SOURCE_FILE}" ]] || ! cmp -s "${temp_file}" "${CHRONY_SOURCE_FILE}"; then
+    # mktemp creates the file 0600; install it 0644 so an unprivileged chronyd
+    # can still read it. NOTE(review): confirm chrony's run-as user on target.
+    install -m 0644 "${temp_file}" "${CHRONY_SOURCE_FILE}"
+    blitz_log "${STEP}" "write-source" "success" "source_file=${CHRONY_SOURCE_FILE} server=${BLITZ_TIME_SERVER_IP} port=${BLITZ_TIME_SERVER_PORT}" 0
+  else
+    blitz_log "${STEP}" "write-source" "success" "source_file already matches ${BLITZ_TIME_SERVER_IP}:${BLITZ_TIME_SERVER_PORT}" 0
+  fi
+
+  rm -f "${temp_file}"
+}
+
+blitz_load_boot_env
+blitz_require_root "${STEP}"
+blitz_require_command systemctl "${STEP}"
+blitz_require_command chronyc "${STEP}"
+
+if [[ -z "${BLITZ_TIME_SERVER_IP}" ]]; then
+  blitz_log "${STEP}" "precheck" "failure" "BLITZ_TIME_SERVER_IP is empty and no fallback could be derived" 1
+  exit 1
+fi
+if ! [[ "${BLITZ_TIME_SERVER_PORT}" =~ ^[0-9]+$ ]] || (( BLITZ_TIME_SERVER_PORT < 1 || BLITZ_TIME_SERVER_PORT > 65535 )); then
+  blitz_log "${STEP}" "precheck" "failure" "BLITZ_TIME_SERVER_PORT must be an integer between 1 and 65535" 1
+  exit 1
+fi
+
+ensure_chrony_main_conf
+write_chrony_source_file
+
+CHRONY_UNIT="$(chrony_unit_name)"
+blitz_run "${STEP}" "restart-chrony" systemctl restart "${CHRONY_UNIT}"
+# `burst` requires a good/max sample spec. It only speeds up the first sync
+# (the source already uses iburst), so a failure must not stop the boot chain.
+if ! blitz_run "${STEP}" "burst" chronyc burst 4/4; then
+  blitz_log "${STEP}" "burst" "soft_fail" "chronyc burst failed; continuing with normal polling" 0
+fi
+
+blitz_log "${STEP}" "waitsync" "start" "server=${BLITZ_TIME_SERVER_IP} port=${BLITZ_TIME_SERVER_PORT} wait_sec=${BLITZ_TIME_SYNC_WAIT_SEC} max_offset_sec=${BLITZ_TIME_SYNC_MAX_OFFSET_SEC} interval_sec=${BLITZ_TIME_SYNC_INTERVAL_SEC}" 0
+# waitsync arguments are: max-tries, max-correction, max-skew, interval.
+if chronyc waitsync "${BLITZ_TIME_SYNC_WAIT_SEC}" "${BLITZ_TIME_SYNC_MAX_OFFSET_SEC}" 1000 "${BLITZ_TIME_SYNC_INTERVAL_SEC}"; then
+  blitz_log "${STEP}" "waitsync" "success" "chrony synchronized to ${BLITZ_TIME_SERVER_IP}:${BLITZ_TIME_SERVER_PORT}" 0
+else
+  rc=$?
+  blitz_log "${STEP}" "waitsync" "soft_fail" "chrony did not synchronize to ${BLITZ_TIME_SERVER_IP}:${BLITZ_TIME_SERVER_PORT} within the configured timeout" "${rc}"
+fi
+
+blitz_log "${STEP}" "tracking" "start" "chronyc tracking" 0
+chronyc tracking || true
+blitz_log "${STEP}" "sources" "start" "chronyc sources -v" 0
+chronyc sources -v || true
+blitz_log "${STEP}" "complete" "success" "time-sync step finished" 0
diff --git a/scripts/boot/wait-for-unix-socket.sh b/scripts/boot/wait-for-unix-socket.sh
new file mode 100644
index 0000000..2d4d411
--- /dev/null
+++ b/scripts/boot/wait-for-unix-socket.sh
@@ -0,0 +1,49 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+# shellcheck disable=SC1091
+source "${SCRIPT_DIR}/common.sh"
+
+STEP="ros-receiver"
+SOCKET_PATH=""
+TIMEOUT_SEC=""
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --path)
+      SOCKET_PATH="$2"
+      shift 2
+      ;;
+    --timeout)
+      TIMEOUT_SEC="$2"
+      shift 2
+      ;;
+    --step)
+      STEP="$2"
+      shift 2
+      ;;
+    *)
+      blitz_log "${STEP}" "wait-socket-arg" "failure" "unknown argument: $1" 2
+      exit 2
+      ;;
+  esac
+done
+
+blitz_load_boot_env
+
+SOCKET_PATH="${SOCKET_PATH:-${ROBOT_RECEIVER_LOCAL_SOCKET_PATH}}"
+TIMEOUT_SEC="${TIMEOUT_SEC:-${BLITZ_ROS_SOCKET_WAIT_SEC}}"
+
+blitz_log "${STEP}" "wait-socket" "start" "path=${SOCKET_PATH} timeout_sec=${TIMEOUT_SEC}" 0
+
+for (( waited=0; waited < TIMEOUT_SEC; waited++ )); do
+  if [[ -S "${SOCKET_PATH}" ]]; then
+    blitz_log "${STEP}" "wait-socket" "success" "path=${SOCKET_PATH} waited_sec=${waited}" 0
+    exit 0
+  fi
+  sleep 1
+done
+
+blitz_log "${STEP}" "wait-socket" "failure" "path=${SOCKET_PATH} timeout_sec=${TIMEOUT_SEC}" 1
+exit 1
diff --git a/scripts/dev/start-b-side-omnid.sh b/scripts/dev/start-b-side-omnid.sh
index 2c1dfdf..e671a02 100755
--- a/scripts/dev/start-b-side-omnid.sh
+++ b/scripts/dev/start-b-side-omnid.sh
@@ -15,6 +15,10 @@ export OMNI_CONTROL_SERVER_ADDR="${OMNI_CONTROL_SERVER_ADDR}"
 export OMNI_CONTROL_RELAY_VIA="${OMNI_CONTROL_RELAY_VIA}"
 
 if [[ ! -x "./bin/b_side_omnid" ]]; then
+  if [[ "${OMNI_BOOT_MODE:-0}" == "1" ]]; then
+    echo "Missing ./bin/b_side_omnid in boot mode; build it before enabling the autostart service." >&2
+    exit 1
+  fi
   make b_side_omnid
 fi
 
diff --git a/scripts/dev/start-ros-receiver.sh b/scripts/dev/start-ros-receiver.sh
index 4c28c51..6c90630 100755
--- a/scripts/dev/start-ros-receiver.sh
+++ b/scripts/dev/start-ros-receiver.sh
@@ -12,9 +12,17 @@ source_with_nounset_off() {
 # shellcheck disable=SC1091
 source "${SCRIPT_DIR}/load-env.sh"
 
+if [[ ! -f "/opt/ros/${ROS_DISTRO}/setup.bash" ]]; then
+  echo "Missing ROS distro setup: /opt/ros/${ROS_DISTRO}/setup.bash" >&2
+  exit 1
+fi
 source_with_nounset_off "/opt/ros/${ROS_DISTRO}/setup.bash"
 
 cd "${ROS_CONTROL_PY_DIR}"
+if [[ ! -f "install/setup.bash" ]]; then
+  echo "Missing ROS workspace setup: ${ROS_CONTROL_PY_DIR}/install/setup.bash" >&2
+  exit 1
+fi
 source_with_nounset_off "install/setup.bash"
 
 launch_args=(