From d819f9ca4dc14b00adad97c922321c9e8d02627f Mon Sep 17 00:00:00 2001
From: Mock
Date: Sat, 11 Apr 2026 20:43:14 +0800
Subject: [PATCH] =?UTF-8?q?fix:=20KCP=E6=97=A5=E5=BF=97=E5=9C=A8A=E7=AB=AF?=
 =?UTF-8?q?=E8=AE=B0=E5=BD=95?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Review notes (this section sits below the "---" separator and is not part
of the commit message):

- log-network-summary.py: a single failed poll (connection reset or short
  read while fetching the body, undecodable or unknown charset, invalid
  JSON) no longer terminates the logger; the first failure is always
  reported.
- start-backend.sh: uvicorn runs as a background job that the script waits
  on, so SIGTERM/SIGINT delivered to the script stop both the backend and
  the summary logger, and the script exits with uvicorn's status.
- robot-remote.env: the shared polling interval matches the documented
  2000 ms default.
- Hunks were edited by hand after export; the post-image blob ids on the
  "index" lines were not recomputed.

 README.md                          |   7 +-
 scripts/dev/README.md              |  27 ++++++
 scripts/dev/load-env.sh            |   7 ++
 scripts/dev/log-network-summary.py | 118 +++++++++++++++++++++++++++++
 scripts/dev/robot-remote.env       |   7 ++
 scripts/dev/start-backend.sh       |  57 +++++++++++++-
 6 files changed, 220 insertions(+), 3 deletions(-)
 create mode 100644 scripts/dev/log-network-summary.py

diff --git a/README.md b/README.md
index 28cba04..046c630 100644
--- a/README.md
+++ b/README.md
@@ -34,10 +34,13 @@ Server `D` runs the KCP hub on `0.0.0.0:10909`:
 ```bash
 ./bin/kcpserver -listen 0.0.0.0:10909 \
   -telemetry-peer peer-a-telemetry \
-  -kcp-ts-debug-log logs/d-kcp-ts.jsonl \
-  -kcp-session-stats-log logs/d-kcp-stats.jsonl
+  -telemetry-interval 1000ms \
+  -kcp-session-stats-log logs/d-kcp-stats.jsonl \
+  -kcp-session-stats-interval 1000ms
 ```
 
+For multi-hour runs, keep `-latency-log` and `-kcp-ts-debug-log` off unless you are collecting a short repro trace.
+
 Relay `C` runs a raw UDP forwarder to `D`:
 
 ```bash
diff --git a/scripts/dev/README.md b/scripts/dev/README.md
index 97cb49e..1a54741 100644
--- a/scripts/dev/README.md
+++ b/scripts/dev/README.md
@@ -26,6 +26,7 @@ If your `robot-command-center` is elsewhere, set `ROBOT_COMMAND_CENTER_ROOT` in
 - `load-env.sh`: loads the shared environment into the current shell
 - `apply-camera-controls.sh`: applies the camera preset before `b_side_omnid` starts
 - `start-backend.sh`: starts Django ASGI with `uvicorn`
+- `log-network-summary.py`: polls the backend `network/latest` API and appends compact JSONL snapshots
 - `start-frontend.sh`: starts the Vite dev server
 - `start-ros-receiver.sh`: starts the ROS2 `udp_teleop_bridge` receiver
 - `start-b-side-omnid.sh`: applies camera controls, then starts `./bin/b_side_omnid` and uses `sudo -E` by default
@@ -81,6 +82,12 @@ Edit `scripts/dev/robot-remote.env` for shared changes such as:
 - `OMNI_VIDEO_HARD_BACKPRESSURE_HOLD_MS`
 - `OMNI_CONTROL_SERVER_IDLE_RECONNECT_MS`
 - `OMNI_VIDEO_MAX_FRAME_AGE_MS`
+- `OMNISOCKET_TELEMETRY_PEER_ID`
+- `OMNISOCKET_TELEMETRY_INTERVAL_MS`
+- `OMNISOCKET_TELEMETRY_STALE_AFTER_MS`
+- `OMNI_NETWORK_SUMMARY_LOG_ENABLED`
+- `OMNI_NETWORK_SUMMARY_LOG_PATH`
+- `OMNI_NETWORK_SUMMARY_LOG_INTERVAL_MS`
 
 Camera presets use `v4l2-ctl` from `v4l-utils` on the robot side.
 
@@ -96,6 +103,25 @@ New repair knobs:
 - `OMNI_VIDEO_SOFT_BACKPRESSURE_SEGMENTS`, `OMNI_VIDEO_HARD_BACKPRESSURE_SEGMENTS`, and `OMNI_VIDEO_HARD_BACKPRESSURE_HOLD_MS` are used by `b_side_omnid`
 - `OMNI_CONTROL_SERVER_IDLE_RECONNECT_MS` is used by `b_side_omnid`
 - `OMNI_VIDEO_MAX_FRAME_AGE_MS` is used by `start-backend.sh` on the A-side backend, not by `b_side_omnid`
+- `OMNISOCKET_TELEMETRY_INTERVAL_MS` and `OMNISOCKET_TELEMETRY_STALE_AFTER_MS` tune the backend's D-side telemetry freshness window
+- `OMNI_NETWORK_SUMMARY_LOG_*` controls the A-side JSONL summary logger that polls `GET /api/network/latest/`
+
+Default long-run network logging:
+
+- A-side starts a compact JSONL logger by default at `${OMNISOCKETGO_ROOT}/logs/a-network-summary.jsonl`
+- The default A-side polling interval is `2000 ms`
+- For D-side long runs, prefer:
+
+```bash
+./bin/kcpserver -listen 0.0.0.0:10909 \
+  -telemetry-peer peer-a-telemetry \
+  -telemetry-interval 1000ms \
+  -kcp-session-stats-log logs/d-kcp-stats.jsonl \
+  -kcp-session-stats-interval 1000ms
+```
+
+- Keep `-latency-log` and `-kcp-ts-debug-log` off by default for multi-hour runs
+- Do not continuously redirect relay `C` stderr to a file unless you are reproducing a short issue window
 
 Put machine-specific overrides into `scripts/dev/robot-remote.env.local`. Example:
 
@@ -103,6 +129,7 @@ Put machine-specific overrides into `scripts/dev/robot-remote.env.local`. Exampl
 ROBOT_COMMAND_CENTER_ROOT="$HOME/Documents/robot-command-center"
 OMNI_CAMERA_DEVICE="/dev/video30"
 B_SIDE_OMNID_USE_SUDO="0"
+OMNI_NETWORK_SUMMARY_LOG_INTERVAL_MS="5000"
 ```
 
 Default camera behavior is the `night` preset:
diff --git a/scripts/dev/load-env.sh b/scripts/dev/load-env.sh
index 097dca7..2381dd7 100644
--- a/scripts/dev/load-env.sh
+++ b/scripts/dev/load-env.sh
@@ -100,6 +100,13 @@ export BACKEND_HOST="${BACKEND_HOST:-0.0.0.0}"
 export BACKEND_PORT="${BACKEND_PORT:-8001}"
 export FRONTEND_HOST="${FRONTEND_HOST:-0.0.0.0}"
 export FRONTEND_PORT="${FRONTEND_PORT:-5173}"
+export OMNISOCKET_TELEMETRY_PEER_ID="${OMNISOCKET_TELEMETRY_PEER_ID:-peer-a-telemetry}"
+export OMNISOCKET_TELEMETRY_INTERVAL_MS="${OMNISOCKET_TELEMETRY_INTERVAL_MS:-1000}"
+export OMNISOCKET_TELEMETRY_STALE_AFTER_MS="${OMNISOCKET_TELEMETRY_STALE_AFTER_MS:-3000}"
+export OMNI_NETWORK_SUMMARY_LOG_ENABLED="${OMNI_NETWORK_SUMMARY_LOG_ENABLED:-1}"
+export OMNI_NETWORK_SUMMARY_LOG_PATH="${OMNI_NETWORK_SUMMARY_LOG_PATH:-${OMNISOCKETGO_ROOT}/logs/a-network-summary.jsonl}"
+export OMNI_NETWORK_SUMMARY_LOG_INTERVAL_MS="${OMNI_NETWORK_SUMMARY_LOG_INTERVAL_MS:-2000}"
+export OMNI_NETWORK_SUMMARY_LOG_REQUEST_TIMEOUT_SEC="${OMNI_NETWORK_SUMMARY_LOG_REQUEST_TIMEOUT_SEC:-3}"
 export CONTROL_SIDE_OMNISOCKET_SERVER_ADDR="${CONTROL_SIDE_OMNISOCKET_SERVER_ADDR:-}"
 export CONTROL_SIDE_OMNISOCKET_RELAY_VIA="${CONTROL_SIDE_OMNISOCKET_RELAY_VIA:-}"
 export ROBOT_SIDE_OMNISOCKET_SERVER_ADDR="${ROBOT_SIDE_OMNISOCKET_SERVER_ADDR:-}"
diff --git a/scripts/dev/log-network-summary.py b/scripts/dev/log-network-summary.py
new file mode 100644
index 0000000..174df92
--- /dev/null
+++ b/scripts/dev/log-network-summary.py
@@ -0,0 +1,118 @@
+#!/usr/bin/env python3
+"""Poll the backend network summary endpoint and append JSONL snapshots."""
+from __future__ import annotations
+
+import argparse
+import http.client
+import json
+import signal
+import sys
+import time
+import urllib.error
+import urllib.request
+from pathlib import Path
+
+
+STOP_REQUESTED = False
+
+# Failures of a single poll that must not end a multi-hour logging run.
+POLL_ERRORS = (
+    urllib.error.URLError,  # refused/unreachable, HTTP 4xx/5xx (HTTPError)
+    OSError,  # timeouts and resets raised while reading the body
+    http.client.HTTPException,  # IncompleteRead, BadStatusLine
+    ValueError,  # json.JSONDecodeError, UnicodeDecodeError
+    LookupError,  # response declares a charset Python does not know
+)
+
+
+def handle_signal(signum: int, frame: object) -> None:
+    """Ask the polling loop to stop; installed for SIGINT and SIGTERM."""
+    del signum, frame
+    global STOP_REQUESTED
+    STOP_REQUESTED = True
+
+
+def parse_args() -> argparse.Namespace:
+    """Parse the command-line options of the logger."""
+    parser = argparse.ArgumentParser(description="Poll /api/network/latest/ and append JSONL snapshots.")
+    parser.add_argument("--url", required=True, help="HTTP endpoint that returns the network summary JSON.")
+    parser.add_argument("--output", required=True, help="Output JSONL path.")
+    parser.add_argument(
+        "--interval-ms",
+        type=int,
+        default=2000,
+        help="Polling interval in milliseconds. Default: 2000.",
+    )
+    parser.add_argument(
+        "--request-timeout-sec",
+        type=float,
+        default=3.0,
+        help="Single request timeout in seconds. Default: 3.0.",
+    )
+    return parser.parse_args()
+
+
+def sleep_with_stop(seconds: float) -> None:
+    """Sleep up to `seconds`, waking every 0.2 s to honor a stop request."""
+    deadline = time.monotonic() + max(0.0, seconds)
+    while not STOP_REQUESTED:
+        remaining = deadline - time.monotonic()
+        if remaining <= 0.0:
+            return
+        time.sleep(min(remaining, 0.2))
+
+
+def fetch_json(url: str, timeout_sec: float) -> str:
+    """GET `url` and return its JSON body re-serialized as one compact line."""
+    request = urllib.request.Request(
+        url,
+        headers={
+            "Accept": "application/json",
+            "Cache-Control": "no-cache",
+        },
+        method="GET",
+    )
+    with urllib.request.urlopen(request, timeout=timeout_sec) as response:
+        charset = response.headers.get_content_charset("utf-8")
+        payload = response.read().decode(charset)
+    parsed = json.loads(payload)
+    return json.dumps(parsed, separators=(",", ":"), ensure_ascii=False)
+
+
+def main() -> int:
+    """Append one snapshot per interval to the output file until signalled."""
+    args = parse_args()
+    interval_sec = max(args.interval_ms, 200) / 1000.0
+    output_path = Path(args.output)
+    # -inf so the first failure is always reported, whatever the clock origin.
+    last_error_log_monotonic = float("-inf")
+
+    signal.signal(signal.SIGINT, handle_signal)
+    signal.signal(signal.SIGTERM, handle_signal)
+
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+
+    with output_path.open("a", encoding="utf-8") as output_file:
+        while not STOP_REQUESTED:
+            started = time.monotonic()
+            try:
+                line = fetch_json(args.url, args.request_timeout_sec)
+            except POLL_ERRORS as error:
+                now = time.monotonic()
+                # Report at most one failure every 10 s to keep stderr small.
+                if now - last_error_log_monotonic >= 10.0:
+                    print(f"[network-summary] poll failed: {error}", file=sys.stderr)
+                    last_error_log_monotonic = now
+            else:
+                output_file.write(line)
+                output_file.write("\n")
+                output_file.flush()
+
+            elapsed = time.monotonic() - started
+            sleep_with_stop(max(0.0, interval_sec - elapsed))
+
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/scripts/dev/robot-remote.env b/scripts/dev/robot-remote.env
index bb8a295..12a9c7b 100644
--- a/scripts/dev/robot-remote.env
+++ b/scripts/dev/robot-remote.env
@@ -19,6 +19,13 @@ PYTHON_VENV_PATH="${OMNISOCKETGO_ROOT}/.venv"
 
 BACKEND_HOST="0.0.0.0"
 BACKEND_PORT="8001"
+OMNISOCKET_TELEMETRY_PEER_ID="peer-a-telemetry"
+OMNISOCKET_TELEMETRY_INTERVAL_MS="1000"
+OMNISOCKET_TELEMETRY_STALE_AFTER_MS="3000"
+OMNI_NETWORK_SUMMARY_LOG_ENABLED="1"
+OMNI_NETWORK_SUMMARY_LOG_PATH="${OMNISOCKETGO_ROOT}/logs/a-network-summary.jsonl"
+OMNI_NETWORK_SUMMARY_LOG_INTERVAL_MS="2000"
+OMNI_NETWORK_SUMMARY_LOG_REQUEST_TIMEOUT_SEC="3"
 
 FRONTEND_HOST="0.0.0.0"
 FRONTEND_PORT="5173"
diff --git a/scripts/dev/start-backend.sh b/scripts/dev/start-backend.sh
index 0f1d79a..ec26e56 100755
--- a/scripts/dev/start-backend.sh
+++ b/scripts/dev/start-backend.sh
@@ -16,4 +16,59 @@ source "${PYTHON_VENV_PATH}/bin/activate"
 cd "${BACKEND_DIR}"
 export OMNISOCKET_SERVER_ADDR="${CONTROL_SIDE_OMNISOCKET_SERVER_ADDR}"
 export OMNISOCKET_RELAY_VIA="${CONTROL_SIDE_OMNISOCKET_RELAY_VIA}"
-exec python -m uvicorn config.asgi:application --host "${BACKEND_HOST}" --port "${BACKEND_PORT}"
+
+logger_pid=""
+backend_pid=""
+
+# Stop the backend and the summary logger. Clears the traps first so that a
+# signal arriving during shutdown cannot re-enter this function.
+cleanup() {
+  local pid
+
+  trap - EXIT INT TERM
+  for pid in "${backend_pid}" "${logger_pid}"; do
+    if [[ -n "${pid}" ]]; then
+      kill "${pid}" 2>/dev/null || true
+      wait "${pid}" 2>/dev/null || true
+    fi
+  done
+}
+
+# Start log-network-summary.py in the background unless
+# OMNI_NETWORK_SUMMARY_LOG_ENABLED is not "1"; records its PID in logger_pid.
+start_network_summary_logger() {
+  local logger_url
+  local logger_dir
+
+  if [[ "${OMNI_NETWORK_SUMMARY_LOG_ENABLED}" != "1" ]]; then
+    return
+  fi
+
+  logger_url="http://127.0.0.1:${BACKEND_PORT}/api/network/latest/"
+  logger_dir="$(dirname "${OMNI_NETWORK_SUMMARY_LOG_PATH}")"
+  mkdir -p "${logger_dir}"
+
+  python "${SCRIPT_DIR}/log-network-summary.py" \
+    --url "${logger_url}" \
+    --output "${OMNI_NETWORK_SUMMARY_LOG_PATH}" \
+    --interval-ms "${OMNI_NETWORK_SUMMARY_LOG_INTERVAL_MS}" \
+    --request-timeout-sec "${OMNI_NETWORK_SUMMARY_LOG_REQUEST_TIMEOUT_SEC}" &
+  logger_pid=$!
+  echo "[start-backend] network summary logger -> ${OMNI_NETWORK_SUMMARY_LOG_PATH} (${OMNI_NETWORK_SUMMARY_LOG_INTERVAL_MS} ms)" >&2
+}
+
+trap cleanup EXIT
+trap 'cleanup; exit 130' INT
+trap 'cleanup; exit 143' TERM
+
+start_network_summary_logger
+
+# Run uvicorn as a background job and wait on it: bash defers trap handlers
+# while a foreground command is running, so a SIGTERM sent to this script
+# would otherwise not stop the backend.
+python -m uvicorn config.asgi:application --host "${BACKEND_HOST}" --port "${BACKEND_PORT}" &
+backend_pid=$!
+backend_status=0
+wait "${backend_pid}" || backend_status=$?
+backend_pid=""
+exit "${backend_status}"