fix: KCP日志在A端记录

This commit is contained in:
2026-04-11 20:43:14 +08:00
parent 9009107a64
commit d819f9ca4d
6 changed files with 181 additions and 3 deletions

View File

@@ -34,10 +34,13 @@ Server `D` runs the KCP hub on `0.0.0.0:10909`:
```bash
./bin/kcpserver -listen 0.0.0.0:10909 \
-telemetry-peer peer-a-telemetry \
-kcp-ts-debug-log logs/d-kcp-ts.jsonl \
-kcp-session-stats-log logs/d-kcp-stats.jsonl
-telemetry-interval 1000ms \
-kcp-session-stats-log logs/d-kcp-stats.jsonl \
-kcp-session-stats-interval 1000ms
```
For multi-hour runs, keep `-latency-log` and `-kcp-ts-debug-log` off unless you are collecting a short repro trace.
Relay `C` runs a raw UDP forwarder to `D`:
```bash

View File

@@ -26,6 +26,7 @@ If your `robot-command-center` is elsewhere, set `ROBOT_COMMAND_CENTER_ROOT` in
- `load-env.sh`: loads the shared environment into the current shell
- `apply-camera-controls.sh`: applies the camera preset before `b_side_omnid` starts
- `start-backend.sh`: starts Django ASGI with `uvicorn`
- `log-network-summary.py`: polls the backend `network/latest` API and appends compact JSONL snapshots
- `start-frontend.sh`: starts the Vite dev server
- `start-ros-receiver.sh`: starts the ROS2 `udp_teleop_bridge` receiver
- `start-b-side-omnid.sh`: applies camera controls, then starts `./bin/b_side_omnid` and uses `sudo -E` by default
@@ -81,6 +82,12 @@ Edit `scripts/dev/robot-remote.env` for shared changes such as:
- `OMNI_VIDEO_HARD_BACKPRESSURE_HOLD_MS`
- `OMNI_CONTROL_SERVER_IDLE_RECONNECT_MS`
- `OMNI_VIDEO_MAX_FRAME_AGE_MS`
- `OMNISOCKET_TELEMETRY_PEER_ID`
- `OMNISOCKET_TELEMETRY_INTERVAL_MS`
- `OMNISOCKET_TELEMETRY_STALE_AFTER_MS`
- `OMNI_NETWORK_SUMMARY_LOG_ENABLED`
- `OMNI_NETWORK_SUMMARY_LOG_PATH`
- `OMNI_NETWORK_SUMMARY_LOG_INTERVAL_MS`
Camera presets use `v4l2-ctl` from `v4l-utils` on the robot side.
@@ -96,6 +103,25 @@ New repair knobs:
- `OMNI_VIDEO_SOFT_BACKPRESSURE_SEGMENTS`, `OMNI_VIDEO_HARD_BACKPRESSURE_SEGMENTS`, and `OMNI_VIDEO_HARD_BACKPRESSURE_HOLD_MS` are used by `b_side_omnid`
- `OMNI_CONTROL_SERVER_IDLE_RECONNECT_MS` is used by `b_side_omnid`
- `OMNI_VIDEO_MAX_FRAME_AGE_MS` is used by `start-backend.sh` on the A-side backend, not by `b_side_omnid`
- `OMNISOCKET_TELEMETRY_INTERVAL_MS` and `OMNISOCKET_TELEMETRY_STALE_AFTER_MS` tune the backend's D-side telemetry freshness window
- `OMNI_NETWORK_SUMMARY_LOG_*` controls the A-side JSONL summary logger that polls `GET /api/network/latest/`
Default long-run network logging:
- A-side starts a compact JSONL logger by default at `${OMNISOCKETGO_ROOT}/logs/a-network-summary.jsonl`
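- The A-side polling interval falls back to `2000 ms` when `OMNI_NETWORK_SUMMARY_LOG_INTERVAL_MS` is unset; any value set in the env files takes precedence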
- For D-side long runs, prefer:
```bash
./bin/kcpserver -listen 0.0.0.0:10909 \
-telemetry-peer peer-a-telemetry \
-telemetry-interval 1000ms \
-kcp-session-stats-log logs/d-kcp-stats.jsonl \
-kcp-session-stats-interval 1000ms
```
- Keep `-latency-log` and `-kcp-ts-debug-log` off by default for multi-hour runs
- Do not continuously redirect relay `C` stderr to a file unless you are reproducing a short issue window
Put machine-specific overrides into `scripts/dev/robot-remote.env.local`. Example:
@@ -103,6 +129,7 @@ Put machine-specific overrides into `scripts/dev/robot-remote.env.local`. Exampl
ROBOT_COMMAND_CENTER_ROOT="$HOME/Documents/robot-command-center"
OMNI_CAMERA_DEVICE="/dev/video30"
B_SIDE_OMNID_USE_SUDO="0"
OMNI_NETWORK_SUMMARY_LOG_INTERVAL_MS="5000"
```
Default camera behavior is the `night` preset:

View File

@@ -100,6 +100,13 @@ export BACKEND_HOST="${BACKEND_HOST:-0.0.0.0}"
export BACKEND_PORT="${BACKEND_PORT:-8001}"
export FRONTEND_HOST="${FRONTEND_HOST:-0.0.0.0}"
export FRONTEND_PORT="${FRONTEND_PORT:-5173}"
export OMNISOCKET_TELEMETRY_PEER_ID="${OMNISOCKET_TELEMETRY_PEER_ID:-peer-a-telemetry}"
export OMNISOCKET_TELEMETRY_INTERVAL_MS="${OMNISOCKET_TELEMETRY_INTERVAL_MS:-1000}"
export OMNISOCKET_TELEMETRY_STALE_AFTER_MS="${OMNISOCKET_TELEMETRY_STALE_AFTER_MS:-3000}"
export OMNI_NETWORK_SUMMARY_LOG_ENABLED="${OMNI_NETWORK_SUMMARY_LOG_ENABLED:-1}"
export OMNI_NETWORK_SUMMARY_LOG_PATH="${OMNI_NETWORK_SUMMARY_LOG_PATH:-${OMNISOCKETGO_ROOT}/logs/a-network-summary.jsonl}"
export OMNI_NETWORK_SUMMARY_LOG_INTERVAL_MS="${OMNI_NETWORK_SUMMARY_LOG_INTERVAL_MS:-2000}"
export OMNI_NETWORK_SUMMARY_LOG_REQUEST_TIMEOUT_SEC="${OMNI_NETWORK_SUMMARY_LOG_REQUEST_TIMEOUT_SEC:-3}"
export CONTROL_SIDE_OMNISOCKET_SERVER_ADDR="${CONTROL_SIDE_OMNISOCKET_SERVER_ADDR:-}"
export CONTROL_SIDE_OMNISOCKET_RELAY_VIA="${CONTROL_SIDE_OMNISOCKET_RELAY_VIA:-}"
export ROBOT_SIDE_OMNISOCKET_SERVER_ADDR="${ROBOT_SIDE_OMNISOCKET_SERVER_ADDR:-}"

View File

@@ -0,0 +1,100 @@
#!/usr/bin/env python3
from __future__ import annotations
import argparse
import json
import signal
import sys
import time
import urllib.error
import urllib.request
from pathlib import Path
# Module-wide shutdown flag: set by handle_signal() and polled by the
# sleep helper and the main polling loop.
STOP_REQUESTED = False


def handle_signal(signum: int, frame: object) -> None:
    """Mark the process as stopping.

    The two parameters exist only to satisfy the ``signal.signal`` handler
    signature; neither is inspected.
    """
    global STOP_REQUESTED
    del signum, frame
    STOP_REQUESTED = True
def parse_args() -> argparse.Namespace:
parser = argparse.ArgumentParser(description="Poll /api/network/latest/ and append JSONL snapshots.")
parser.add_argument("--url", required=True, help="HTTP endpoint that returns the network summary JSON.")
parser.add_argument("--output", required=True, help="Output JSONL path.")
parser.add_argument(
"--interval-ms",
type=int,
default=2000,
help="Polling interval in milliseconds. Default: 2000.",
)
parser.add_argument(
"--request-timeout-sec",
type=float,
default=3.0,
help="Single request timeout in seconds. Default: 3.0.",
)
return parser.parse_args()
def sleep_with_stop(seconds: float) -> None:
    """Sleep for up to *seconds*, returning early once a stop is requested.

    The wait is sliced into naps of at most 0.2 s so the module-level
    ``STOP_REQUESTED`` flag is re-checked between naps. Negative durations
    are treated as zero.
    """
    wake_at = time.monotonic() + max(0.0, seconds)
    while True:
        if STOP_REQUESTED:
            return
        left = wake_at - time.monotonic()
        if left <= 0.0:
            return
        time.sleep(min(left, 0.2))
def fetch_json(url: str, timeout_sec: float) -> str:
    """GET *url* and return its JSON body re-serialized as one compact line.

    The body is decoded with the charset declared in the response headers
    (UTF-8 when none is declared), parsed, and dumped again without
    insignificant whitespace and without ASCII-escaping non-ASCII text.

    Raises:
        json.JSONDecodeError: If the body is not valid JSON.
        Any exception raised by ``urllib.request.urlopen``, by reading the
        response, or by decoding the body propagates unchanged.
    """
    request_headers = {
        "Accept": "application/json",
        "Cache-Control": "no-cache",
    }
    req = urllib.request.Request(url, headers=request_headers, method="GET")
    with urllib.request.urlopen(req, timeout=timeout_sec) as resp:
        raw_body = resp.read()
        encoding = resp.headers.get_content_charset("utf-8")
    document = json.loads(raw_body.decode(encoding))
    return json.dumps(document, ensure_ascii=False, separators=(",", ":"))
def main() -> int:
    """Poll the configured endpoint until stopped, appending one JSON line per poll.

    Each successful poll is written to the output file and flushed
    immediately. Failed polls are reported on stderr at most once every
    10 seconds and never terminate the loop. SIGINT and SIGTERM request a
    clean stop.

    Returns:
        Process exit status; always 0 after a clean stop.
    """
    # Imported locally so the module-level import block stays unchanged.
    import http.client

    args = parse_args()
    # Intervals below 200 ms are raised to 200 ms.
    interval_sec = max(args.interval_ms, 200) / 1000.0
    output_path = Path(args.output)
    # None means "no error reported yet". A numeric 0.0 sentinel would hide the
    # first failure whenever time.monotonic() itself is still below 10 seconds.
    last_error_log_monotonic: float | None = None

    signal.signal(signal.SIGINT, handle_signal)
    signal.signal(signal.SIGTERM, handle_signal)

    output_path.parent.mkdir(parents=True, exist_ok=True)
    with output_path.open("a", encoding="utf-8") as output_file:
        while not STOP_REQUESTED:
            started = time.monotonic()
            try:
                line = fetch_json(args.url, args.request_timeout_sec)
            except (
                # URLError, HTTPError, timeouts and connection resets are all OSError.
                OSError,
                # Protocol-level failures (e.g. the server closing the connection
                # mid-response) are raised by http.client without URLError wrapping.
                http.client.HTTPException,
                json.JSONDecodeError,
                UnicodeDecodeError,
            ) as error:
                now = time.monotonic()
                if last_error_log_monotonic is None or now - last_error_log_monotonic >= 10.0:
                    print(f"[network-summary] poll failed: {error}", file=sys.stderr)
                    last_error_log_monotonic = now
            else:
                output_file.write(line)
                output_file.write("\n")
                # Flush per snapshot so the file stays current if the process is killed.
                output_file.flush()
            # Subtract the time the poll took so polls start about interval_sec apart.
            elapsed = time.monotonic() - started
            sleep_with_stop(max(0.0, interval_sec - elapsed))
    return 0


if __name__ == "__main__":
    raise SystemExit(main())

View File

@@ -19,6 +19,13 @@ PYTHON_VENV_PATH="${OMNISOCKETGO_ROOT}/.venv"
BACKEND_HOST="0.0.0.0"
BACKEND_PORT="8001"
OMNISOCKET_TELEMETRY_PEER_ID="peer-a-telemetry"
OMNISOCKET_TELEMETRY_INTERVAL_MS="1000"
OMNISOCKET_TELEMETRY_STALE_AFTER_MS="3000"
OMNI_NETWORK_SUMMARY_LOG_ENABLED="1"
OMNI_NETWORK_SUMMARY_LOG_PATH="${OMNISOCKETGO_ROOT}/logs/a-network-summary.jsonl"
OMNI_NETWORK_SUMMARY_LOG_INTERVAL_MS="5000"
OMNI_NETWORK_SUMMARY_LOG_REQUEST_TIMEOUT_SEC="3"
FRONTEND_HOST="0.0.0.0"
FRONTEND_PORT="5173"

View File

@@ -16,4 +16,38 @@ source "${PYTHON_VENV_PATH}/bin/activate"
cd "${BACKEND_DIR}"
export OMNISOCKET_SERVER_ADDR="${CONTROL_SIDE_OMNISOCKET_SERVER_ADDR}"
export OMNISOCKET_RELAY_VIA="${CONTROL_SIDE_OMNISOCKET_RELAY_VIA}"
exec python -m uvicorn config.asgi:application --host "${BACKEND_HOST}" --port "${BACKEND_PORT}"
# PID of the background network summary logger; empty while none is running.
logger_pid=""

# Stop the background logger (if one was started) and reap it.
# Safe to call repeatedly, e.g. from a signal trap and then again from the
# EXIT trap: the PID is cleared before signalling, so a later call is a no-op
# and can never signal an already-reaped (and possibly reused) PID.
cleanup() {
  local pid="${logger_pid}"
  logger_pid=""
  if [[ -n "${pid}" ]]; then
    kill "${pid}" 2>/dev/null || true
    wait "${pid}" 2>/dev/null || true
  fi
}
# Launch log-network-summary.py in the background against the local backend's
# /api/network/latest/ endpoint and remember its PID in logger_pid.
# Does nothing unless OMNI_NETWORK_SUMMARY_LOG_ENABLED is exactly "1".
start_network_summary_logger() {
  [[ "${OMNI_NETWORK_SUMMARY_LOG_ENABLED}" == "1" ]] || return 0

  local summary_url="http://127.0.0.1:${BACKEND_PORT}/api/network/latest/"
  local summary_dir
  summary_dir="$(dirname "${OMNI_NETWORK_SUMMARY_LOG_PATH}")"
  # Create the log directory up front so the logger can open its output file.
  mkdir -p "${summary_dir}"

  local -a logger_cmd=(
    python "${SCRIPT_DIR}/log-network-summary.py"
    --url "${summary_url}"
    --output "${OMNI_NETWORK_SUMMARY_LOG_PATH}"
    --interval-ms "${OMNI_NETWORK_SUMMARY_LOG_INTERVAL_MS}"
    --request-timeout-sec "${OMNI_NETWORK_SUMMARY_LOG_REQUEST_TIMEOUT_SEC}"
  )
  "${logger_cmd[@]}" &
  logger_pid=$!

  echo "[start-backend] network summary logger -> ${OMNI_NETWORK_SUMMARY_LOG_PATH} (${OMNI_NETWORK_SUMMARY_LOG_INTERVAL_MS} ms)" >&2
}
# PID of the uvicorn child; empty until the backend has been launched.
backend_pid=""

# Stop the backend (if it is still running), then the summary logger.
# Traps are reset first so the handler cannot re-enter itself.
shutdown_backend() {
  trap - EXIT INT TERM
  if [[ -n "${backend_pid}" ]]; then
    kill "${backend_pid}" 2>/dev/null || true
    wait "${backend_pid}" 2>/dev/null || true
    backend_pid=""
  fi
  cleanup
}

trap shutdown_backend EXIT
trap 'shutdown_backend; exit 130' INT
trap 'shutdown_backend; exit 143' TERM

start_network_summary_logger

# Run uvicorn as a background job and block in `wait`. Bash defers a trap
# while a foreground command is running, so a SIGTERM sent to this script
# would otherwise not be acted on until uvicorn exited by itself. `wait`
# returns as soon as a trapped signal arrives, letting the handler forward
# the shutdown to uvicorn.
python -m uvicorn config.asgi:application --host "${BACKEND_HOST}" --port "${BACKEND_PORT}" &
backend_pid=$!

backend_status=0
wait "${backend_pid}" || backend_status=$?
# If uvicorn has already gone, forget its PID so the EXIT handler does not
# signal a stale PID.
if ! kill -0 "${backend_pid}" 2>/dev/null; then
  backend_pid=""
fi
exit "${backend_status}"